sourcecode 0.39.0__tar.gz → 0.42.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-0.39.0 → sourcecode-0.42.0}/PKG-INFO +1 -1
- {sourcecode-0.39.0 → sourcecode-0.42.0}/pyproject.toml +1 -1
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/__init__.py +1 -1
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/architecture_analyzer.py +94 -8
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/cli.py +6 -2
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/contract_model.py +1 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/contract_pipeline.py +41 -67
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/doc_analyzer.py +29 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/env_analyzer.py +110 -22
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/git_analyzer.py +40 -6
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/prepare_context.py +46 -55
- sourcecode-0.42.0/src/sourcecode/ranking_engine.py +231 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/relevance_scorer.py +4 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/schema.py +29 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/semantic_analyzer.py +72 -2
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/serializer.py +151 -15
- sourcecode-0.42.0/tests/test_block1_reliability.py +474 -0
- sourcecode-0.42.0/tests/test_block2_coverage.py +449 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/.gitignore +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/.ruff.toml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/CONTRIBUTING.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/LICENSE +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/README.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/SECURITY.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/docs/privacy.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/docs/schema.md +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/raw +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/redactor.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/scanner.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/workspace.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/__init__.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/conftest.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/coverage.xml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/go_service/go.mod +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/jacoco.xml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/lcov.info +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/package.json +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_architecture_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_architecture_summary.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_ast_extractor.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_classifier.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_cli.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_code_notes_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_contract_pipeline.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_coverage_parser.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_cross_consistency.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_analyzer_node_python.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_schema.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_dotnet.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_go_rust_java.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_nodejs.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_php_ruby_dart.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_python.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_universal_managed.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_universal_systems.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detectors_base.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_doc_analyzer_jsdom.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_doc_analyzer_python.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_analyzer_polyglot.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_analyzer_python_node.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_schema.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_hybrid_inference.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_dependencies.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_detection.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_docs.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_graph_modules.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_lqn.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_metrics.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_multistack.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_semantics.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_universal.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_metrics_analyzer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_packaging.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_phase1_improvements.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_pipeline_integrity.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_real_projects.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_redactor.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_scanner.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_schema.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_schema_normalization.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_analyzer_node.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_analyzer_python.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_import_resolution.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_schema.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_signal_hierarchy.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_summarizer.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_telemetry.py +0 -0
- {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_workspace_analyzer.py +0 -0
|
@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
|
|
|
172
172
|
graph: Optional[ModuleGraph] = None,
|
|
173
173
|
) -> ArchitectureAnalysis:
|
|
174
174
|
limitations: list[str] = []
|
|
175
|
+
evidence: list[dict] = []
|
|
175
176
|
|
|
176
177
|
# Step 1: filter paths
|
|
177
178
|
filtered = self._filter_paths(sm.file_paths)
|
|
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
|
|
|
180
181
|
requested=True,
|
|
181
182
|
pattern="unknown",
|
|
182
183
|
limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
|
|
184
|
+
evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
|
|
185
|
+
tentative=False,
|
|
183
186
|
)
|
|
184
187
|
|
|
185
188
|
# Step 2: domain clustering
|
|
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
|
|
|
193
196
|
elif pattern == "unknown":
|
|
194
197
|
limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
|
|
195
198
|
|
|
196
|
-
# Step 3b: monorepo override — workspace config is hard evidence
|
|
197
|
-
|
|
199
|
+
# Step 3b: monorepo override — workspace config is hard evidence.
|
|
200
|
+
# Overrides all weak inferred patterns; only truly specialised patterns
|
|
201
|
+
# (cqrs, clean, onion, hexagonal) take precedence over workspace config.
|
|
202
|
+
has_workspace = self._has_workspace_config(sm.file_paths)
|
|
203
|
+
if has_workspace and pattern not in (
|
|
198
204
|
"monorepo", "cqrs", "clean", "onion", "hexagonal"
|
|
199
205
|
):
|
|
200
206
|
mono_layers = self._detect_monorepo_packages(filtered)
|
|
201
|
-
|
|
207
|
+
# Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
|
|
208
|
+
# "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
|
|
209
|
+
# all yield to workspace config evidence.
|
|
210
|
+
_WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
|
|
211
|
+
"fullstack", "mvc", "microservices"}
|
|
212
|
+
if mono_layers or pattern in _WEAK_PATTERNS:
|
|
202
213
|
pattern = "monorepo"
|
|
203
214
|
layers = mono_layers
|
|
204
215
|
limitations.append(
|
|
205
216
|
"Workspace config detectado — arquitectura refleja topologia de paquetes"
|
|
206
217
|
)
|
|
218
|
+
ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
|
|
219
|
+
evidence.append({
|
|
220
|
+
"type": "workspace_config",
|
|
221
|
+
"paths": ws_files[:4],
|
|
222
|
+
"reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
|
|
223
|
+
"confidence": "high",
|
|
224
|
+
})
|
|
207
225
|
|
|
208
226
|
# Step 4: bounded context inference
|
|
209
227
|
bounded_contexts = self._infer_bounded_contexts(domains, graph)
|
|
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
|
|
|
212
230
|
confidence: Literal["high", "medium", "low"]
|
|
213
231
|
strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
|
|
214
232
|
all_layers_weak = layers and all(l.confidence == "low" for l in layers)
|
|
233
|
+
|
|
234
|
+
method = "graph+structure" if graph is not None else "filesystem_inference"
|
|
235
|
+
# High-confidence evidence (workspace config) makes pattern non-tentative.
|
|
236
|
+
tentative = not any(e.get("confidence") == "high" for e in evidence)
|
|
237
|
+
|
|
238
|
+
# _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
|
|
239
|
+
# When True, tentative must stay False and confidence must stay at least "medium".
|
|
240
|
+
_hard_evidence = not tentative # tentative=False iff high-conf evidence present
|
|
241
|
+
|
|
215
242
|
if pattern not in (None, "unknown", "flat"):
|
|
216
|
-
if
|
|
243
|
+
if graph is not None:
|
|
244
|
+
# Import graph provided — structural validation available
|
|
245
|
+
confidence = "medium" if len(strong_domains) >= 3 else "low"
|
|
246
|
+
evidence.append({
|
|
247
|
+
"type": "import_graph",
|
|
248
|
+
"paths": [n.id for n in graph.nodes[:6]],
|
|
249
|
+
"reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
|
|
250
|
+
"confidence": "medium",
|
|
251
|
+
})
|
|
252
|
+
elif all_layers_weak:
|
|
217
253
|
# Layers came from file-naming heuristic only, not directory structure
|
|
218
254
|
confidence = "low"
|
|
255
|
+
if not _hard_evidence:
|
|
256
|
+
tentative = True
|
|
219
257
|
limitations.append(
|
|
220
258
|
"Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
|
|
221
259
|
)
|
|
260
|
+
evidence.append({
|
|
261
|
+
"type": "filesystem_naming",
|
|
262
|
+
"paths": [l.files[0] for l in layers if l.files][:6],
|
|
263
|
+
"reason": (
|
|
264
|
+
f"Pattern '{pattern}' inferred from file stem naming conventions only "
|
|
265
|
+
"(e.g. *_controller.py, *_service.py). "
|
|
266
|
+
"No directory structure or import graph confirmation."
|
|
267
|
+
),
|
|
268
|
+
"confidence": "low",
|
|
269
|
+
})
|
|
222
270
|
else:
|
|
223
|
-
|
|
224
|
-
if
|
|
271
|
+
# Directory structure match (or monorepo/workspace override with no layers)
|
|
272
|
+
confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
|
|
273
|
+
if confidence == "low" and not _hard_evidence:
|
|
274
|
+
tentative = True
|
|
275
|
+
if not _hard_evidence:
|
|
225
276
|
limitations.append(
|
|
226
277
|
"Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
|
|
227
278
|
)
|
|
279
|
+
if not _hard_evidence:
|
|
280
|
+
matched_dirs = sorted({
|
|
281
|
+
p.replace("\\", "/").split("/")[0]
|
|
282
|
+
for layer in layers for p in layer.files
|
|
283
|
+
})
|
|
284
|
+
evidence.append({
|
|
285
|
+
"type": "filesystem_naming",
|
|
286
|
+
"paths": matched_dirs[:8],
|
|
287
|
+
"reason": (
|
|
288
|
+
f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
|
|
289
|
+
"Import graph not available — structural direction of dependencies unverified."
|
|
290
|
+
),
|
|
291
|
+
"confidence": "low" if confidence == "low" else "medium",
|
|
292
|
+
})
|
|
228
293
|
elif len(strong_domains) >= 1:
|
|
229
294
|
confidence = "medium"
|
|
295
|
+
if not _hard_evidence:
|
|
296
|
+
tentative = True
|
|
297
|
+
evidence.append({
|
|
298
|
+
"type": "filesystem_naming",
|
|
299
|
+
"paths": [d.name for d in strong_domains[:6]],
|
|
300
|
+
"reason": "Domain clustering from directory names; no layer pattern confirmed",
|
|
301
|
+
"confidence": "low",
|
|
302
|
+
})
|
|
230
303
|
else:
|
|
231
304
|
confidence = "low"
|
|
232
|
-
|
|
233
|
-
|
|
305
|
+
if not _hard_evidence:
|
|
306
|
+
tentative = True
|
|
307
|
+
if not evidence:
|
|
308
|
+
limitations.append(
|
|
309
|
+
"insufficient_evidence: no recognizable architectural signals found; "
|
|
310
|
+
"filesystem structure does not match known patterns"
|
|
311
|
+
)
|
|
312
|
+
evidence.append({
|
|
313
|
+
"type": "filesystem_naming",
|
|
314
|
+
"paths": filtered[:6],
|
|
315
|
+
"reason": "Only filesystem paths available; no pattern matched",
|
|
316
|
+
"confidence": "low",
|
|
317
|
+
})
|
|
234
318
|
|
|
235
319
|
return ArchitectureAnalysis(
|
|
236
320
|
requested=True,
|
|
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
|
|
|
241
325
|
confidence=confidence,
|
|
242
326
|
method=method,
|
|
243
327
|
limitations=limitations,
|
|
328
|
+
evidence=evidence,
|
|
329
|
+
tentative=tentative,
|
|
244
330
|
)
|
|
245
331
|
|
|
246
332
|
# ------------------------------------------------------------------
|
|
@@ -566,7 +566,7 @@ def main(
|
|
|
566
566
|
entrypoints_only: bool = typer.Option(
|
|
567
567
|
False,
|
|
568
568
|
"--entrypoints-only",
|
|
569
|
-
help="Contract mode: include only files that are entrypoints or have exported symbols.",
|
|
569
|
+
help="Contract mode: include only files that are runtime entrypoints or have exported symbols (public API surface). Note: 'entrypoints' here includes all files with exports, not strictly detected runtime entry points.",
|
|
570
570
|
),
|
|
571
571
|
changed_only: bool = typer.Option(
|
|
572
572
|
False,
|
|
@@ -1156,11 +1156,15 @@ def main(
|
|
|
1156
1156
|
|
|
1157
1157
|
_all_call_files = set(_fan_in) | set(_fan_out)
|
|
1158
1158
|
_hotspots: list[dict] = []
|
|
1159
|
-
# Filter test
|
|
1159
|
+
# Filter test, noise, and auxiliary paths — they dominate fan-in but carry no signal
|
|
1160
1160
|
_TEST_MARKERS = {"/test", "/tests", "/spec", "/specs", "_test.", ".test.", ".spec."}
|
|
1161
|
+
from sourcecode.ranking_engine import RankingEngine as _RankingEngine
|
|
1162
|
+
_sem_engine = _RankingEngine(sm.monorepo_packages)
|
|
1161
1163
|
for _p in _all_call_files:
|
|
1162
1164
|
if any(_m in _p for _m in _TEST_MARKERS) or _p.startswith("test"):
|
|
1163
1165
|
continue
|
|
1166
|
+
if _sem_engine.is_noise(_p) or _sem_engine.is_auxiliary(_p):
|
|
1167
|
+
continue
|
|
1164
1168
|
_in = _fan_in[_p]
|
|
1165
1169
|
_out = _fan_out[_p]
|
|
1166
1170
|
_score = _in * 2.0 + _out * 1.0
|
|
@@ -91,6 +91,7 @@ class FileContract:
|
|
|
91
91
|
fan_out: int = 0 # how many files this imports
|
|
92
92
|
is_entrypoint: bool = False
|
|
93
93
|
is_changed: bool = False
|
|
94
|
+
ranking_reasons: list[str] = field(default_factory=list)
|
|
94
95
|
|
|
95
96
|
# Extraction quality
|
|
96
97
|
extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
|
|
@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
|
|
|
17
17
|
|
|
18
18
|
from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
|
|
19
19
|
from sourcecode.contract_model import ContractSummary, FileContract
|
|
20
|
+
from sourcecode.ranking_engine import RankingEngine
|
|
20
21
|
from sourcecode.relevance_scorer import RelevanceScorer
|
|
21
22
|
from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
22
23
|
|
|
@@ -27,22 +28,6 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
|
27
28
|
_MAX_FILES = 500 # hard cap on files extracted per run
|
|
28
29
|
_SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
|
|
29
30
|
|
|
30
|
-
# Role-based score adjustments applied after contract extraction.
|
|
31
|
-
# Runtime roles get a boost; config/util are neutral or penalized.
|
|
32
|
-
_ROLE_SCORE: dict[str, float] = {
|
|
33
|
-
"entrypoint": 0.15,
|
|
34
|
-
"service": 0.10,
|
|
35
|
-
"route": 0.10,
|
|
36
|
-
"api": 0.08,
|
|
37
|
-
"middleware": 0.06,
|
|
38
|
-
"store": 0.05,
|
|
39
|
-
"model": 0.05,
|
|
40
|
-
"hook": 0.05,
|
|
41
|
-
"component": 0.03,
|
|
42
|
-
"util": 0.00,
|
|
43
|
-
"config": -0.10,
|
|
44
|
-
"unknown": 0.00,
|
|
45
|
-
}
|
|
46
31
|
|
|
47
32
|
RankStrategy = Literal["relevance", "centrality", "git-churn"]
|
|
48
33
|
|
|
@@ -60,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
|
|
|
60
45
|
]:
|
|
61
46
|
try:
|
|
62
47
|
result = subprocess.run(
|
|
63
|
-
cmd, cwd=root, capture_output=True, text=True,
|
|
48
|
+
cmd, cwd=root, capture_output=True, text=True,
|
|
49
|
+
encoding="utf-8", errors="replace", timeout=10,
|
|
64
50
|
)
|
|
65
|
-
for line in result.stdout.splitlines():
|
|
51
|
+
for line in (result.stdout or "").splitlines():
|
|
66
52
|
line = line.strip()
|
|
67
53
|
if line:
|
|
68
54
|
changed.add(line.replace("\\", "/"))
|
|
@@ -71,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
|
|
|
71
57
|
try:
|
|
72
58
|
result = subprocess.run(
|
|
73
59
|
["git", "status", "--porcelain"],
|
|
74
|
-
cwd=root, capture_output=True, text=True,
|
|
60
|
+
cwd=root, capture_output=True, text=True,
|
|
61
|
+
encoding="utf-8", errors="replace", timeout=10,
|
|
75
62
|
)
|
|
76
|
-
for line in result.stdout.splitlines():
|
|
63
|
+
for line in (result.stdout or "").splitlines():
|
|
77
64
|
if len(line) > 3:
|
|
78
65
|
changed.add(line[3:].strip().replace("\\", "/"))
|
|
79
66
|
except Exception:
|
|
@@ -144,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
|
|
|
144
131
|
try:
|
|
145
132
|
result = subprocess.run(
|
|
146
133
|
["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
|
|
147
|
-
cwd=root, capture_output=True, text=True,
|
|
134
|
+
cwd=root, capture_output=True, text=True,
|
|
135
|
+
encoding="utf-8", errors="replace", timeout=15,
|
|
148
136
|
)
|
|
149
137
|
path_set = set(file_paths)
|
|
150
138
|
counter: Counter[str] = Counter()
|
|
151
|
-
for line in result.stdout.splitlines():
|
|
139
|
+
for line in (result.stdout or "").splitlines():
|
|
152
140
|
line = line.strip().replace("\\", "/")
|
|
153
141
|
if line in path_set:
|
|
154
142
|
counter[line] += 1
|
|
@@ -194,6 +182,7 @@ class ContractPipeline:
|
|
|
194
182
|
"""
|
|
195
183
|
entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
|
|
196
184
|
scorer = RelevanceScorer(monorepo_packages)
|
|
185
|
+
engine = RankingEngine(monorepo_packages)
|
|
197
186
|
|
|
198
187
|
# 1. Changed files (for --changed-only and ranking)
|
|
199
188
|
changed_files: set[str] = set()
|
|
@@ -267,9 +256,24 @@ class ContractPipeline:
|
|
|
267
256
|
if rank_by == "git-churn":
|
|
268
257
|
churn = _get_git_churn(root, [c.path for c in contracts])
|
|
269
258
|
|
|
270
|
-
# 6. Compute relevance scores
|
|
259
|
+
# 6. Compute relevance scores via unified ranking engine
|
|
260
|
+
max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
|
|
261
|
+
max_churn_val = max(churn.values(), default=1) if churn else 1
|
|
271
262
|
for c in contracts:
|
|
272
|
-
|
|
263
|
+
fs = engine.score(
|
|
264
|
+
c.path,
|
|
265
|
+
fan_in=c.fan_in,
|
|
266
|
+
fan_out=c.fan_out,
|
|
267
|
+
max_fan_in=max_fan_in,
|
|
268
|
+
git_churn=churn.get(c.path, 0),
|
|
269
|
+
max_churn=max_churn_val,
|
|
270
|
+
is_entrypoint=c.is_entrypoint,
|
|
271
|
+
is_changed=c.is_changed,
|
|
272
|
+
export_count=len(c.exports),
|
|
273
|
+
task="default",
|
|
274
|
+
)
|
|
275
|
+
c.relevance_score = fs.display_score
|
|
276
|
+
c.ranking_reasons = fs.reasons
|
|
273
277
|
|
|
274
278
|
# 7. Rank
|
|
275
279
|
contracts = self._rank(contracts, rank_by)
|
|
@@ -285,7 +289,7 @@ class ContractPipeline:
|
|
|
285
289
|
known_paths=set(src_paths),
|
|
286
290
|
entry_paths=entry_paths,
|
|
287
291
|
changed_files=changed_files,
|
|
288
|
-
|
|
292
|
+
engine=engine,
|
|
289
293
|
)
|
|
290
294
|
|
|
291
295
|
# 9. Entrypoints-only filter
|
|
@@ -312,45 +316,13 @@ class ContractPipeline:
|
|
|
312
316
|
)
|
|
313
317
|
return contracts, summary
|
|
314
318
|
|
|
315
|
-
def _score(
|
|
316
|
-
self,
|
|
317
|
-
c: FileContract,
|
|
318
|
-
scorer: RelevanceScorer,
|
|
319
|
-
churn: dict[str, int],
|
|
320
|
-
) -> float:
|
|
321
|
-
base = scorer.score(c.path)
|
|
322
|
-
|
|
323
|
-
if c.is_entrypoint:
|
|
324
|
-
base += 0.3
|
|
325
|
-
if c.is_changed:
|
|
326
|
-
base += 0.2
|
|
327
|
-
|
|
328
|
-
# Fan-in is the strongest signal: many callers = critical contract
|
|
329
|
-
fi_score = min(c.fan_in / 10.0, 0.3)
|
|
330
|
-
fo_score = min(c.fan_out / 15.0, 0.15)
|
|
331
|
-
base += fi_score + fo_score
|
|
332
|
-
|
|
333
|
-
# Exported API value
|
|
334
|
-
export_count = len(c.exports)
|
|
335
|
-
base += min(export_count / 20.0, 0.1)
|
|
336
|
-
|
|
337
|
-
# Churn
|
|
338
|
-
churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
|
|
339
|
-
base += churn_score
|
|
340
|
-
|
|
341
|
-
# Role-based boost: runtime roles score higher than auxiliary
|
|
342
|
-
base += _ROLE_SCORE.get(c.role, 0.0)
|
|
343
|
-
|
|
344
|
-
return min(1.0, base)
|
|
345
|
-
|
|
346
319
|
def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
|
|
347
320
|
if rank_by == "centrality":
|
|
348
|
-
|
|
349
|
-
return sorted(contracts, key=lambda c: -(c.fan_in + c.fan_out))
|
|
321
|
+
return sorted(contracts, key=lambda c: (-(c.fan_in + c.fan_out), c.path))
|
|
350
322
|
if rank_by == "git-churn":
|
|
351
|
-
return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score))
|
|
352
|
-
# Default: relevance
|
|
353
|
-
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
|
|
323
|
+
return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score, c.path))
|
|
324
|
+
# Default: relevance — path breaks ties deterministically
|
|
325
|
+
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score, c.path))
|
|
354
326
|
|
|
355
327
|
def _symbol_deep_scan(
|
|
356
328
|
self,
|
|
@@ -359,7 +331,7 @@ class ContractPipeline:
|
|
|
359
331
|
known_paths: set[str],
|
|
360
332
|
entry_paths: set[str],
|
|
361
333
|
changed_files: set[str],
|
|
362
|
-
|
|
334
|
+
engine: RankingEngine,
|
|
363
335
|
) -> list[FileContract]:
|
|
364
336
|
"""Grep-based fallback when the shallow scan missed the defining files.
|
|
365
337
|
|
|
@@ -367,7 +339,7 @@ class ContractPipeline:
|
|
|
367
339
|
extracts contracts for candidates not already processed, then re-applies
|
|
368
340
|
the symbol filter. Fan-in/fan-out are not computed for these contracts.
|
|
369
341
|
"""
|
|
370
|
-
candidates = _find_symbol_files(root, symbol, known_paths,
|
|
342
|
+
candidates = _find_symbol_files(root, symbol, known_paths, engine)
|
|
371
343
|
if not candidates:
|
|
372
344
|
return []
|
|
373
345
|
|
|
@@ -379,7 +351,9 @@ class ContractPipeline:
|
|
|
379
351
|
continue
|
|
380
352
|
contract.is_entrypoint = rel_path in entry_paths
|
|
381
353
|
contract.is_changed = rel_path in changed_files
|
|
382
|
-
|
|
354
|
+
fs = engine.score(rel_path, is_entrypoint=contract.is_entrypoint, is_changed=contract.is_changed)
|
|
355
|
+
contract.relevance_score = fs.display_score
|
|
356
|
+
contract.ranking_reasons = fs.reasons
|
|
383
357
|
extra.append(contract)
|
|
384
358
|
|
|
385
359
|
return _filter_by_symbol(extra, symbol)
|
|
@@ -531,7 +505,7 @@ def _find_symbol_files(
|
|
|
531
505
|
root: Path,
|
|
532
506
|
symbol: str,
|
|
533
507
|
known_paths: set[str],
|
|
534
|
-
|
|
508
|
+
engine: RankingEngine,
|
|
535
509
|
) -> list[str]:
|
|
536
510
|
"""Find source files outside *known_paths* that contain *symbol* as text.
|
|
537
511
|
|
|
@@ -560,7 +534,7 @@ def _find_symbol_files(
|
|
|
560
534
|
if line.startswith("./"):
|
|
561
535
|
line = line[2:]
|
|
562
536
|
line = line.replace("\\", "/")
|
|
563
|
-
if line and line not in known_paths and not
|
|
537
|
+
if line and line not in known_paths and not engine.is_noise(line):
|
|
564
538
|
found.append(line)
|
|
565
539
|
return found
|
|
566
540
|
except Exception:
|
|
@@ -578,7 +552,7 @@ def _find_symbol_files(
|
|
|
578
552
|
rel_str = str(rel).replace("\\", "/")
|
|
579
553
|
except ValueError:
|
|
580
554
|
continue
|
|
581
|
-
if rel_str in known_paths or
|
|
555
|
+
if rel_str in known_paths or engine.is_noise(rel_str):
|
|
582
556
|
continue
|
|
583
557
|
try:
|
|
584
558
|
content = Path(full).read_text(encoding="utf-8", errors="replace")
|
|
@@ -132,6 +132,8 @@ class DocAnalyzer:
|
|
|
132
132
|
records: list[DocRecord] = []
|
|
133
133
|
limitations: list[str] = list(limitations_pre)
|
|
134
134
|
languages: set[str] = set()
|
|
135
|
+
# Track per-language support status for honest reporting
|
|
136
|
+
unsupported_langs: set[str] = set()
|
|
135
137
|
|
|
136
138
|
for relative_path in file_paths:
|
|
137
139
|
abs_path = root / relative_path
|
|
@@ -176,8 +178,18 @@ class DocAnalyzer:
|
|
|
176
178
|
# Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
|
|
177
179
|
limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
|
|
178
180
|
languages.add(lang)
|
|
181
|
+
unsupported_langs.add(lang)
|
|
179
182
|
# NO records.append() here
|
|
180
183
|
|
|
184
|
+
# Build language_coverage: explicit per-language support status
|
|
185
|
+
_SUPPORTED_LANGS = {"python", "javascript", "typescript"}
|
|
186
|
+
lang_coverage: dict[str, str] = {}
|
|
187
|
+
for lang in languages:
|
|
188
|
+
if lang in _SUPPORTED_LANGS:
|
|
189
|
+
lang_coverage[lang] = "supported"
|
|
190
|
+
else:
|
|
191
|
+
lang_coverage[lang] = "unsupported"
|
|
192
|
+
|
|
181
193
|
# Build summary
|
|
182
194
|
symbol_count = sum(1 for r in records if r.kind != "module")
|
|
183
195
|
total_count = len(records)
|
|
@@ -185,6 +197,22 @@ class DocAnalyzer:
|
|
|
185
197
|
if any(r.doc_text and r.doc_text.endswith(self._TRUNCATION_SUFFIX) for r in records):
|
|
186
198
|
truncated = True
|
|
187
199
|
|
|
200
|
+
# Explicit absence signal: scanned files but found nothing
|
|
201
|
+
if total_count == 0 and file_paths:
|
|
202
|
+
limitations.append(
|
|
203
|
+
f"no_docs_found: {len(file_paths)} file(s) scanned, "
|
|
204
|
+
"no docstrings or JSDoc comments found"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# Warn explicitly when unsupported languages are present — agents must not
|
|
208
|
+
# assume full coverage when Java/Go/Rust files are in scope but not analyzed.
|
|
209
|
+
if unsupported_langs:
|
|
210
|
+
sorted_unsupported = sorted(unsupported_langs)
|
|
211
|
+
limitations.append(
|
|
212
|
+
f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
|
|
213
|
+
"only Python and JS/TS docstrings are extracted"
|
|
214
|
+
)
|
|
215
|
+
|
|
188
216
|
summary = DocSummary(
|
|
189
217
|
requested=True,
|
|
190
218
|
total_count=total_count,
|
|
@@ -193,6 +221,7 @@ class DocAnalyzer:
|
|
|
193
221
|
depth=depth,
|
|
194
222
|
truncated=truncated,
|
|
195
223
|
limitations=limitations,
|
|
224
|
+
language_coverage=lang_coverage,
|
|
196
225
|
)
|
|
197
226
|
return records, summary
|
|
198
227
|
|