sourcecode 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_analyzer.py +76 -11
- sourcecode/cli.py +203 -1
- sourcecode/confidence_analyzer.py +54 -2
- sourcecode/detectors/heuristic.py +19 -1
- sourcecode/detectors/nodejs.py +21 -11
- sourcecode/detectors/project.py +7 -0
- sourcecode/env_analyzer.py +25 -13
- sourcecode/git_analyzer.py +57 -7
- sourcecode/schema.py +26 -0
- sourcecode/serializer.py +10 -2
- {sourcecode-0.28.0.dist-info → sourcecode-0.29.0.dist-info}/METADATA +1 -1
- {sourcecode-0.28.0.dist-info → sourcecode-0.29.0.dist-info}/RECORD +16 -16
- {sourcecode-0.28.0.dist-info → sourcecode-0.29.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.28.0.dist-info → sourcecode-0.29.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.28.0.dist-info → sourcecode-0.29.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
|
@@ -12,6 +12,10 @@ from sourcecode.schema import (
|
|
|
12
12
|
SourceMap,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
+
_WORKSPACE_CONFIG_FILES: frozenset[str] = frozenset({
|
|
16
|
+
"turbo.json", "nx.json", "pnpm-workspace.yaml", "lerna.json", "rush.json",
|
|
17
|
+
})
|
|
18
|
+
|
|
15
19
|
_TOOLING_PREFIXES = (
|
|
16
20
|
".claude/",
|
|
17
21
|
".vscode/",
|
|
@@ -34,6 +38,18 @@ _CODE_EXTENSIONS = {
|
|
|
34
38
|
_GENERIC_NAMES = {"utils", "helpers", "common", "shared", "misc", "core", "root", ""}
|
|
35
39
|
|
|
36
40
|
_TEST_DIRS: frozenset[str] = frozenset({"tests", "test", "spec", "specs", "__tests__", "e2e"})
|
|
41
|
+
_BENCHMARK_DIRS: frozenset[str] = frozenset({
|
|
42
|
+
"benchmark", "benchmarks", "bench",
|
|
43
|
+
"example", "examples",
|
|
44
|
+
"demo", "demos",
|
|
45
|
+
"playground", "playgrounds",
|
|
46
|
+
"fixture", "fixtures",
|
|
47
|
+
"sandbox",
|
|
48
|
+
})
|
|
49
|
+
_DOCS_DIRS: frozenset[str] = frozenset({"docs", "doc", "documentation", "wiki"})
|
|
50
|
+
_TOOLING_DIRS: frozenset[str] = frozenset({"scripts", "script", "tools", "tool", "ci"})
|
|
51
|
+
# All dirs that are not part of the runtime source architecture
|
|
52
|
+
_NON_SOURCE_DIRS: frozenset[str] = _TEST_DIRS | _BENCHMARK_DIRS | _DOCS_DIRS | _TOOLING_DIRS
|
|
37
53
|
|
|
38
54
|
# Exact file stems that signal a specific architectural layer
|
|
39
55
|
_LAYER_STEM_EXACT: dict[str, str] = {
|
|
@@ -177,15 +193,35 @@ class ArchitectureAnalyzer:
|
|
|
177
193
|
elif pattern == "unknown":
|
|
178
194
|
limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
|
|
179
195
|
|
|
196
|
+
# Step 3b: monorepo override — workspace config is hard evidence
|
|
197
|
+
if self._has_workspace_config(sm.file_paths) and pattern not in (
|
|
198
|
+
"monorepo", "cqrs", "clean", "onion", "hexagonal"
|
|
199
|
+
):
|
|
200
|
+
mono_layers = self._detect_monorepo_packages(filtered)
|
|
201
|
+
if mono_layers or pattern in (None, "unknown", "flat", "modular", "layered"):
|
|
202
|
+
pattern = "monorepo"
|
|
203
|
+
layers = mono_layers
|
|
204
|
+
limitations.append(
|
|
205
|
+
"Workspace config detectado — arquitectura refleja topologia de paquetes"
|
|
206
|
+
)
|
|
207
|
+
|
|
180
208
|
# Step 4: bounded context inference
|
|
181
209
|
bounded_contexts = self._infer_bounded_contexts(domains, graph)
|
|
182
210
|
|
|
183
|
-
# Overall confidence
|
|
211
|
+
# Overall confidence — based on domain quality, not raw count
|
|
184
212
|
confidence: Literal["high", "medium", "low"]
|
|
213
|
+
strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
|
|
214
|
+
all_layers_weak = layers and all(l.confidence == "low" for l in layers)
|
|
185
215
|
if pattern not in (None, "unknown", "flat"):
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
216
|
+
if all_layers_weak:
|
|
217
|
+
# Layers came from file-naming heuristic only, not directory structure
|
|
218
|
+
confidence = "medium"
|
|
219
|
+
limitations.append(
|
|
220
|
+
"Patron inferido de nombres de archivo — sin estructura de directorios confirmatoria"
|
|
221
|
+
)
|
|
222
|
+
else:
|
|
223
|
+
confidence = "high" if len(strong_domains) >= 3 else "medium"
|
|
224
|
+
elif len(strong_domains) >= 1:
|
|
189
225
|
confidence = "medium"
|
|
190
226
|
else:
|
|
191
227
|
confidence = "low"
|
|
@@ -217,6 +253,10 @@ class ArchitectureAnalyzer:
|
|
|
217
253
|
norm = p.replace("\\", "/")
|
|
218
254
|
if self._is_tooling(norm):
|
|
219
255
|
continue
|
|
256
|
+
# Exclude non-source dirs at every path segment (benchmarks, docs, tests, scripts…)
|
|
257
|
+
parts = norm.split("/")
|
|
258
|
+
if any(part.lower() in _NON_SOURCE_DIRS for part in parts[:-1]):
|
|
259
|
+
continue
|
|
220
260
|
ext = Path(norm).suffix.lower()
|
|
221
261
|
if ext not in _CODE_EXTENSIONS:
|
|
222
262
|
continue
|
|
@@ -250,6 +290,8 @@ class ArchitectureAnalyzer:
|
|
|
250
290
|
for name, files in groups.items():
|
|
251
291
|
if len(files) < 2:
|
|
252
292
|
continue
|
|
293
|
+
if name.lower() in _NON_SOURCE_DIRS:
|
|
294
|
+
continue
|
|
253
295
|
role = DOMAIN_ROLES.get(name, "")
|
|
254
296
|
domain_confidence: Literal["high", "medium", "low"]
|
|
255
297
|
if name in DOMAIN_ROLES:
|
|
@@ -262,10 +304,10 @@ class ArchitectureAnalyzer:
|
|
|
262
304
|
return domains
|
|
263
305
|
|
|
264
306
|
def _detect_layers(self, paths: list[str]) -> tuple[str, list[ArchitectureLayer]]:
|
|
265
|
-
# Exclude
|
|
307
|
+
# Exclude non-source paths (tests, benchmarks, docs, tooling) from layer scoring
|
|
266
308
|
source_paths = [
|
|
267
309
|
p for p in paths
|
|
268
|
-
if not any(part.lower() in
|
|
310
|
+
if not any(part.lower() in _NON_SOURCE_DIRS for part in p.replace("\\", "/").split("/"))
|
|
269
311
|
]
|
|
270
312
|
if not source_paths:
|
|
271
313
|
return "unknown", []
|
|
@@ -360,7 +402,7 @@ class ArchitectureAnalyzer:
|
|
|
360
402
|
parts = p.replace("\\", "/").split("/")
|
|
361
403
|
if len(parts) >= 2 and parts[-1].lower() in _ENTRY_FILES:
|
|
362
404
|
top = parts[0]
|
|
363
|
-
if top.lower() not in _SRC_TRANSPARENT and top.lower() not in
|
|
405
|
+
if top.lower() not in _SRC_TRANSPARENT and top.lower() not in _NON_SOURCE_DIRS:
|
|
364
406
|
entry_dirs.setdefault(top, []).append(p)
|
|
365
407
|
if len(entry_dirs) >= 4:
|
|
366
408
|
return "microservices", [
|
|
@@ -394,7 +436,7 @@ class ArchitectureAnalyzer:
|
|
|
394
436
|
non_empty = {k: v for k, v in layer_files.items() if v}
|
|
395
437
|
if len(non_empty) >= 2:
|
|
396
438
|
return "layered", [
|
|
397
|
-
ArchitectureLayer(name=k, pattern="layered", files=v, confidence="
|
|
439
|
+
ArchitectureLayer(name=k, pattern="layered", files=v, confidence="low")
|
|
398
440
|
for k, v in non_empty.items()
|
|
399
441
|
]
|
|
400
442
|
return None
|
|
@@ -412,19 +454,42 @@ class ArchitectureAnalyzer:
|
|
|
412
454
|
parts = p.replace("\\", "/").split("/")
|
|
413
455
|
for part in parts[:-1]:
|
|
414
456
|
if (part not in _SRC_TRANSPARENT
|
|
415
|
-
and part.lower() not in
|
|
457
|
+
and part.lower() not in _NON_SOURCE_DIRS
|
|
416
458
|
and part.lower() not in _GENERIC_NAMES):
|
|
417
459
|
module_files.setdefault(part, []).append(p)
|
|
418
460
|
break
|
|
419
461
|
|
|
420
|
-
meaningful = {k: v for k, v in module_files.items() if len(v) >=
|
|
462
|
+
meaningful = {k: v for k, v in module_files.items() if len(v) >= 3}
|
|
421
463
|
if len(meaningful) >= 2:
|
|
422
464
|
return "modular", [
|
|
423
|
-
ArchitectureLayer(name=k, pattern="modular", files=v, confidence="
|
|
465
|
+
ArchitectureLayer(name=k, pattern="modular", files=v, confidence="low")
|
|
424
466
|
for k, v in meaningful.items()
|
|
425
467
|
]
|
|
426
468
|
return None
|
|
427
469
|
|
|
470
|
+
def _has_workspace_config(self, file_paths: list[str]) -> bool:
    """Report whether a workspace config file sits at the repository root.

    Each path is normalised to forward slashes first. Only single-segment
    paths (no directory component) are compared against
    ``_WORKSPACE_CONFIG_FILES``; a config file nested inside a
    sub-directory does not count.
    """
    normalised_paths = (raw.replace("\\", "/") for raw in file_paths)
    return any(
        "/" not in candidate and candidate in _WORKSPACE_CONFIG_FILES
        for candidate in normalised_paths
    )
|
|
476
|
+
|
|
477
|
+
def _detect_monorepo_packages(self, paths: list[str]) -> list[ArchitectureLayer]:
    """Group files under workspace roots into one layer per package.

    A path contributes when its first segment (case-insensitive) is one of
    ``packages``, ``apps``, ``libs`` or ``applications``. The grouping key
    is ``"<root>/<second segment>"``, using the segments exactly as they
    appear in the path. Groups with fewer than two files are dropped, and
    at most 16 layers are returned, in first-seen order.
    """
    workspace_roots = {"packages", "apps", "libs", "applications"}
    files_by_package: dict[str, list[str]] = {}

    for raw_path in paths:
        segments = raw_path.replace("\\", "/").split("/")
        # NOTE(review): two segments are enough to qualify, so a file sitting
        # directly under a workspace root (e.g. "packages/README.md") is keyed
        # by its own path; the two-file minimum below normally removes it.
        if len(segments) < 2 or segments[0].lower() not in workspace_roots:
            continue
        package_key = "/".join(segments[:2])
        files_by_package.setdefault(package_key, []).append(raw_path)

    layers: list[ArchitectureLayer] = []
    for package_key, members in files_by_package.items():
        if len(members) >= 2:
            layers.append(
                ArchitectureLayer(
                    name=package_key,
                    pattern="monorepo",
                    files=members,
                    confidence="medium",
                )
            )
    return layers[:16]
|
|
492
|
+
|
|
428
493
|
def _infer_bounded_contexts(
|
|
429
494
|
self,
|
|
430
495
|
domains: list[ArchitectureDomain],
|
sourcecode/cli.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import hashlib
|
|
3
4
|
import json
|
|
4
5
|
import time
|
|
5
6
|
from pathlib import Path
|
|
@@ -9,6 +10,146 @@ import typer
|
|
|
9
10
|
|
|
10
11
|
from sourcecode import __version__
|
|
11
12
|
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Analyzer fingerprints — short hashes of each analyzer's key rule constants.
|
|
16
|
+
# A change in heuristics, filter lists, or pattern maps changes the hash,
|
|
17
|
+
# making it immediately visible that two runs used different rule versions
|
|
18
|
+
# even if the semver string is the same.
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
def _stable_repr(obj: object) -> str:
    """Return a ``repr``-style string that does not depend on set ordering.

    Plain ``dict``, ``list`` and ``tuple`` values are rendered exactly as
    ``repr`` renders them (insertion/sequence order preserved), so values
    without sets produce the same text as before. Sets and frozensets are
    rendered with their element representations sorted, because their
    iteration order for strings varies between interpreter runs.
    """
    if isinstance(obj, (set, frozenset)):
        if not obj:
            return repr(obj)
        return "{" + ", ".join(sorted(_stable_repr(item) for item in obj)) + "}"
    if type(obj) is dict:
        pairs = (f"{_stable_repr(k)}: {_stable_repr(v)}" for k, v in obj.items())
        return "{" + ", ".join(pairs) + "}"
    if type(obj) is list:
        return "[" + ", ".join(_stable_repr(item) for item in obj) + "]"
    if type(obj) is tuple:
        inner = ", ".join(_stable_repr(item) for item in obj)
        # A one-element tuple needs the trailing comma to match repr().
        return f"({inner},)" if len(obj) == 1 else f"({inner})"
    return repr(obj)


def _fingerprint(*objects: object) -> str:
    """Return an 8-hex-character digest identifying the given rule constants.

    Each object is turned into a canonical string, the strings are
    JSON-encoded as a list, and the first eight hex digits of the SHA-256
    of that text are returned.

    Sets and frozensets (at any nesting depth inside plain dicts, lists and
    tuples) are serialised in sorted order so the digest is reproducible
    across processes; for inputs that contain no sets the digest is
    identical to hashing ``repr()`` of each object.
    """
    # The previous sort_keys=True was dropped: it has no effect on a list of strings.
    raw = json.dumps([_stable_repr(o) for o in objects])
    return hashlib.sha256(raw.encode()).hexdigest()[:8]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _compute_analyzer_fingerprints() -> dict[str, str]:
    """Compute one short fingerprint per analyzer from its rule constants.

    The constants are imported inside the function and passed to
    ``_fingerprint``. Set-like constants are sorted first so their
    serialised form is order-independent.

    Returns:
        A mapping with the keys ``heuristic``, ``nodejs``, ``confidence``
        and ``architecture``, each holding that analyzer's fingerprint.
    """
    from sourcecode.detectors.heuristic import (
        _AUXILIARY_DIRS as _HEUR_AUX,
        _ENTRYPOINT_NAMES,
        _EXTENSION_MAP,
    )
    from sourcecode.detectors.nodejs import _FRAMEWORK_MAP, NodejsDetector
    from sourcecode.confidence_analyzer import (
        _AUXILIARY_DIR_PREFIXES,
        _HARD_SOURCES,
        _SOFT_SOURCES,
    )
    from sourcecode.architecture_analyzer import (
        _BENCHMARK_DIRS,
        _NON_SOURCE_DIRS,
        LAYER_PATTERNS,
    )

    fingerprints: dict[str, str] = {}
    fingerprints["heuristic"] = _fingerprint(
        _EXTENSION_MAP,
        _ENTRYPOINT_NAMES,
        sorted(_HEUR_AUX),
    )
    fingerprints["nodejs"] = _fingerprint(
        _FRAMEWORK_MAP,
        sorted(NodejsDetector._AUXILIARY_DIRS),
    )
    fingerprints["confidence"] = _fingerprint(
        sorted(_AUXILIARY_DIR_PREFIXES),
        sorted(_HARD_SOURCES),
        sorted(_SOFT_SOURCES),
    )
    fingerprints["architecture"] = _fingerprint(
        sorted(_BENCHMARK_DIRS),
        sorted(_NON_SOURCE_DIRS),
        [*LAYER_PATTERNS.keys()],
    )
    return fingerprints
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Pipeline trace collector
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
class _TraceCollector:
    """In-memory recorder of pipeline trace events; does nothing unless enabled."""

    def __init__(self, enabled: bool = False) -> None:
        self._enabled = enabled
        # Events are kept as plain dicts in emission order.
        self._events: list[dict[str, Any]] = []

    def emit(
        self,
        stage: str,
        component: str,
        action: str,
        target: Optional[str] = None,
        reason: Optional[str] = None,
    ) -> None:
        """Record one event when the collector is enabled.

        ``target`` and ``reason`` are stored only when truthy, so ``None``
        and the empty string both leave the key out of the event.
        """
        if not self._enabled:
            return
        event: dict[str, Any] = {
            "stage": stage,
            "component": component,
            "action": action,
        }
        if target:
            event["target"] = target
        if reason:
            event["reason"] = reason
        self._events.append(event)

    def build_trace(self) -> "PipelineTrace":
        """Convert the recorded events into a ``PipelineTrace`` marked as requested."""
        from sourcecode.schema import PipelineEvent, PipelineTrace

        converted = []
        for recorded in self._events:
            converted.append(
                PipelineEvent(
                    stage=recorded["stage"],
                    component=recorded["component"],
                    action=recorded["action"],
                    # Optional keys were omitted by emit() when not provided.
                    target=recorded.get("target"),
                    reason=recorded.get("reason"),
                )
            )
        return PipelineTrace(requested=True, events=converted)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# E2E pipeline coherence check
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
def _check_pipeline_coherence(sm: "SourceMap") -> list[str]:  # type: ignore[name-defined]
    """Look for contradictory states between analyzer outputs.

    Reads ``sm.confidence_summary``, ``sm.stacks`` and ``sm.entry_points``.
    Returns human-readable ``[coherence]`` messages, or an empty list when
    nothing contradictory is found. The function only reports; it never
    raises for an incoherent state.
    """
    violations: list[str] = []
    entry_points = sm.entry_points
    summary = sm.confidence_summary

    if summary is not None:
        if summary.overall == "high":
            # overall=high needs at least one stack not detected heuristically.
            if all(stack.detection_method == "heuristic" for stack in sm.stacks):
                violations.append(
                    "[coherence] overall=high but all stacks are heuristic — "
                    "downgrade not applied; check confidence_analyzer"
                )

            # overall=high needs a production entry point; an untyped (None)
            # entry point is treated as production.
            has_production = any(
                ep.entrypoint_type in ("production", None) for ep in entry_points
            )
            if entry_points and not has_production:
                violations.append(
                    "[coherence] overall=high but no production entry points exist — "
                    "all detected EPs are auxiliary (benchmark/example/dev)"
                )

        # High entry-point confidence cannot coexist with no entry points.
        if summary.entry_point_confidence == "high" and not entry_points:
            violations.append(
                "[coherence] entry_point_confidence=high but entry_points is empty"
            )

    # Every detected entry point is a benchmark or example: no production
    # entry exists. This check does not depend on the confidence summary.
    if entry_points and all(
        ep.entrypoint_type in ("benchmark", "example") for ep in entry_points
    ):
        violations.append(
            f"[coherence] all {len(sm.entry_points)} entry point(s) are benchmark/example — "
            "no production entry detected; analysis_gaps should reflect impact=high"
        )

    return violations
|
|
152
|
+
|
|
12
153
|
_HELP = """\
|
|
13
154
|
Deterministic codebase context for AI coding agents.
|
|
14
155
|
|
|
@@ -327,6 +468,11 @@ def main(
|
|
|
327
468
|
"--agent",
|
|
328
469
|
help="Modo agente: output estructurado y sin ruido para consumo por IA. Incluye identidad, entrypoints, arquitectura, dependencias clave, señales operacionales y gaps. Sin arbol de ficheros ni secciones vacias.",
|
|
329
470
|
),
|
|
471
|
+
trace_pipeline: bool = typer.Option(
|
|
472
|
+
False,
|
|
473
|
+
"--trace-pipeline",
|
|
474
|
+
help="Modo trazabilidad: incluye pipeline_trace con candidatos, filtros, descartes y origen de cada dato. Para diagnóstico de contaminación de resultados.",
|
|
475
|
+
),
|
|
330
476
|
) -> None:
|
|
331
477
|
"""Analyze a repository and produce structured context for AI coding agents.
|
|
332
478
|
|
|
@@ -672,7 +818,18 @@ def main(
|
|
|
672
818
|
)
|
|
673
819
|
|
|
674
820
|
# 3. Construir el schema
|
|
675
|
-
|
|
821
|
+
# Compute analyzer fingerprints: short hashes of each analyzer's key rule
|
|
822
|
+
# constants so that a rule change is always visible in the output, regardless
|
|
823
|
+
# of whether the semver was bumped.
|
|
824
|
+
try:
|
|
825
|
+
_fingerprints = _compute_analyzer_fingerprints()
|
|
826
|
+
except Exception:
|
|
827
|
+
_fingerprints = {}
|
|
828
|
+
|
|
829
|
+
metadata = AnalysisMetadata(
|
|
830
|
+
analyzed_path=str(target),
|
|
831
|
+
analyzer_fingerprints=_fingerprints,
|
|
832
|
+
)
|
|
676
833
|
sm = SourceMap(
|
|
677
834
|
metadata=metadata,
|
|
678
835
|
file_tree=file_tree,
|
|
@@ -812,6 +969,51 @@ def main(
|
|
|
812
969
|
_conf_summary, _analysis_gaps = ConfidenceAnalyzer().analyze(sm)
|
|
813
970
|
sm = _replace(sm, confidence_summary=_conf_summary, analysis_gaps=_analysis_gaps)
|
|
814
971
|
|
|
972
|
+
# E2E pipeline coherence check — emits [coherence] warnings to stderr.
|
|
973
|
+
# Catches contradictory states that can survive individual-analyzer validation.
|
|
974
|
+
for _issue in _check_pipeline_coherence(sm):
|
|
975
|
+
typer.echo(_issue, err=True)
|
|
976
|
+
|
|
977
|
+
# Build pipeline trace when --trace-pipeline is set.
|
|
978
|
+
if trace_pipeline:
|
|
979
|
+
_trace = _TraceCollector(enabled=True)
|
|
980
|
+
_trace.emit("scan", "scanner", "complete",
|
|
981
|
+
reason=f"{len(sm.file_paths)} files, {len(manifests)} manifests")
|
|
982
|
+
for _s in sm.stacks:
|
|
983
|
+
_trace.emit("detect", _s.produced_by or "unknown", "emit_stack",
|
|
984
|
+
target=_s.stack,
|
|
985
|
+
reason=f"method={_s.detection_method} confidence={_s.confidence}")
|
|
986
|
+
for _ep in sm.entry_points:
|
|
987
|
+
_trace.emit("detect", _ep.produced_by or "unknown", "emit_ep",
|
|
988
|
+
target=_ep.path,
|
|
989
|
+
reason=f"type={_ep.entrypoint_type} confidence={_ep.confidence} reason={_ep.reason}")
|
|
990
|
+
# Record EPs filtered from agent_view (benchmark/example with path-auxiliary parts)
|
|
991
|
+
_aux_parts = frozenset({
|
|
992
|
+
"benchmark", "benchmarks", "bench", "demo", "demos",
|
|
993
|
+
"example", "examples", "docs", "doc", "fixtures", "fixture",
|
|
994
|
+
})
|
|
995
|
+
for _ep in sm.entry_points:
|
|
996
|
+
_ep_type = _ep.entrypoint_type
|
|
997
|
+
_path_parts = _ep.path.replace("\\", "/").lower().split("/")
|
|
998
|
+
_filtered = (
|
|
999
|
+
_ep_type in ("benchmark", "example")
|
|
1000
|
+
or any(p in _aux_parts for p in _path_parts)
|
|
1001
|
+
)
|
|
1002
|
+
if _filtered:
|
|
1003
|
+
_trace.emit("output", "agent_view", "filter_ep",
|
|
1004
|
+
target=_ep.path,
|
|
1005
|
+
reason=f"entrypoint_type={_ep_type} (auxiliary)")
|
|
1006
|
+
if sm.confidence_summary is not None:
|
|
1007
|
+
_cs = sm.confidence_summary
|
|
1008
|
+
_trace.emit("confidence", "confidence_analyzer", "computed",
|
|
1009
|
+
reason=(
|
|
1010
|
+
f"overall={_cs.overall} "
|
|
1011
|
+
f"stack={_cs.stack_confidence} "
|
|
1012
|
+
f"ep={_cs.entry_point_confidence} "
|
|
1013
|
+
f"anomalies={len(_cs.anomalies)}"
|
|
1014
|
+
))
|
|
1015
|
+
sm = _replace(sm, pipeline_trace=_trace.build_trace())
|
|
1016
|
+
|
|
815
1017
|
# 4. Serializar
|
|
816
1018
|
if agent:
|
|
817
1019
|
data = agent_view(sm)
|
|
@@ -98,6 +98,31 @@ class ConfidenceAnalyzer:
|
|
|
98
98
|
if sm.entry_points and all(ep.confidence == "low" for ep in sm.entry_points):
|
|
99
99
|
anomalies.append("All entry points are low-confidence (heuristic/code_signal only)")
|
|
100
100
|
|
|
101
|
+
# ── Anomaly: all production EPs are convention-only (no manifest evidence) ──
|
|
102
|
+
production_eps_check = [
|
|
103
|
+
ep for ep in sm.entry_points
|
|
104
|
+
if ep.entrypoint_type in ("production", None)
|
|
105
|
+
]
|
|
106
|
+
if production_eps_check and all(
|
|
107
|
+
ep.source in ("convention", "heuristic") or ep.reason in ("convention", "entry_file_pattern")
|
|
108
|
+
for ep in production_eps_check
|
|
109
|
+
):
|
|
110
|
+
anomalies.append(
|
|
111
|
+
"All production entry points inferred from filename conventions only — "
|
|
112
|
+
"no package.json scripts, bin declaration, or manifest reference found"
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# ── Anomaly: no production entry points ───────────────────────────────
|
|
116
|
+
if sm.entry_points:
|
|
117
|
+
production_eps = [
|
|
118
|
+
ep for ep in sm.entry_points
|
|
119
|
+
if ep.entrypoint_type in ("production", None)
|
|
120
|
+
]
|
|
121
|
+
if not production_eps:
|
|
122
|
+
anomalies.append(
|
|
123
|
+
"No production entry points — all detected entries are dev/benchmark/example"
|
|
124
|
+
)
|
|
125
|
+
|
|
101
126
|
# ── Gaps ──────────────────────────────────────────────────────────────
|
|
102
127
|
if not sm.entry_points:
|
|
103
128
|
gaps.append(AnalysisGap(
|
|
@@ -105,6 +130,19 @@ class ConfidenceAnalyzer:
|
|
|
105
130
|
reason="No entry point detected — project may use non-standard structure or be a library",
|
|
106
131
|
impact="high",
|
|
107
132
|
))
|
|
133
|
+
elif all(
|
|
134
|
+
ep.entrypoint_type in ("benchmark", "example", "development")
|
|
135
|
+
for ep in sm.entry_points
|
|
136
|
+
):
|
|
137
|
+
gaps.append(AnalysisGap(
|
|
138
|
+
area="entry_points",
|
|
139
|
+
reason=(
|
|
140
|
+
"All detected entry points are auxiliary (benchmark/example/dev) — "
|
|
141
|
+
"no production entry point found. Verify project has a 'start'/'serve' "
|
|
142
|
+
"script or production binary."
|
|
143
|
+
),
|
|
144
|
+
impact="high",
|
|
145
|
+
))
|
|
108
146
|
elif all(ep.confidence == "low" for ep in sm.entry_points):
|
|
109
147
|
gaps.append(AnalysisGap(
|
|
110
148
|
area="entry_points",
|
|
@@ -155,10 +193,24 @@ class ConfidenceAnalyzer:
|
|
|
155
193
|
if manifest_stacks
|
|
156
194
|
else _min_confidence([s.confidence for s in sm.stacks] or ["low"])
|
|
157
195
|
)
|
|
158
|
-
# Entry points:
|
|
159
|
-
|
|
196
|
+
# Entry points: only consider production EPs for confidence scoring.
|
|
197
|
+
# Benchmark/example/dev-only entries are not evidence of production readiness.
|
|
198
|
+
production_eps = [
|
|
199
|
+
ep for ep in sm.entry_points
|
|
200
|
+
if ep.entrypoint_type in ("production", None)
|
|
201
|
+
]
|
|
202
|
+
ep_conf = _max_confidence([ep.confidence for ep in production_eps] or ["low"])
|
|
160
203
|
overall = _min_confidence([stack_conf, ep_conf])
|
|
161
204
|
|
|
205
|
+
# Factor in architecture confidence when available
|
|
206
|
+
arch = sm.architecture
|
|
207
|
+
if arch is not None and arch.requested:
|
|
208
|
+
overall = _min_confidence([overall, arch.confidence])
|
|
209
|
+
if arch.pattern in (None, "unknown"):
|
|
210
|
+
# Architecture could not be inferred — don't let stack alone push to high
|
|
211
|
+
if overall == "high":
|
|
212
|
+
overall = "medium"
|
|
213
|
+
|
|
162
214
|
# Downgrade if gaps are severe
|
|
163
215
|
high_impact_gaps = [g for g in gaps if g.impact == "high"]
|
|
164
216
|
if high_impact_gaps:
|
|
@@ -26,11 +26,27 @@ _EXTENSION_MAP = {
|
|
|
26
26
|
_ENTRYPOINT_NAMES = {
|
|
27
27
|
"main.py": ("python", "script"),
|
|
28
28
|
"app.py": ("python", "app"),
|
|
29
|
-
|
|
29
|
+
# index.js excluded: ambiguous (library export vs server); nodejs detector handles it
|
|
30
30
|
"main.go": ("go", "binary"),
|
|
31
31
|
"main.rs": ("rust", "binary"),
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
# Lower-case path segment names that mark a path as auxiliary: benchmarks,
# examples, demos, playgrounds, fixtures/mocks, sandboxes, docs, tests and
# tooling.
_AUXILIARY_DIRS: frozenset[str] = frozenset((
    "benchmark", "benchmarks", "bench",
    "example", "examples",
    "demo", "demos",
    "playground", "playgrounds",
    "fixture", "fixtures", "mock", "mocks",
    "sandbox", "e2e", "docs", "doc", "documentation",
    "test", "tests", "spec", "specs", "__tests__",
    "scripts", "script", "tools", "tool", "tooling", "ci",
))


def _is_auxiliary_path(path: str) -> bool:
    """Return True when any segment of *path* names an auxiliary directory.

    Backslashes are treated as path separators and the comparison is
    case-insensitive. Every segment is checked, including the last one.
    """
    for segment in path.replace("\\", "/").split("/"):
        if segment.lower() in _AUXILIARY_DIRS:
            return True
    return False
|
|
49
|
+
|
|
34
50
|
|
|
35
51
|
class HeuristicDetector(AbstractDetector):
|
|
36
52
|
name = "heuristic"
|
|
@@ -62,6 +78,8 @@ class HeuristicDetector(AbstractDetector):
|
|
|
62
78
|
|
|
63
79
|
entry_points: list[EntryPoint] = []
|
|
64
80
|
for path in paths:
|
|
81
|
+
if _is_auxiliary_path(path):
|
|
82
|
+
continue
|
|
65
83
|
filename = path.rsplit("/", 1)[-1]
|
|
66
84
|
if filename in _ENTRYPOINT_NAMES:
|
|
67
85
|
stack, kind = _ENTRYPOINT_NAMES[filename]
|
sourcecode/detectors/nodejs.py
CHANGED
|
@@ -187,17 +187,22 @@ class NodejsDetector(AbstractDetector):
|
|
|
187
187
|
if isinstance(main, str) and main.strip():
|
|
188
188
|
path = main.strip()
|
|
189
189
|
if path not in seen and path_exists_in_tree(context.file_tree, path):
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
190
|
+
if not self._is_auxiliary_path(path):
|
|
191
|
+
seen.add(path)
|
|
192
|
+
entry_points.append(EntryPoint(
|
|
193
|
+
path=path,
|
|
194
|
+
stack="nodejs",
|
|
195
|
+
kind="module",
|
|
196
|
+
source="package.json",
|
|
197
|
+
confidence="high",
|
|
198
|
+
reason="main",
|
|
199
|
+
evidence="declared in package.json main field",
|
|
200
|
+
entrypoint_type="production",
|
|
201
|
+
))
|
|
199
202
|
|
|
200
203
|
# Priority 4: filename conventions (last resort — penalize auxiliary dirs)
|
|
204
|
+
is_monorepo = bool(self._detect_monorepo_signals(context, package_json))
|
|
205
|
+
_INDEX_PATHS = {"src/index.js", "src/index.ts", "index.js", "index.ts"}
|
|
201
206
|
for path in [
|
|
202
207
|
"server.js", "server.ts",
|
|
203
208
|
"src/index.js", "src/index.ts",
|
|
@@ -206,14 +211,19 @@ class NodejsDetector(AbstractDetector):
|
|
|
206
211
|
]:
|
|
207
212
|
if path in seen or not path_exists_in_tree(context.file_tree, path):
|
|
208
213
|
continue
|
|
214
|
+
# In monorepos, root/src index files are package exports, not run targets
|
|
215
|
+
if is_monorepo and path in _INDEX_PATHS:
|
|
216
|
+
continue
|
|
209
217
|
ep_type = self._path_entrypoint_type(path)
|
|
210
|
-
|
|
218
|
+
is_index = path.split("/")[-1] in ("index.js", "index.ts")
|
|
219
|
+
kind = "module" if is_index else ("web" if path.startswith(("app/", "pages/")) else "server")
|
|
220
|
+
confidence = "low" # convention only — no script or bin declaration
|
|
211
221
|
entry_points.append(EntryPoint(
|
|
212
222
|
path=path,
|
|
213
223
|
stack="nodejs",
|
|
214
224
|
kind=kind,
|
|
215
225
|
source="convention",
|
|
216
|
-
confidence=
|
|
226
|
+
confidence=confidence,
|
|
217
227
|
reason="convention",
|
|
218
228
|
entrypoint_type=ep_type,
|
|
219
229
|
))
|
sourcecode/detectors/project.py
CHANGED
|
@@ -48,6 +48,12 @@ class ProjectDetector:
|
|
|
48
48
|
continue
|
|
49
49
|
|
|
50
50
|
stacks, entry_points = detector.detect(context)
|
|
51
|
+
# Stamp provenance: every emitted stack and EP knows which detector produced it
|
|
52
|
+
for item in stacks:
|
|
53
|
+
item.produced_by = detector.name
|
|
54
|
+
for item in entry_points:
|
|
55
|
+
item.produced_by = detector.name
|
|
56
|
+
|
|
51
57
|
for stack in stacks:
|
|
52
58
|
existing = merged_stacks.get(stack.stack)
|
|
53
59
|
if existing is None:
|
|
@@ -103,6 +109,7 @@ class ProjectDetector:
|
|
|
103
109
|
root=stack.root,
|
|
104
110
|
workspace=stack.workspace,
|
|
105
111
|
signals=list(stack.signals),
|
|
112
|
+
produced_by=stack.produced_by,
|
|
106
113
|
)
|
|
107
114
|
|
|
108
115
|
def _merge_stack(self, current: StackDetection, incoming: StackDetection) -> StackDetection:
|
sourcecode/env_analyzer.py
CHANGED
|
@@ -31,6 +31,14 @@ _SPRING_CONF_PROFILE_RE = re.compile(r'^application-[a-z0-9_-]+\.(properties|ya?
|
|
|
31
31
|
# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE
|
|
32
32
|
_SPRING_ENV_REF_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::[^}]*)?\}')
|
|
33
33
|
|
|
34
|
+
# Patterns where absence of the variable causes a hard runtime error (not just None/null).
|
|
35
|
+
# py_environ_bracket → os.environ["KEY"] raises KeyError
|
|
36
|
+
# java_spring_value → Spring fails to start if ${KEY} has no default
|
|
37
|
+
_HARD_REQUIRED_PATTERNS: frozenset[str] = frozenset({
|
|
38
|
+
"py_environ_bracket",
|
|
39
|
+
"java_spring_value",
|
|
40
|
+
})
|
|
41
|
+
|
|
34
42
|
# (pattern_id, compiled_regex)
|
|
35
43
|
# Grupos de captura: group(1)=key, group(2)=default si existe
|
|
36
44
|
_PATTERNS: list[tuple[str, re.Pattern]] = [
|
|
@@ -132,9 +140,9 @@ def _infer_type_hint(key: str) -> str:
|
|
|
132
140
|
def _scan_file(
|
|
133
141
|
path: Path,
|
|
134
142
|
rel_path: str,
|
|
135
|
-
findings: dict[str, list[tuple[str, Optional[str]]]],
|
|
143
|
+
findings: dict[str, list[tuple[str, Optional[str], bool]]],
|
|
136
144
|
) -> None:
|
|
137
|
-
"""Escanea un fichero
|
|
145
|
+
"""Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard)]."""
|
|
138
146
|
try:
|
|
139
147
|
size = path.stat().st_size
|
|
140
148
|
if size > _MAX_FILE_SIZE:
|
|
@@ -143,8 +151,8 @@ def _scan_file(
|
|
|
143
151
|
except OSError:
|
|
144
152
|
return
|
|
145
153
|
|
|
146
|
-
|
|
147
|
-
|
|
154
|
+
for pattern_id, regex in _PATTERNS:
|
|
155
|
+
is_hard = pattern_id in _HARD_REQUIRED_PATTERNS
|
|
148
156
|
for m in regex.finditer(content):
|
|
149
157
|
key = m.group(1)
|
|
150
158
|
if not key:
|
|
@@ -158,10 +166,9 @@ def _scan_file(
|
|
|
158
166
|
except IndexError:
|
|
159
167
|
pass
|
|
160
168
|
|
|
161
|
-
# Compute 1-based line number
|
|
162
169
|
line_num = content.count("\n", 0, m.start()) + 1
|
|
163
170
|
file_ref = f"{rel_path}:{line_num}"
|
|
164
|
-
findings[key].append((file_ref, default))
|
|
171
|
+
findings[key].append((file_ref, default, is_hard))
|
|
165
172
|
|
|
166
173
|
|
|
167
174
|
def _parse_env_example(
|
|
@@ -211,7 +218,8 @@ def _parse_spring_config(
|
|
|
211
218
|
for m in _SPRING_ENV_REF_RE.finditer(content):
|
|
212
219
|
key = m.group(1)
|
|
213
220
|
line_num = content.count("\n", 0, m.start()) + 1
|
|
214
|
-
|
|
221
|
+
# Spring fails to start if a referenced env var has no default → hard required
|
|
222
|
+
findings[key].append((f"{rel_path}:{line_num}", None, True))
|
|
215
223
|
|
|
216
224
|
|
|
217
225
|
class EnvAnalyzer:
|
|
@@ -224,8 +232,8 @@ class EnvAnalyzer:
|
|
|
224
232
|
) -> tuple[list, object]:
|
|
225
233
|
from sourcecode.schema import EnvSummary, EnvVarRecord
|
|
226
234
|
|
|
227
|
-
# findings[key] = list of (file_ref, default_or_None)
|
|
228
|
-
findings: dict[str, list[tuple[str, Optional[str]]]] = defaultdict(list)
|
|
235
|
+
# findings[key] = list of (file_ref, default_or_None, is_hard_required)
|
|
236
|
+
findings: dict[str, list[tuple[str, Optional[str], bool]]] = defaultdict(list)
|
|
229
237
|
example_entries: list[tuple[str, Optional[str], Optional[str]]] = []
|
|
230
238
|
example_files_found: list[str] = []
|
|
231
239
|
limitations: list[str] = []
|
|
@@ -240,12 +248,16 @@ class EnvAnalyzer:
|
|
|
240
248
|
if len(records) >= _MAX_KEYS:
|
|
241
249
|
limitations.append(f"key_limit_reached:{_MAX_KEYS}")
|
|
242
250
|
break
|
|
243
|
-
defaults = [d for _, d in refs if d is not None]
|
|
244
|
-
required
|
|
251
|
+
defaults = [d for _, d, _ in refs if d is not None]
|
|
252
|
+
# required only when access pattern causes a hard runtime error if missing:
|
|
253
|
+
# os.environ["KEY"] (KeyError) or Spring @Value/${KEY} without default.
|
|
254
|
+
# os.getenv("KEY") / os.environ.get("KEY") return None — not hard required.
|
|
255
|
+
has_hard_access = any(is_hard for _, _, is_hard in refs)
|
|
256
|
+
required = has_hard_access and not defaults
|
|
245
257
|
default_val = defaults[0] if defaults else None
|
|
246
258
|
unique_files: list[str] = []
|
|
247
259
|
seen: set[str] = set()
|
|
248
|
-
for file_ref, _ in refs:
|
|
260
|
+
for file_ref, _, _ in refs:
|
|
249
261
|
if file_ref not in seen:
|
|
250
262
|
seen.add(file_ref)
|
|
251
263
|
unique_files.append(file_ref)
|
|
@@ -271,7 +283,7 @@ class EnvAnalyzer:
|
|
|
271
283
|
break
|
|
272
284
|
records[key] = EnvVarRecord(
|
|
273
285
|
key=key,
|
|
274
|
-
required=
|
|
286
|
+
required=False, # .env.example documents presence; hard required needs a code access pattern
|
|
275
287
|
default=example_default,
|
|
276
288
|
type_hint=_infer_type_hint(key),
|
|
277
289
|
category=_infer_category(key),
|
sourcecode/git_analyzer.py
CHANGED
|
@@ -12,6 +12,34 @@ _MAX_CONTRIBUTORS = 20
|
|
|
12
12
|
|
|
13
13
|
_DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}T")
|
|
14
14
|
|
|
15
|
+
_RELEASE_COMMIT_RE = re.compile(
|
|
16
|
+
r"^(?:chore(?:\(release\))?[:\s]|release[:\s]|bump[:\s]|version[:\s]"
|
|
17
|
+
r"|Merge pull request\s|Bumps?\s\w"
|
|
18
|
+
r"|v?\d+\.\d+\.\d+)",
|
|
19
|
+
re.IGNORECASE,
|
|
20
|
+
)
|
|
21
|
+
# Matches version-bump phrases anywhere in the commit subject (multilingual)
|
|
22
|
+
_RELEASE_COMMIT_CONTAINS_RE = re.compile(
|
|
23
|
+
r"subiendo a v?[\d.]" # Spanish: "subiendo a v.0.28.0"
|
|
24
|
+
r"|bumping to v?[\d.]"
|
|
25
|
+
r"|preparing (?:v|release)[\d. ]"
|
|
26
|
+
r"|releasing v?[\d.]"
|
|
27
|
+
r"|cut v?[\d.]"
|
|
28
|
+
r"|\bv\d+\.\d+\.\d+\b", # bare version tag in middle of message
|
|
29
|
+
re.IGNORECASE,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Files changed by release bots / version bumps — exclude from semantic hotspots
|
|
33
|
+
_HOTSPOT_ADMIN_FILENAMES: frozenset[str] = frozenset({
|
|
34
|
+
"CHANGELOG.md", "CHANGELOG", "CHANGES.md", "CHANGES", "HISTORY.md",
|
|
35
|
+
"RELEASE.md", "RELEASES.md", "RELEASE_NOTES.md", "CHANGELOG.rst", "NEWS.md", "NEWS.rst",
|
|
36
|
+
"VERSION", "VERSION.txt", "version.txt", ".version",
|
|
37
|
+
"package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb",
|
|
38
|
+
"Cargo.lock", "poetry.lock", "Pipfile.lock", "composer.lock",
|
|
39
|
+
"go.sum", "Gemfile.lock",
|
|
40
|
+
})
|
|
41
|
+
_HOTSPOT_ADMIN_SUFFIXES: tuple[str, ...] = (".lock", ".snap", ".min.js", ".min.css")
|
|
42
|
+
|
|
15
43
|
|
|
16
44
|
def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
|
|
17
45
|
result = subprocess.run(
|
|
@@ -87,7 +115,7 @@ class GitAnalyzer:
|
|
|
87
115
|
"log",
|
|
88
116
|
f"--since={days} days ago",
|
|
89
117
|
"--name-only",
|
|
90
|
-
"--pretty=format
|
|
118
|
+
"--pretty=format:__HOTSPOT__|%aI|%s",
|
|
91
119
|
],
|
|
92
120
|
path,
|
|
93
121
|
timeout=30,
|
|
@@ -162,23 +190,45 @@ def _parse_commits(output: str) -> list:
|
|
|
162
190
|
return commits
|
|
163
191
|
|
|
164
192
|
|
|
193
|
+
def _is_hotspot_admin(path: str) -> bool:
|
|
194
|
+
"""True for files that are noisy from release/bot commits, not semantic changes."""
|
|
195
|
+
filename = path.rsplit("/", 1)[-1]
|
|
196
|
+
if filename in _HOTSPOT_ADMIN_FILENAMES:
|
|
197
|
+
return True
|
|
198
|
+
for suffix in _HOTSPOT_ADMIN_SUFFIXES:
|
|
199
|
+
if filename.endswith(suffix):
|
|
200
|
+
return True
|
|
201
|
+
return False
|
|
202
|
+
|
|
203
|
+
|
|
165
204
|
def _parse_hotspots(output: str) -> list:
|
|
166
205
|
from sourcecode.schema import ChangeHotspot
|
|
167
206
|
|
|
168
207
|
file_counts: Counter = Counter()
|
|
169
208
|
file_last_date: dict[str, str] = {}
|
|
170
209
|
current_date = ""
|
|
210
|
+
skip_commit = False
|
|
171
211
|
|
|
172
212
|
for line in output.splitlines():
|
|
173
213
|
line = line.strip()
|
|
174
214
|
if not line:
|
|
175
215
|
continue
|
|
176
|
-
if
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
216
|
+
if line.startswith("__HOTSPOT__|"):
|
|
217
|
+
parts = line.split("|", 2)
|
|
218
|
+
current_date = parts[1][:10] if len(parts) > 1 else ""
|
|
219
|
+
subject = parts[2] if len(parts) > 2 else ""
|
|
220
|
+
skip_commit = (
|
|
221
|
+
bool(_RELEASE_COMMIT_RE.match(subject))
|
|
222
|
+
or bool(_RELEASE_COMMIT_CONTAINS_RE.search(subject))
|
|
223
|
+
)
|
|
224
|
+
continue
|
|
225
|
+
if skip_commit:
|
|
226
|
+
continue
|
|
227
|
+
if _is_hotspot_admin(line):
|
|
228
|
+
continue
|
|
229
|
+
file_counts[line] += 1
|
|
230
|
+
if line not in file_last_date and current_date:
|
|
231
|
+
file_last_date[line] = current_date
|
|
182
232
|
|
|
183
233
|
return [
|
|
184
234
|
ChangeHotspot(
|
sourcecode/schema.py
CHANGED
|
@@ -33,6 +33,7 @@ class AnalysisMetadata:
|
|
|
33
33
|
generated_at: str = field(default_factory=_now_utc)
|
|
34
34
|
sourcecode_version: str = field(default_factory=_sourcecode_version)
|
|
35
35
|
analyzed_path: str = ""
|
|
36
|
+
analyzer_fingerprints: dict[str, str] = field(default_factory=dict)
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
@dataclass
|
|
@@ -59,6 +60,7 @@ class StackDetection:
|
|
|
59
60
|
root: Optional[str] = None
|
|
60
61
|
workspace: Optional[str] = None
|
|
61
62
|
signals: list[str] = field(default_factory=list)
|
|
63
|
+
produced_by: Optional[str] = None # which detector emitted this
|
|
62
64
|
|
|
63
65
|
|
|
64
66
|
@dataclass
|
|
@@ -73,6 +75,7 @@ class EntryPoint:
|
|
|
73
75
|
reason: Optional[str] = None # console_script | entry_file_pattern | main_guard | typer_app | heuristic | convention
|
|
74
76
|
evidence: Optional[str] = None # brief evidence string
|
|
75
77
|
entrypoint_type: Optional[Literal["production", "development", "benchmark", "example"]] = None
|
|
78
|
+
produced_by: Optional[str] = None # which detector emitted this
|
|
76
79
|
|
|
77
80
|
|
|
78
81
|
@dataclass
|
|
@@ -462,6 +465,27 @@ class ContextSummary:
|
|
|
462
465
|
coupling_notes: list[str] = field(default_factory=list) # "2 import cycles", "hub: schema.py"
|
|
463
466
|
|
|
464
467
|
|
|
468
|
+
# --- Pipeline Trace ---
|
|
469
|
+
|
|
470
|
+
@dataclass
|
|
471
|
+
class PipelineEvent:
|
|
472
|
+
"""Single event in the pipeline trace."""
|
|
473
|
+
|
|
474
|
+
stage: str # "scan" | "detect" | "merge" | "confidence" | "output"
|
|
475
|
+
component: str # detector name or analyzer name
|
|
476
|
+
action: str # "emit_stack" | "emit_ep" | "filter_ep" | "discard_ep" | "computed"
|
|
477
|
+
target: Optional[str] = None # path or stack name
|
|
478
|
+
reason: Optional[str] = None # human-readable explanation
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
@dataclass
|
|
482
|
+
class PipelineTrace:
|
|
483
|
+
"""Full trace of what each pipeline stage produced or discarded."""
|
|
484
|
+
|
|
485
|
+
requested: bool = False
|
|
486
|
+
events: list[PipelineEvent] = field(default_factory=list)
|
|
487
|
+
|
|
488
|
+
|
|
465
489
|
# --- Confidence & Explainability ---
|
|
466
490
|
|
|
467
491
|
@dataclass
|
|
@@ -585,3 +609,5 @@ class SourceMap:
|
|
|
585
609
|
context_summary: Optional[ContextSummary] = None
|
|
586
610
|
# Runtime architecture (v0.26.0)
|
|
587
611
|
monorepo_packages: list[MonorepoPackageInfo] = field(default_factory=list)
|
|
612
|
+
# Pipeline trace (v0.29.0) — populated only when --trace-pipeline is passed
|
|
613
|
+
pipeline_trace: Optional[PipelineTrace] = None
|
sourcecode/serializer.py
CHANGED
|
@@ -410,7 +410,9 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
410
410
|
|
|
411
411
|
result: dict[str, Any] = {"project": project}
|
|
412
412
|
|
|
413
|
-
# ── 2. Entry points: production/runtime first
|
|
413
|
+
# ── 2. Entry points: production/runtime first; benchmark/example always excluded ──
|
|
414
|
+
# Never fall back to auxiliary-only EPs — when no operational EP exists the
|
|
415
|
+
# confidence_summary anomaly and analysis_gaps explain the gap instead.
|
|
414
416
|
if sm.entry_points:
|
|
415
417
|
_ep_skip = {"workspace"}
|
|
416
418
|
_aux_parts = frozenset({
|
|
@@ -435,7 +437,10 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
435
437
|
]
|
|
436
438
|
all_ep.sort(key=_ep_priority)
|
|
437
439
|
operational_ep = [ep for ep in all_ep if _ep_priority(ep) < 5]
|
|
438
|
-
|
|
440
|
+
if operational_ep:
|
|
441
|
+
result["entry_points"] = operational_ep
|
|
442
|
+
# When operational_ep is empty: omit key entirely.
|
|
443
|
+
# confidence_summary.anomalies + analysis_gaps carry the explanation.
|
|
439
444
|
|
|
440
445
|
# ── 3. Architecture ───────────────────────────────────────────────────────
|
|
441
446
|
if sm.architecture_summary:
|
|
@@ -619,6 +624,9 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
|
|
|
619
624
|
result["file_tree"] = sm.file_tree
|
|
620
625
|
result["file_paths"] = sm.file_paths
|
|
621
626
|
|
|
627
|
+
if sm.pipeline_trace is not None and sm.pipeline_trace.requested:
|
|
628
|
+
result["pipeline_trace"] = asdict(sm.pipeline_trace)
|
|
629
|
+
|
|
622
630
|
return result
|
|
623
631
|
|
|
624
632
|
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
2
|
-
sourcecode/architecture_analyzer.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=n13VPZwhm9Du2TeJhJzTRI5m_7dP6mgpjdq-5NmlVIw,100
|
|
2
|
+
sourcecode/architecture_analyzer.py,sha256=SBRMWJN70M2qeNLkm9oCG_1rw2UOVuNgikyeAHJsXKw,22859
|
|
3
3
|
sourcecode/architecture_summary.py,sha256=qolHmn6MWUIQHzY9WeHcfN41EJkQdnPQ5F_Z8pqQasA,20251
|
|
4
4
|
sourcecode/classifier.py,sha256=Ft_RfYS-KOe0t7vjgUx04OoCJd1-DXK7k9-I0CFDSnU,6934
|
|
5
|
-
sourcecode/cli.py,sha256=
|
|
5
|
+
sourcecode/cli.py,sha256=4yXSF9UkQsMWU26ySO4-7Zs0B224AxbrdYGr2Urag1k,51120
|
|
6
6
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
7
|
-
sourcecode/confidence_analyzer.py,sha256=
|
|
7
|
+
sourcecode/confidence_analyzer.py,sha256=pzWeHTMz9ZYCuogYcTagpIJuLY-1SOSPTjsiE77rPek,11336
|
|
8
8
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
9
9
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
10
10
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
11
11
|
sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
|
|
12
|
-
sourcecode/env_analyzer.py,sha256=
|
|
13
|
-
sourcecode/git_analyzer.py,sha256=
|
|
12
|
+
sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
|
|
13
|
+
sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
|
|
14
14
|
sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
|
|
15
15
|
sourcecode/metrics_analyzer.py,sha256=4uh11v-Q0gdrN87BOxuFWUym3N3AOkOuy21K5N8peB8,20126
|
|
16
16
|
sourcecode/prepare_context.py,sha256=--lD2dhNkBYI8kwb14d1DlFmEN8XF1Ygtf0Qk7-Y1Bs,30911
|
|
@@ -18,9 +18,9 @@ sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
|
18
18
|
sourcecode/relevance_scorer.py,sha256=2yvxDFnz9YGrHEJubgx9soiVIDZHKv_pntOtTARtKow,5928
|
|
19
19
|
sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
|
|
20
20
|
sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
|
|
21
|
-
sourcecode/schema.py,sha256=
|
|
21
|
+
sourcecode/schema.py,sha256=yCBhYKbF6axqMgl6F1XGk7xLl6uDJNZafwSSNMtxSa8,20222
|
|
22
22
|
sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
|
|
23
|
-
sourcecode/serializer.py,sha256=
|
|
23
|
+
sourcecode/serializer.py,sha256=ZTnMbVnLSdplLK-NOdqH6GSi4v6zLgxm5i69VR9vS2Q,27199
|
|
24
24
|
sourcecode/summarizer.py,sha256=YfBixsN1zWHHXdOEqaf793BylbJrsj75ST7FN6jcqRU,15424
|
|
25
25
|
sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
|
|
26
26
|
sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
|
|
@@ -31,14 +31,14 @@ sourcecode/detectors/dart.py,sha256=QbqaL5v18-_ort75HihVBt8MsKUfOcFDF8IpWFLiXpI,
|
|
|
31
31
|
sourcecode/detectors/dotnet.py,sha256=oi8zq3AfUItlK3h_qM81vOe1ZVTIU9LBKIlIrRDuqOs,6864
|
|
32
32
|
sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJE,1713
|
|
33
33
|
sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
|
|
34
|
-
sourcecode/detectors/heuristic.py,sha256=
|
|
34
|
+
sourcecode/detectors/heuristic.py,sha256=Hab_Uiuxtq-WBs_wCnzETBS5hhaxeEtf-GOGMH63cv0,2887
|
|
35
35
|
sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
|
|
36
36
|
sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
|
|
37
37
|
sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
|
|
38
|
-
sourcecode/detectors/nodejs.py,sha256=
|
|
38
|
+
sourcecode/detectors/nodejs.py,sha256=cqLw3SJ1UcImZD5_DVobByacWRD4ftIlep_mYo9z-n8,11260
|
|
39
39
|
sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
|
|
40
40
|
sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
|
|
41
|
-
sourcecode/detectors/project.py,sha256=
|
|
41
|
+
sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
|
|
42
42
|
sourcecode/detectors/python.py,sha256=i2_Wtk_p0BJx5R8gBQ8NaQByzJ8zEfZkw9NNpKlvOYM,10486
|
|
43
43
|
sourcecode/detectors/ruby.py,sha256=Q4B5ePAw6-T4DLfanKJiuLHLqUigTPVrzylcXJMei3M,1591
|
|
44
44
|
sourcecode/detectors/rust.py,sha256=Tij1vz8BFZ332GEvVkL6vyMli2OMHJfHyDAppWfe66c,3557
|
|
@@ -51,8 +51,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
51
51
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
52
52
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
53
53
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
54
|
-
sourcecode-0.
|
|
55
|
-
sourcecode-0.
|
|
56
|
-
sourcecode-0.
|
|
57
|
-
sourcecode-0.
|
|
58
|
-
sourcecode-0.
|
|
54
|
+
sourcecode-0.29.0.dist-info/METADATA,sha256=FeC2-4TQTNZRw7iXfUDelfEv7Art515YgQkW4ZB68AA,25020
|
|
55
|
+
sourcecode-0.29.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
56
|
+
sourcecode-0.29.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
57
|
+
sourcecode-0.29.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
58
|
+
sourcecode-0.29.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|