sourcecode 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_summary.py +4 -8
- sourcecode/classifier.py +5 -1
- sourcecode/cli.py +17 -29
- sourcecode/confidence_analyzer.py +29 -16
- sourcecode/detectors/nodejs.py +49 -15
- sourcecode/entrypoint_classifier.py +106 -0
- sourcecode/schema.py +6 -4
- sourcecode/serializer.py +80 -63
- {sourcecode-0.29.0.dist-info → sourcecode-0.30.0.dist-info}/METADATA +1 -1
- {sourcecode-0.29.0.dist-info → sourcecode-0.30.0.dist-info}/RECORD +14 -13
- {sourcecode-0.29.0.dist-info → sourcecode-0.30.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.29.0.dist-info → sourcecode-0.30.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.29.0.dist-info → sourcecode-0.30.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
|
sourcecode/architecture_summary.py
CHANGED
|
@@ -5,6 +5,7 @@ import re
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point
|
|
8
9
|
from sourcecode.schema import EntryPoint, SourceMap, StackDetection
|
|
9
10
|
from sourcecode.tree_utils import flatten_file_tree
|
|
10
11
|
|
|
@@ -63,11 +64,8 @@ class ArchitectureSummarizer:
|
|
|
63
64
|
entry for entry in sm.entry_points
|
|
64
65
|
if not self._is_tooling_path(entry.path)
|
|
65
66
|
and not self._is_auxiliary_path(entry.path)
|
|
66
|
-
and entry
|
|
67
|
+
and is_production_entry_point(entry)
|
|
67
68
|
]
|
|
68
|
-
if not entry_points:
|
|
69
|
-
fallback = self._infer_fallback_entry_points(file_paths, sm.stacks)
|
|
70
|
-
entry_points = fallback[:1]
|
|
71
69
|
|
|
72
70
|
lang_lines: list[str] = []
|
|
73
71
|
if entry_points:
|
|
@@ -280,8 +278,7 @@ class ArchitectureSummarizer:
|
|
|
280
278
|
if modules:
|
|
281
279
|
formatted = self._format_module_list([self._module_label(module) for module in modules])
|
|
282
280
|
if formatted:
|
|
283
|
-
lines.append(f"
|
|
284
|
-
lines.append("Produce la salida principal del entry point JavaScript/TypeScript detectado.")
|
|
281
|
+
lines.append(f"Imports internos del entry point: {formatted}.")
|
|
285
282
|
return lines
|
|
286
283
|
|
|
287
284
|
def _summarize_java_entry(self, path: str, content: str, stacks: list[StackDetection]) -> list[str]:
|
|
@@ -344,8 +341,7 @@ class ArchitectureSummarizer:
|
|
|
344
341
|
if internal:
|
|
345
342
|
formatted = self._format_module_list([self._module_label(module) for module in internal])
|
|
346
343
|
if formatted:
|
|
347
|
-
lines.append(f"
|
|
348
|
-
lines.append("Produce la salida principal del binario Go detectado.")
|
|
344
|
+
lines.append(f"Imports internos del binario Go: {formatted}.")
|
|
349
345
|
return lines
|
|
350
346
|
|
|
351
347
|
def _describe_entry_point(self, entry_point: EntryPoint, project_type: str | None) -> str:
|
sourcecode/classifier.py
CHANGED
|
@@ -45,8 +45,12 @@ class TypeClassifier:
|
|
|
45
45
|
primary_stack = self._select_primary_stack(enriched, project_type)
|
|
46
46
|
|
|
47
47
|
final_stacks: list[StackDetection] = []
|
|
48
|
+
primary_assigned = False
|
|
48
49
|
for stack in enriched:
|
|
49
|
-
|
|
50
|
+
is_primary = stack.stack == primary_stack and not primary_assigned
|
|
51
|
+
if is_primary:
|
|
52
|
+
primary_assigned = True
|
|
53
|
+
final_stacks.append(replace(stack, primary=is_primary))
|
|
50
54
|
return final_stacks, project_type
|
|
51
55
|
|
|
52
56
|
def _enrich_stack(
|
sourcecode/cli.py
CHANGED
|
@@ -6,9 +6,10 @@ import time
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Any, Optional, cast
|
|
8
8
|
|
|
9
|
-
import typer
|
|
10
|
-
|
|
11
|
-
from sourcecode import __version__
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from sourcecode import __version__
|
|
12
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
# ---------------------------------------------------------------------------
|
|
@@ -117,11 +118,11 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
|
|
|
117
118
|
)
|
|
118
119
|
|
|
119
120
|
# overall:high requires at least one production entry point
|
|
120
|
-
if cs.overall == "high":
|
|
121
|
-
prod_eps = [
|
|
122
|
-
ep for ep in sm.entry_points
|
|
123
|
-
if ep
|
|
124
|
-
]
|
|
121
|
+
if cs.overall == "high":
|
|
122
|
+
prod_eps = [
|
|
123
|
+
ep for ep in sm.entry_points
|
|
124
|
+
if is_production_entry_point(ep)
|
|
125
|
+
]
|
|
125
126
|
if not prod_eps and sm.entry_points:
|
|
126
127
|
issues.append(
|
|
127
128
|
"[coherence] overall=high but no production entry points exist — "
|
|
@@ -134,21 +135,7 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
|
|
|
134
135
|
"[coherence] entry_point_confidence=high but entry_points is empty"
|
|
135
136
|
)
|
|
136
137
|
|
|
137
|
-
|
|
138
|
-
# appear in agent_view output (checked post-facto via produced_by + type)
|
|
139
|
-
benchmark_eps = [
|
|
140
|
-
ep for ep in sm.entry_points
|
|
141
|
-
if ep.entrypoint_type in ("benchmark", "example")
|
|
142
|
-
]
|
|
143
|
-
if benchmark_eps and sm.entry_points and all(
|
|
144
|
-
ep.entrypoint_type in ("benchmark", "example") for ep in sm.entry_points
|
|
145
|
-
):
|
|
146
|
-
issues.append(
|
|
147
|
-
f"[coherence] all {len(sm.entry_points)} entry point(s) are benchmark/example — "
|
|
148
|
-
"no production entry detected; analysis_gaps should reflect impact=high"
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
return issues
|
|
138
|
+
return issues
|
|
152
139
|
|
|
153
140
|
_HELP = """\
|
|
154
141
|
Deterministic codebase context for AI coding agents.
|
|
@@ -993,12 +980,13 @@ def main(
|
|
|
993
980
|
"example", "examples", "docs", "doc", "fixtures", "fixture",
|
|
994
981
|
})
|
|
995
982
|
for _ep in sm.entry_points:
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
983
|
+
_normalized_ep = normalize_entry_point(_ep)
|
|
984
|
+
_ep_type = _normalized_ep.entrypoint_type
|
|
985
|
+
_path_parts = _ep.path.replace("\\", "/").lower().split("/")
|
|
986
|
+
_filtered = (
|
|
987
|
+
_normalized_ep.classification != "production"
|
|
988
|
+
or any(p in _aux_parts for p in _path_parts)
|
|
989
|
+
)
|
|
1002
990
|
if _filtered:
|
|
1003
991
|
_trace.emit("output", "agent_view", "filter_ep",
|
|
1004
992
|
target=_ep.path,
|
|
sourcecode/confidence_analyzer.py
CHANGED
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from typing import TYPE_CHECKING
|
|
14
14
|
|
|
15
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
|
|
15
16
|
from sourcecode.schema import AnalysisGap, ConfidenceSummary, SourceMap
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
@@ -59,8 +60,15 @@ class ConfidenceAnalyzer:
|
|
|
59
60
|
hard_signals.append(sig)
|
|
60
61
|
|
|
61
62
|
# ── Entry point signals ───────────────────────────────────────────────
|
|
62
|
-
for ep in sm.entry_points
|
|
63
|
-
|
|
63
|
+
normalized_entry_points = [normalize_entry_point(ep) for ep in sm.entry_points]
|
|
64
|
+
|
|
65
|
+
for ep in normalized_entry_points:
|
|
66
|
+
if ep.classification != "production":
|
|
67
|
+
sig = f"entry:{ep.path} ({ep.classification}, {ep.reason or ep.source})"
|
|
68
|
+
if sig not in ignored_signals:
|
|
69
|
+
ignored_signals.append(sig)
|
|
70
|
+
continue
|
|
71
|
+
if ep.source in _HARD_SOURCES or ep.reason == "console_script" or ep.runtime_relevance == "high":
|
|
64
72
|
sig = f"entry:{ep.path} ({ep.reason or ep.source})"
|
|
65
73
|
if sig not in hard_signals:
|
|
66
74
|
hard_signals.append(sig)
|
|
@@ -95,13 +103,13 @@ class ConfidenceAnalyzer:
|
|
|
95
103
|
anomalies.append("All stacks detected via heuristic only — no manifest found")
|
|
96
104
|
|
|
97
105
|
# ── Anomaly: entry points all low-confidence ──────────────────────────
|
|
98
|
-
if
|
|
106
|
+
if normalized_entry_points and all(ep.confidence == "low" for ep in normalized_entry_points):
|
|
99
107
|
anomalies.append("All entry points are low-confidence (heuristic/code_signal only)")
|
|
100
108
|
|
|
101
109
|
# ── Anomaly: all production EPs are convention-only (no manifest evidence) ──
|
|
102
110
|
production_eps_check = [
|
|
103
|
-
ep for ep in
|
|
104
|
-
if ep
|
|
111
|
+
ep for ep in normalized_entry_points
|
|
112
|
+
if is_production_entry_point(ep)
|
|
105
113
|
]
|
|
106
114
|
if production_eps_check and all(
|
|
107
115
|
ep.source in ("convention", "heuristic") or ep.reason in ("convention", "entry_file_pattern")
|
|
@@ -113,37 +121,37 @@ class ConfidenceAnalyzer:
|
|
|
113
121
|
)
|
|
114
122
|
|
|
115
123
|
# ── Anomaly: no production entry points ───────────────────────────────
|
|
116
|
-
if
|
|
124
|
+
if normalized_entry_points:
|
|
117
125
|
production_eps = [
|
|
118
|
-
ep for ep in
|
|
119
|
-
if ep
|
|
126
|
+
ep for ep in normalized_entry_points
|
|
127
|
+
if is_production_entry_point(ep)
|
|
120
128
|
]
|
|
121
129
|
if not production_eps:
|
|
122
130
|
anomalies.append(
|
|
123
|
-
"No production entry points — all detected entries are
|
|
131
|
+
"No production entry points — all detected entries are development/auxiliary"
|
|
124
132
|
)
|
|
125
133
|
|
|
126
134
|
# ── Gaps ──────────────────────────────────────────────────────────────
|
|
127
|
-
if not
|
|
135
|
+
if not normalized_entry_points:
|
|
128
136
|
gaps.append(AnalysisGap(
|
|
129
137
|
area="entry_points",
|
|
130
138
|
reason="No entry point detected — project may use non-standard structure or be a library",
|
|
131
139
|
impact="high",
|
|
132
140
|
))
|
|
133
141
|
elif all(
|
|
134
|
-
ep.
|
|
135
|
-
for ep in
|
|
142
|
+
ep.classification in ("development", "auxiliary")
|
|
143
|
+
for ep in normalized_entry_points
|
|
136
144
|
):
|
|
137
145
|
gaps.append(AnalysisGap(
|
|
138
146
|
area="entry_points",
|
|
139
147
|
reason=(
|
|
140
|
-
"All detected entry points are auxiliary
|
|
148
|
+
"All detected entry points are development or auxiliary — "
|
|
141
149
|
"no production entry point found. Verify project has a 'start'/'serve' "
|
|
142
150
|
"script or production binary."
|
|
143
151
|
),
|
|
144
152
|
impact="high",
|
|
145
153
|
))
|
|
146
|
-
elif all(ep.confidence == "low" for ep in
|
|
154
|
+
elif all(ep.confidence == "low" for ep in normalized_entry_points):
|
|
147
155
|
gaps.append(AnalysisGap(
|
|
148
156
|
area="entry_points",
|
|
149
157
|
reason="Entry points inferred from code patterns only, no manifest declaration found",
|
|
@@ -196,12 +204,17 @@ class ConfidenceAnalyzer:
|
|
|
196
204
|
# Entry points: only consider production EPs for confidence scoring.
|
|
197
205
|
# Benchmark/example/dev-only entries are not evidence of production readiness.
|
|
198
206
|
production_eps = [
|
|
199
|
-
ep for ep in
|
|
200
|
-
if ep
|
|
207
|
+
ep for ep in normalized_entry_points
|
|
208
|
+
if is_production_entry_point(ep)
|
|
201
209
|
]
|
|
202
210
|
ep_conf = _max_confidence([ep.confidence for ep in production_eps] or ["low"])
|
|
203
211
|
overall = _min_confidence([stack_conf, ep_conf])
|
|
204
212
|
|
|
213
|
+
if normalized_entry_points and not production_eps:
|
|
214
|
+
overall = "low"
|
|
215
|
+
elif production_eps and all(ep.runtime_relevance == "low" for ep in production_eps):
|
|
216
|
+
overall = _min_confidence([overall, "low"])
|
|
217
|
+
|
|
205
218
|
# Factor in architecture confidence when available
|
|
206
219
|
arch = sm.architecture
|
|
207
220
|
if arch is not None and arch.requested:
|
sourcecode/detectors/nodejs.py
CHANGED
|
@@ -125,6 +125,9 @@ class NodejsDetector(AbstractDetector):
|
|
|
125
125
|
"playground", "playgrounds",
|
|
126
126
|
"fixture", "fixtures",
|
|
127
127
|
"sandbox", "e2e", "docs",
|
|
128
|
+
"test", "tests", "__tests__", "spec", "specs",
|
|
129
|
+
"scripts", "script", "tools", "tooling", "ci",
|
|
130
|
+
".storybook", "storybook",
|
|
128
131
|
})
|
|
129
132
|
|
|
130
133
|
def _collect_entry_points(
|
|
@@ -144,19 +147,20 @@ class NodejsDetector(AbstractDetector):
|
|
|
144
147
|
continue
|
|
145
148
|
# Extract file path from script command
|
|
146
149
|
path = self._extract_script_path(script_cmd, context)
|
|
150
|
+
if path is None:
|
|
151
|
+
path = self._infer_tool_script_path(script_name, script_cmd, context)
|
|
147
152
|
if path and path not in seen and path_exists_in_tree(context.file_tree, path):
|
|
148
153
|
seen.add(path)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
))
|
|
154
|
+
entry_points.append(EntryPoint(
|
|
155
|
+
path=path,
|
|
156
|
+
stack="nodejs",
|
|
157
|
+
kind=kind,
|
|
158
|
+
source="package.json#scripts",
|
|
159
|
+
confidence="high",
|
|
160
|
+
reason=f"script:{script_name}",
|
|
161
|
+
evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
|
|
162
|
+
entrypoint_type=self._path_entrypoint_type(path, fallback=ep_type),
|
|
163
|
+
))
|
|
160
164
|
|
|
161
165
|
# Priority 2: package.json bin — CLI production entry points
|
|
162
166
|
bin_field = package_json.get("bin")
|
|
@@ -233,7 +237,7 @@ class NodejsDetector(AbstractDetector):
|
|
|
233
237
|
def _classify_script(self, script_name: str) -> tuple[str | None, str]:
|
|
234
238
|
"""Map script name → (entrypoint_type, kind). Returns (None, '') to skip."""
|
|
235
239
|
lower = script_name.lower()
|
|
236
|
-
if lower in ("start", "serve"):
|
|
240
|
+
if lower in ("start", "serve", "server"):
|
|
237
241
|
return "production", "server"
|
|
238
242
|
if lower in ("dev", "develop", "watch"):
|
|
239
243
|
return "development", "server"
|
|
@@ -243,6 +247,12 @@ class NodejsDetector(AbstractDetector):
|
|
|
243
247
|
return "benchmark", "script"
|
|
244
248
|
if lower.startswith("example") or lower.startswith("demo"):
|
|
245
249
|
return "example", "script"
|
|
250
|
+
if lower in {"docs", "doc", "storybook", "playground"} or any(
|
|
251
|
+
marker in lower for marker in ("rspress", "vite", "storybook", "playground")
|
|
252
|
+
):
|
|
253
|
+
return "development", "server"
|
|
254
|
+
if lower in {"test", "e2e", "spec", "lint", "format", "typecheck", "build"}:
|
|
255
|
+
return "development", "script"
|
|
246
256
|
return None, ""
|
|
247
257
|
|
|
248
258
|
def _extract_script_path(self, cmd: str, context: DetectionContext) -> str | None:
|
|
@@ -264,12 +274,36 @@ class NodejsDetector(AbstractDetector):
|
|
|
264
274
|
return p
|
|
265
275
|
return None
|
|
266
276
|
|
|
277
|
+
def _infer_tool_script_path(
|
|
278
|
+
self,
|
|
279
|
+
script_name: str,
|
|
280
|
+
script_cmd: str,
|
|
281
|
+
context: DetectionContext,
|
|
282
|
+
) -> str | None:
|
|
283
|
+
text = f"{script_name} {script_cmd}".lower()
|
|
284
|
+
candidates: list[str] = []
|
|
285
|
+
if "rspress" in text or "docs" in text or "doc" in text:
|
|
286
|
+
candidates.extend(["docs/rspress.mjs", "docs/rspress.config.mjs"])
|
|
287
|
+
if "storybook" in text:
|
|
288
|
+
candidates.extend([".storybook/main.js", ".storybook/main.ts"])
|
|
289
|
+
if "vite" in text or "playground" in text:
|
|
290
|
+
candidates.extend(["playground/vite.config.ts", "vite.config.ts"])
|
|
291
|
+
for candidate in candidates:
|
|
292
|
+
if path_exists_in_tree(context.file_tree, candidate):
|
|
293
|
+
return candidate
|
|
294
|
+
return None
|
|
295
|
+
|
|
267
296
|
def _is_auxiliary_path(self, path: str) -> bool:
|
|
268
297
|
norm = path.replace("\\", "/")
|
|
269
298
|
parts = norm.split("/")
|
|
270
299
|
return any(p.lower() in self._AUXILIARY_DIRS for p in parts)
|
|
271
300
|
|
|
272
|
-
def _path_entrypoint_type(self, path: str) -> str:
|
|
273
|
-
|
|
301
|
+
def _path_entrypoint_type(self, path: str, *, fallback: str = "production") -> str:
|
|
302
|
+
parts = {p.lower() for p in path.replace("\\", "/").split("/")}
|
|
303
|
+
if parts & {"benchmark", "benchmarks", "bench", "benches"}:
|
|
304
|
+
return "benchmark"
|
|
305
|
+
if parts & {"example", "examples", "demo", "demos", "fixture", "fixtures"}:
|
|
274
306
|
return "example"
|
|
275
|
-
|
|
307
|
+
if self._is_auxiliary_path(path):
|
|
308
|
+
return "development"
|
|
309
|
+
return fallback
|
|
sourcecode/entrypoint_classifier.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import replace
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from sourcecode.schema import EntryPoint
|
|
7
|
+
|
|
8
|
+
Classification = Literal["production", "development", "auxiliary"]
RuntimeRelevance = Literal["high", "medium", "low"]

# Path segments that mark an entry point as auxiliary.
_AUXILIARY_DIRS = frozenset({
    "benchmark", "benchmarks", "bench", "benches",
    "example", "examples", "demo", "demos",
    "fixture", "fixtures", "__fixtures__", "testdata", "test_data",
    "test", "tests", "__tests__", "spec", "specs", "e2e",
    "script", "scripts", "tool", "tools", "tooling", "ci",
    "mock", "mocks", "sandbox",
})

# Path segments that mark an entry point as development-only.
_DEVELOPMENT_DIRS = frozenset({
    "docs", "doc", "documentation", "wiki",
    "playground", "playgrounds", ".storybook", "storybook",
})

# Tool names whose mention in path, reason or evidence marks development tooling.
_DEV_MARKERS = ("rspress", "vite", "storybook", "playground", "dev-server")
_PRODUCTION_SCRIPT_REASONS = {"script:start", "script:serve", "script:server"}


def _mentions_dev_marker(text: str) -> bool:
    """Return True when *text* contains a dev marker that starts a token.

    A marker only counts when it sits at the start of the text or right
    after a non-alphanumeric character. This keeps ``vite.config.ts``,
    ``vitest`` and ``webpack-dev-server`` matching while no longer flagging
    unrelated words that merely embed a marker (``invite`` contains ``vite``).
    """
    for marker in _DEV_MARKERS:
        start = text.find(marker)
        while start != -1:
            if start == 0 or not text[start - 1].isalnum():
                return True
            start = text.find(marker, start + 1)
    return False


def classify_entry_point(ep: EntryPoint) -> Classification:
    """Return the operational class for an entry point.

    The rules intentionally prefer exclusion over weak inclusion. Development
    and auxiliary path evidence wins over detector-provided production labels.

    Rules are applied in order; the first match decides:

    1. A development directory in the path, or a dev-tool marker in the
       path/reason/evidence text -> ``"development"``.
    2. An auxiliary directory in the path -> ``"auxiliary"``.
    3. Detector type ``benchmark``/``example`` -> ``"auxiliary"``;
       detector type ``development`` -> ``"development"``.
    4. Convention-detected binary/application for a compiled stack
       -> ``"production"``.
    5. Any other heuristic/convention detection -> ``"auxiliary"``.
    6. Everything else -> ``"production"``.
    """
    path = ep.path.replace("\\", "/").lower()
    parts = set(path.split("/"))
    reason = (ep.reason or "").lower()
    evidence = (ep.evidence or "").lower()
    marker_text = f"{path} {reason} {evidence}"

    if parts & _DEVELOPMENT_DIRS or _mentions_dev_marker(marker_text):
        return "development"
    if parts & _AUXILIARY_DIRS:
        return "auxiliary"
    if ep.entrypoint_type in {"benchmark", "example"}:
        return "auxiliary"
    if ep.entrypoint_type == "development":
        return "development"
    if (
        ep.source == "convention"
        and ep.kind in {"binary", "application"}
        and ep.stack in {"go", "rust", "java", "dotnet", "kotlin", "scala"}
    ):
        return "production"
    if ep.source in {"heuristic", "convention"}:
        return "auxiliary"
    # Nothing excluded it: explicit production labels, package.json bin
    # entries, start/serve/server scripts and any other declared entry all
    # classify as production, so no further case distinction is needed.
    return "production"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def runtime_relevance(ep: EntryPoint, classification: Classification | None = None) -> RuntimeRelevance:
    """Rate how strongly an entry point reflects the production runtime.

    Args:
        ep: Entry point to rate.
        classification: Precomputed class for *ep*; computed with
            ``classify_entry_point`` when omitted.

    Returns:
        ``"low"`` for anything not classified as production. For production
        entries: ``"high"`` for package.json bin entries and start/serve/server
        scripts, ``"medium"`` for package.json main/module and for
        convention-detected binaries/applications, ``"low"`` for remaining
        heuristic/convention or low-confidence entries, otherwise ``"medium"``.
    """
    effective = classification if classification else classify_entry_point(ep)
    if effective != "production":
        return "low"

    why = (ep.reason or "").lower()
    if ep.source == "package.json#bin" or why == "bin" or why in _PRODUCTION_SCRIPT_REASONS:
        return "high"
    if (ep.source == "package.json" and why in {"main", "module"}) or (
        ep.source == "convention" and ep.kind in {"binary", "application"}
    ):
        return "medium"

    weakly_detected = ep.source in {"heuristic", "convention"} or ep.confidence == "low"
    return "low" if weakly_detected else "medium"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def normalize_entry_point(ep: EntryPoint) -> EntryPoint:
    """Return a copy of *ep* with classification fields filled in.

    ``classification`` and ``runtime_relevance`` are always recomputed.
    The legacy ``entrypoint_type`` is kept, with two adjustments:

    * a ``"production"`` label on a heuristic/convention entry that is
      classified as auxiliary is cleared;
    * a missing label is set to the classification when that is
      ``"production"`` or ``"development"`` (auxiliary entries stay ``None``).
    """
    label = classify_entry_point(ep)
    relevance = runtime_relevance(ep, label)

    legacy = ep.entrypoint_type
    weak_production_claim = (
        label == "auxiliary"
        and legacy == "production"
        and ep.source in {"heuristic", "convention"}
    )
    if weak_production_claim:
        legacy = None
    if legacy is None and label in ("production", "development"):
        legacy = label

    return replace(
        ep,
        classification=label,
        runtime_relevance=relevance,
        entrypoint_type=legacy,
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def is_production_entry_point(ep: EntryPoint) -> bool:
    """Return True when *ep* normalizes to a production entry with high or medium relevance."""
    normalized = normalize_entry_point(ep)
    if normalized.classification != "production":
        return False
    return normalized.runtime_relevance in {"high", "medium"}
|
sourcecode/schema.py
CHANGED
|
@@ -72,10 +72,12 @@ class EntryPoint:
|
|
|
72
72
|
kind: str = "entry"
|
|
73
73
|
source: str = "manifest"
|
|
74
74
|
confidence: Literal["high", "medium", "low"] = "high"
|
|
75
|
-
reason: Optional[str] = None # console_script | entry_file_pattern | main_guard | typer_app | heuristic | convention
|
|
76
|
-
evidence: Optional[str] = None # brief evidence string
|
|
77
|
-
entrypoint_type: Optional[Literal["production", "development", "benchmark", "example"]] = None
|
|
78
|
-
|
|
75
|
+
reason: Optional[str] = None # console_script | entry_file_pattern | main_guard | typer_app | heuristic | convention
|
|
76
|
+
evidence: Optional[str] = None # brief evidence string
|
|
77
|
+
entrypoint_type: Optional[Literal["production", "development", "benchmark", "example"]] = None
|
|
78
|
+
classification: Optional[Literal["production", "development", "auxiliary"]] = None
|
|
79
|
+
runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
|
|
80
|
+
produced_by: Optional[str] = None # which detector emitted this
|
|
79
81
|
|
|
80
82
|
|
|
81
83
|
@dataclass
|
sourcecode/serializer.py
CHANGED
|
@@ -13,9 +13,10 @@ import sys
|
|
|
13
13
|
from dataclasses import asdict, dataclass, is_dataclass, replace
|
|
14
14
|
from io import StringIO
|
|
15
15
|
from pathlib import Path
|
|
16
|
-
from typing import Any, Optional
|
|
17
|
-
|
|
18
|
-
from sourcecode.
|
|
16
|
+
from typing import Any, Optional
|
|
17
|
+
|
|
18
|
+
from sourcecode.entrypoint_classifier import normalize_entry_point, is_production_entry_point
|
|
19
|
+
from sourcecode.schema import (
|
|
19
20
|
ArchitectureAnalysis,
|
|
20
21
|
ModuleGraph,
|
|
21
22
|
ModuleGraphSummary,
|
|
@@ -34,7 +35,7 @@ def to_json(sm: SourceMap | dict[str, Any], indent: int = 2) -> str:
|
|
|
34
35
|
return json.dumps(data, indent=indent, ensure_ascii=False)
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def to_yaml(sm: SourceMap) -> str:
|
|
38
|
+
def to_yaml(sm: SourceMap) -> str:
|
|
38
39
|
"""Serializa SourceMap a YAML usando ruamel.yaml.
|
|
39
40
|
|
|
40
41
|
ruamel.yaml preserva el orden de claves y serializa None como null
|
|
@@ -50,11 +51,42 @@ def to_yaml(sm: SourceMap) -> str:
|
|
|
50
51
|
lambda dumper, data: dumper.represent_scalar("tag:yaml.org,2002:null", "null"),
|
|
51
52
|
)
|
|
52
53
|
stream = StringIO()
|
|
53
|
-
yaml.dump(asdict(sm), stream)
|
|
54
|
-
return stream.getvalue()
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def
|
|
54
|
+
yaml.dump(asdict(sm), stream)
|
|
55
|
+
return stream.getvalue()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _clean_entry_point(ep: Any) -> dict[str, Any]:
    """Convert an entry point to a dict for output.

    The entry point is normalized first; fields whose value is ``None`` or
    an empty string are dropped, and so is the ``workspace`` key.
    """
    fields = asdict(normalize_entry_point(ep))
    cleaned: dict[str, Any] = {}
    for key, value in fields.items():
        if key == "workspace" or value is None or value == "":
            continue
        cleaned[key] = value
    return cleaned
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _entry_point_groups(entry_points: list[Any]) -> dict[str, list[dict[str, Any]]]:
|
|
68
|
+
groups: dict[str, list[dict[str, Any]]] = {
|
|
69
|
+
"production": [],
|
|
70
|
+
"development": [],
|
|
71
|
+
"auxiliary": [],
|
|
72
|
+
}
|
|
73
|
+
for ep in entry_points:
|
|
74
|
+
normalized = normalize_entry_point(ep)
|
|
75
|
+
item = _clean_entry_point(normalized)
|
|
76
|
+
if is_production_entry_point(normalized):
|
|
77
|
+
groups["production"].append(item)
|
|
78
|
+
elif normalized.classification == "development":
|
|
79
|
+
groups["development"].append(item)
|
|
80
|
+
else:
|
|
81
|
+
groups["auxiliary"].append(item)
|
|
82
|
+
|
|
83
|
+
groups["production"].sort(key=lambda ep: (ep.get("runtime_relevance") != "high", ep.get("path", "")))
|
|
84
|
+
groups["development"].sort(key=lambda ep: ep.get("path", ""))
|
|
85
|
+
groups["auxiliary"].sort(key=lambda ep: ep.get("path", ""))
|
|
86
|
+
return groups
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
|
|
58
90
|
"""Context package ready for prompt or handoff (~600-800 tokens).
|
|
59
91
|
|
|
60
92
|
Answers: what it is, where it enters, what depends on what,
|
|
@@ -85,13 +117,12 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
|
|
|
85
117
|
if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
|
|
86
118
|
code_notes_summary_dict = asdict(sm.code_notes_summary)
|
|
87
119
|
|
|
88
|
-
# Entry points:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
entry_points_compact = None # type: ignore[assignment] # signal: not detected
|
|
120
|
+
# Entry points: production runtime only. Auxiliary and development entries
|
|
121
|
+
# are exposed separately so agents do not mix tooling with execution paths.
|
|
122
|
+
ep_groups = _entry_point_groups(sm.entry_points)
|
|
123
|
+
entry_points_compact = ep_groups["production"]
|
|
124
|
+
if not entry_points_compact:
|
|
125
|
+
entry_points_compact = [] # truth signal: no production runtime detected
|
|
95
126
|
|
|
96
127
|
# Confidence summary
|
|
97
128
|
conf_dict: Any = None
|
|
@@ -116,9 +147,11 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
|
|
|
116
147
|
"project_summary": sm.project_summary,
|
|
117
148
|
"architecture_summary": sm.architecture_summary,
|
|
118
149
|
"context_summary": context_summary_dict,
|
|
119
|
-
"stacks": [asdict(stack) for stack in sm.stacks],
|
|
120
|
-
"entry_points": entry_points_compact,
|
|
121
|
-
"
|
|
150
|
+
"stacks": [asdict(stack) for stack in sm.stacks],
|
|
151
|
+
"entry_points": entry_points_compact,
|
|
152
|
+
"development_entry_points": ep_groups["development"] or None,
|
|
153
|
+
"auxiliary_entry_points": ep_groups["auxiliary"] or None,
|
|
154
|
+
"dependency_summary": dep_summary_dict,
|
|
122
155
|
"key_dependencies": key_deps,
|
|
123
156
|
"env_summary": env_summary_dict,
|
|
124
157
|
"code_notes_summary": code_notes_summary_dict,
|
|
@@ -163,10 +196,14 @@ def normalize_source_map(sm: SourceMap) -> SourceMap:
|
|
|
163
196
|
|
|
164
197
|
# dependencies is already list[DependencyRecord] by default_factory, but
|
|
165
198
|
# guard against any future refactor that could accidentally set it to None
|
|
166
|
-
if sm.dependencies is None: # type: ignore[comparison-overlap]
|
|
167
|
-
changes["dependencies"] = []
|
|
168
|
-
|
|
169
|
-
|
|
199
|
+
if sm.dependencies is None: # type: ignore[comparison-overlap]
|
|
200
|
+
changes["dependencies"] = []
|
|
201
|
+
|
|
202
|
+
normalized_eps = [normalize_entry_point(ep) for ep in sm.entry_points]
|
|
203
|
+
if normalized_eps != sm.entry_points:
|
|
204
|
+
changes["entry_points"] = normalized_eps
|
|
205
|
+
|
|
206
|
+
return replace(sm, **changes) if changes else sm
|
|
170
207
|
|
|
171
208
|
|
|
172
209
|
def validate_source_map(sm: SourceMap) -> None:
|
|
@@ -410,37 +447,13 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
410
447
|
|
|
411
448
|
result: dict[str, Any] = {"project": project}
|
|
412
449
|
|
|
413
|
-
# ── 2. Entry points: production/runtime
|
|
414
|
-
#
|
|
415
|
-
#
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
"example", "examples", "docs", "doc", "fixtures", "fixture",
|
|
421
|
-
})
|
|
422
|
-
|
|
423
|
-
def _ep_priority(ep_dict: dict[str, Any]) -> int:
|
|
424
|
-
ep_type = ep_dict.get("entrypoint_type")
|
|
425
|
-
if ep_type in ("benchmark", "example"):
|
|
426
|
-
return 10
|
|
427
|
-
path_parts = ep_dict.get("path", "").replace("\\", "/").lower().split("/")
|
|
428
|
-
if any(p in _aux_parts for p in path_parts):
|
|
429
|
-
return 5
|
|
430
|
-
if ep_type == "development":
|
|
431
|
-
return 3
|
|
432
|
-
return 0
|
|
433
|
-
|
|
434
|
-
all_ep = [
|
|
435
|
-
{k: v for k, v in asdict(ep).items() if v is not None and v != "" and k not in _ep_skip}
|
|
436
|
-
for ep in sm.entry_points
|
|
437
|
-
]
|
|
438
|
-
all_ep.sort(key=_ep_priority)
|
|
439
|
-
operational_ep = [ep for ep in all_ep if _ep_priority(ep) < 5]
|
|
440
|
-
if operational_ep:
|
|
441
|
-
result["entry_points"] = operational_ep
|
|
442
|
-
# When operational_ep is empty: omit key entirely.
|
|
443
|
-
# confidence_summary.anomalies + analysis_gaps carry the explanation.
|
|
450
|
+
# ── 2. Entry points: production/runtime only in the primary field ─────────
|
|
451
|
+
# Development and auxiliary entries are explicit side channels. A missing
|
|
452
|
+
# production runtime is represented as entry_points=[], never by fallback.
|
|
453
|
+
ep_groups = _entry_point_groups(sm.entry_points)
|
|
454
|
+
result["entry_points"] = ep_groups["production"]
|
|
455
|
+
result["development_entry_points"] = ep_groups["development"]
|
|
456
|
+
result["auxiliary_entry_points"] = ep_groups["auxiliary"]
|
|
444
457
|
|
|
445
458
|
# ── 3. Architecture ───────────────────────────────────────────────────────
|
|
446
459
|
if sm.architecture_summary:
|
|
@@ -550,7 +563,7 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
550
563
|
return result
|
|
551
564
|
|
|
552
565
|
|
|
553
|
-
def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any]:
|
|
566
|
+
def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any]:
|
|
554
567
|
"""Default output — three signal layers.
|
|
555
568
|
|
|
556
569
|
Layer A (always):
|
|
@@ -568,14 +581,18 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
|
|
|
568
581
|
Full dependencies list is never included — use key_dependencies instead.
|
|
569
582
|
Empty unrequested analyzer fields are omitted entirely.
|
|
570
583
|
"""
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
"
|
|
575
|
-
"
|
|
576
|
-
"
|
|
577
|
-
"
|
|
578
|
-
|
|
584
|
+
ep_groups = _entry_point_groups(sm.entry_points)
|
|
585
|
+
|
|
586
|
+
result: dict[str, Any] = {
|
|
587
|
+
"metadata": asdict(sm.metadata),
|
|
588
|
+
"project_type": sm.project_type,
|
|
589
|
+
"project_summary": sm.project_summary,
|
|
590
|
+
"architecture_summary": sm.architecture_summary,
|
|
591
|
+
"stacks": [asdict(s) for s in sm.stacks],
|
|
592
|
+
"entry_points": ep_groups["production"],
|
|
593
|
+
"development_entry_points": ep_groups["development"],
|
|
594
|
+
"auxiliary_entry_points": ep_groups["auxiliary"],
|
|
595
|
+
}
|
|
579
596
|
|
|
580
597
|
# Layer B — signals (only when the corresponding analyzer ran)
|
|
581
598
|
if sm.dependency_summary is not None and sm.dependency_summary.requested:
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=MU2HxHzhdlDeES-MGTUNA1df0X4nB3GWAvjTRWUEoys,100
|
|
2
2
|
sourcecode/architecture_analyzer.py,sha256=SBRMWJN70M2qeNLkm9oCG_1rw2UOVuNgikyeAHJsXKw,22859
|
|
3
|
-
sourcecode/architecture_summary.py,sha256=
|
|
4
|
-
sourcecode/classifier.py,sha256=
|
|
5
|
-
sourcecode/cli.py,sha256=
|
|
3
|
+
sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
|
|
4
|
+
sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
|
|
5
|
+
sourcecode/cli.py,sha256=LKtus6aETNZv70fkp5LrjTfvu5w9jsB4go-7MCoDnzg,50611
|
|
6
6
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
7
|
-
sourcecode/confidence_analyzer.py,sha256=
|
|
7
|
+
sourcecode/confidence_analyzer.py,sha256=B48lCuz_t_qsyjPQdLbKUj2kJ0Wu4Sq5ZnO18F_v3eU,12069
|
|
8
8
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
9
9
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
10
10
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
11
11
|
sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
|
|
12
|
+
sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
|
|
12
13
|
sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
|
|
13
14
|
sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
|
|
14
15
|
sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
|
|
@@ -18,9 +19,9 @@ sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
|
18
19
|
sourcecode/relevance_scorer.py,sha256=2yvxDFnz9YGrHEJubgx9soiVIDZHKv_pntOtTARtKow,5928
|
|
19
20
|
sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
|
|
20
21
|
sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
|
|
21
|
-
sourcecode/schema.py,sha256=
|
|
22
|
+
sourcecode/schema.py,sha256=wylO5aKFBHBUAvMh4AH6hKKcN8p5yt6XRkyRvZRjV-4,20378
|
|
22
23
|
sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
|
|
23
|
-
sourcecode/serializer.py,sha256=
|
|
24
|
+
sourcecode/serializer.py,sha256=VksZokFUG3GLWz_eUtVqNdkddkeV-tBY2lzfa8ociAc,27898
|
|
24
25
|
sourcecode/summarizer.py,sha256=YfBixsN1zWHHXdOEqaf793BylbJrsj75ST7FN6jcqRU,15424
|
|
25
26
|
sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
|
|
26
27
|
sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
|
|
@@ -35,7 +36,7 @@ sourcecode/detectors/heuristic.py,sha256=Hab_Uiuxtq-WBs_wCnzETBS5hhaxeEtf-GOGMH6
|
|
|
35
36
|
sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
|
|
36
37
|
sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
|
|
37
38
|
sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
|
|
38
|
-
sourcecode/detectors/nodejs.py,sha256=
|
|
39
|
+
sourcecode/detectors/nodejs.py,sha256=LN-m3bERpijlBMl1TNVOH_cJDhfDYRhn8K8lsNzztVc,12923
|
|
39
40
|
sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
|
|
40
41
|
sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
|
|
41
42
|
sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
|
|
@@ -51,8 +52,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
51
52
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
52
53
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
53
54
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
54
|
-
sourcecode-0.
|
|
55
|
-
sourcecode-0.
|
|
56
|
-
sourcecode-0.
|
|
57
|
-
sourcecode-0.
|
|
58
|
-
sourcecode-0.
|
|
55
|
+
sourcecode-0.30.0.dist-info/METADATA,sha256=wjMQ_CyxnBDjQ6G_7PLE5crhTdh2sl6wd6Bkdy3t48o,25020
|
|
56
|
+
sourcecode-0.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
57
|
+
sourcecode-0.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
58
|
+
sourcecode-0.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
59
|
+
sourcecode-0.30.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|