sourcecode 0.29.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_analyzer.py +9 -5
- sourcecode/architecture_summary.py +4 -8
- sourcecode/classifier.py +5 -1
- sourcecode/cli.py +24 -34
- sourcecode/confidence_analyzer.py +33 -20
- sourcecode/detectors/nodejs.py +60 -18
- sourcecode/entrypoint_classifier.py +106 -0
- sourcecode/file_classifier.py +215 -0
- sourcecode/prepare_context.py +12 -7
- sourcecode/schema.py +6 -4
- sourcecode/serializer.py +268 -87
- sourcecode/summarizer.py +10 -7
- {sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/METADATA +1 -1
- {sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/RECORD +18 -16
- {sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
|
@@ -215,18 +215,22 @@ class ArchitectureAnalyzer:
|
|
|
215
215
|
if pattern not in (None, "unknown", "flat"):
|
|
216
216
|
if all_layers_weak:
|
|
217
217
|
# Layers came from file-naming heuristic only, not directory structure
|
|
218
|
-
confidence = "
|
|
218
|
+
confidence = "low"
|
|
219
219
|
limitations.append(
|
|
220
|
-
"
|
|
220
|
+
"Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
|
|
221
221
|
)
|
|
222
222
|
else:
|
|
223
|
-
confidence = "
|
|
223
|
+
confidence = "medium" if len(strong_domains) >= 3 else "low"
|
|
224
|
+
if graph is None:
|
|
225
|
+
limitations.append(
|
|
226
|
+
"Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
|
|
227
|
+
)
|
|
224
228
|
elif len(strong_domains) >= 1:
|
|
225
229
|
confidence = "medium"
|
|
226
230
|
else:
|
|
227
231
|
confidence = "low"
|
|
228
232
|
|
|
229
|
-
method = "graph+
|
|
233
|
+
method = "graph+structure" if graph is not None else "filesystem_inference"
|
|
230
234
|
|
|
231
235
|
return ArchitectureAnalysis(
|
|
232
236
|
requested=True,
|
|
@@ -339,7 +343,7 @@ class ArchitectureAnalyzer:
|
|
|
339
343
|
best_matched = matched
|
|
340
344
|
|
|
341
345
|
if best_score >= 2:
|
|
342
|
-
layer_confidence: Literal["high", "medium", "low"] = "
|
|
346
|
+
layer_confidence: Literal["high", "medium", "low"] = "medium" if best_score >= 3 else "low"
|
|
343
347
|
layers: list[ArchitectureLayer] = []
|
|
344
348
|
for layer_key, matched_dirs in best_matched.items():
|
|
345
349
|
matched_files = [
|
|
@@ -5,6 +5,7 @@ import re
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point
|
|
8
9
|
from sourcecode.schema import EntryPoint, SourceMap, StackDetection
|
|
9
10
|
from sourcecode.tree_utils import flatten_file_tree
|
|
10
11
|
|
|
@@ -63,11 +64,8 @@ class ArchitectureSummarizer:
|
|
|
63
64
|
entry for entry in sm.entry_points
|
|
64
65
|
if not self._is_tooling_path(entry.path)
|
|
65
66
|
and not self._is_auxiliary_path(entry.path)
|
|
66
|
-
and entry
|
|
67
|
+
and is_production_entry_point(entry)
|
|
67
68
|
]
|
|
68
|
-
if not entry_points:
|
|
69
|
-
fallback = self._infer_fallback_entry_points(file_paths, sm.stacks)
|
|
70
|
-
entry_points = fallback[:1]
|
|
71
69
|
|
|
72
70
|
lang_lines: list[str] = []
|
|
73
71
|
if entry_points:
|
|
@@ -280,8 +278,7 @@ class ArchitectureSummarizer:
|
|
|
280
278
|
if modules:
|
|
281
279
|
formatted = self._format_module_list([self._module_label(module) for module in modules])
|
|
282
280
|
if formatted:
|
|
283
|
-
lines.append(f"
|
|
284
|
-
lines.append("Produce la salida principal del entry point JavaScript/TypeScript detectado.")
|
|
281
|
+
lines.append(f"Imports internos del entry point: {formatted}.")
|
|
285
282
|
return lines
|
|
286
283
|
|
|
287
284
|
def _summarize_java_entry(self, path: str, content: str, stacks: list[StackDetection]) -> list[str]:
|
|
@@ -344,8 +341,7 @@ class ArchitectureSummarizer:
|
|
|
344
341
|
if internal:
|
|
345
342
|
formatted = self._format_module_list([self._module_label(module) for module in internal])
|
|
346
343
|
if formatted:
|
|
347
|
-
lines.append(f"
|
|
348
|
-
lines.append("Produce la salida principal del binario Go detectado.")
|
|
344
|
+
lines.append(f"Imports internos del binario Go: {formatted}.")
|
|
349
345
|
return lines
|
|
350
346
|
|
|
351
347
|
def _describe_entry_point(self, entry_point: EntryPoint, project_type: str | None) -> str:
|
sourcecode/classifier.py
CHANGED
|
@@ -45,8 +45,12 @@ class TypeClassifier:
|
|
|
45
45
|
primary_stack = self._select_primary_stack(enriched, project_type)
|
|
46
46
|
|
|
47
47
|
final_stacks: list[StackDetection] = []
|
|
48
|
+
primary_assigned = False
|
|
48
49
|
for stack in enriched:
|
|
49
|
-
|
|
50
|
+
is_primary = stack.stack == primary_stack and not primary_assigned
|
|
51
|
+
if is_primary:
|
|
52
|
+
primary_assigned = True
|
|
53
|
+
final_stacks.append(replace(stack, primary=is_primary))
|
|
50
54
|
return final_stacks, project_type
|
|
51
55
|
|
|
52
56
|
def _enrich_stack(
|
sourcecode/cli.py
CHANGED
|
@@ -6,9 +6,10 @@ import time
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Any, Optional, cast
|
|
8
8
|
|
|
9
|
-
import typer
|
|
10
|
-
|
|
11
|
-
from sourcecode import __version__
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from sourcecode import __version__
|
|
12
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
# ---------------------------------------------------------------------------
|
|
@@ -117,11 +118,11 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
|
|
|
117
118
|
)
|
|
118
119
|
|
|
119
120
|
# overall:high requires at least one production entry point
|
|
120
|
-
if cs.overall == "high":
|
|
121
|
-
prod_eps = [
|
|
122
|
-
ep for ep in sm.entry_points
|
|
123
|
-
if ep
|
|
124
|
-
]
|
|
121
|
+
if cs.overall == "high":
|
|
122
|
+
prod_eps = [
|
|
123
|
+
ep for ep in sm.entry_points
|
|
124
|
+
if is_production_entry_point(ep)
|
|
125
|
+
]
|
|
125
126
|
if not prod_eps and sm.entry_points:
|
|
126
127
|
issues.append(
|
|
127
128
|
"[coherence] overall=high but no production entry points exist — "
|
|
@@ -134,21 +135,7 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
|
|
|
134
135
|
"[coherence] entry_point_confidence=high but entry_points is empty"
|
|
135
136
|
)
|
|
136
137
|
|
|
137
|
-
|
|
138
|
-
# appear in agent_view output (checked post-facto via produced_by + type)
|
|
139
|
-
benchmark_eps = [
|
|
140
|
-
ep for ep in sm.entry_points
|
|
141
|
-
if ep.entrypoint_type in ("benchmark", "example")
|
|
142
|
-
]
|
|
143
|
-
if benchmark_eps and sm.entry_points and all(
|
|
144
|
-
ep.entrypoint_type in ("benchmark", "example") for ep in sm.entry_points
|
|
145
|
-
):
|
|
146
|
-
issues.append(
|
|
147
|
-
f"[coherence] all {len(sm.entry_points)} entry point(s) are benchmark/example — "
|
|
148
|
-
"no production entry detected; analysis_gaps should reflect impact=high"
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
return issues
|
|
138
|
+
return issues
|
|
152
139
|
|
|
153
140
|
_HELP = """\
|
|
154
141
|
Deterministic codebase context for AI coding agents.
|
|
@@ -909,11 +896,13 @@ def main(
|
|
|
909
896
|
if dependency_analyzer is not None:
|
|
910
897
|
from sourcecode.dependency_analyzer import _ROLE_PRIORITY
|
|
911
898
|
|
|
912
|
-
primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
|
|
913
|
-
direct_deps = [
|
|
914
|
-
d for d in sm.dependencies
|
|
915
|
-
if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
|
|
916
|
-
|
|
899
|
+
primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
|
|
900
|
+
direct_deps = [
|
|
901
|
+
d for d in sm.dependencies
|
|
902
|
+
if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
|
|
903
|
+
and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
|
|
904
|
+
and d.scope not in {"dev"}
|
|
905
|
+
]
|
|
917
906
|
|
|
918
907
|
def _dep_sort_key(d: Any) -> tuple[int, int, str]:
|
|
919
908
|
role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)
|
|
@@ -993,12 +982,13 @@ def main(
|
|
|
993
982
|
"example", "examples", "docs", "doc", "fixtures", "fixture",
|
|
994
983
|
})
|
|
995
984
|
for _ep in sm.entry_points:
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
985
|
+
_normalized_ep = normalize_entry_point(_ep)
|
|
986
|
+
_ep_type = _normalized_ep.entrypoint_type
|
|
987
|
+
_path_parts = _ep.path.replace("\\", "/").lower().split("/")
|
|
988
|
+
_filtered = (
|
|
989
|
+
_normalized_ep.classification != "production"
|
|
990
|
+
or any(p in _aux_parts for p in _path_parts)
|
|
991
|
+
)
|
|
1002
992
|
if _filtered:
|
|
1003
993
|
_trace.emit("output", "agent_view", "filter_ep",
|
|
1004
994
|
target=_ep.path,
|
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from typing import TYPE_CHECKING
|
|
14
14
|
|
|
15
|
+
from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
|
|
15
16
|
from sourcecode.schema import AnalysisGap, ConfidenceSummary, SourceMap
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
@@ -59,8 +60,15 @@ class ConfidenceAnalyzer:
|
|
|
59
60
|
hard_signals.append(sig)
|
|
60
61
|
|
|
61
62
|
# ── Entry point signals ───────────────────────────────────────────────
|
|
62
|
-
for ep in sm.entry_points
|
|
63
|
-
|
|
63
|
+
normalized_entry_points = [normalize_entry_point(ep) for ep in sm.entry_points]
|
|
64
|
+
|
|
65
|
+
for ep in normalized_entry_points:
|
|
66
|
+
if ep.classification != "production":
|
|
67
|
+
sig = f"entry:{ep.path} ({ep.classification}, {ep.reason or ep.source})"
|
|
68
|
+
if sig not in ignored_signals:
|
|
69
|
+
ignored_signals.append(sig)
|
|
70
|
+
continue
|
|
71
|
+
if ep.source in _HARD_SOURCES or ep.reason == "console_script" or ep.runtime_relevance == "high":
|
|
64
72
|
sig = f"entry:{ep.path} ({ep.reason or ep.source})"
|
|
65
73
|
if sig not in hard_signals:
|
|
66
74
|
hard_signals.append(sig)
|
|
@@ -95,13 +103,13 @@ class ConfidenceAnalyzer:
|
|
|
95
103
|
anomalies.append("All stacks detected via heuristic only — no manifest found")
|
|
96
104
|
|
|
97
105
|
# ── Anomaly: entry points all low-confidence ──────────────────────────
|
|
98
|
-
if
|
|
106
|
+
if normalized_entry_points and all(ep.confidence == "low" for ep in normalized_entry_points):
|
|
99
107
|
anomalies.append("All entry points are low-confidence (heuristic/code_signal only)")
|
|
100
108
|
|
|
101
109
|
# ── Anomaly: all production EPs are convention-only (no manifest evidence) ──
|
|
102
110
|
production_eps_check = [
|
|
103
|
-
ep for ep in
|
|
104
|
-
if ep
|
|
111
|
+
ep for ep in normalized_entry_points
|
|
112
|
+
if is_production_entry_point(ep)
|
|
105
113
|
]
|
|
106
114
|
if production_eps_check and all(
|
|
107
115
|
ep.source in ("convention", "heuristic") or ep.reason in ("convention", "entry_file_pattern")
|
|
@@ -113,40 +121,40 @@ class ConfidenceAnalyzer:
|
|
|
113
121
|
)
|
|
114
122
|
|
|
115
123
|
# ── Anomaly: no production entry points ───────────────────────────────
|
|
116
|
-
if
|
|
124
|
+
if normalized_entry_points:
|
|
117
125
|
production_eps = [
|
|
118
|
-
ep for ep in
|
|
119
|
-
if ep
|
|
126
|
+
ep for ep in normalized_entry_points
|
|
127
|
+
if is_production_entry_point(ep)
|
|
120
128
|
]
|
|
121
129
|
if not production_eps:
|
|
122
130
|
anomalies.append(
|
|
123
|
-
"No production entry points — all detected entries are
|
|
131
|
+
"No production entry points — all detected entries are development/auxiliary"
|
|
124
132
|
)
|
|
125
133
|
|
|
126
134
|
# ── Gaps ──────────────────────────────────────────────────────────────
|
|
127
|
-
if not
|
|
135
|
+
if not normalized_entry_points:
|
|
128
136
|
gaps.append(AnalysisGap(
|
|
129
137
|
area="entry_points",
|
|
130
|
-
reason="
|
|
138
|
+
reason="Critical: no runtime entrypoint detected; system cannot be executed without manual inference",
|
|
131
139
|
impact="high",
|
|
132
140
|
))
|
|
133
141
|
elif all(
|
|
134
|
-
ep.
|
|
135
|
-
for ep in
|
|
142
|
+
ep.classification in ("development", "auxiliary")
|
|
143
|
+
for ep in normalized_entry_points
|
|
136
144
|
):
|
|
137
145
|
gaps.append(AnalysisGap(
|
|
138
146
|
area="entry_points",
|
|
139
147
|
reason=(
|
|
140
|
-
"
|
|
141
|
-
"
|
|
142
|
-
"
|
|
148
|
+
"Critical: no production runtime entrypoint detected; detected entries are "
|
|
149
|
+
"development or auxiliary only. Add/verify a start/serve script, CLI bin, "
|
|
150
|
+
"or server bootstrap before using this context for automation."
|
|
143
151
|
),
|
|
144
152
|
impact="high",
|
|
145
153
|
))
|
|
146
|
-
elif all(ep.confidence == "low" for ep in
|
|
154
|
+
elif all(ep.confidence == "low" for ep in normalized_entry_points):
|
|
147
155
|
gaps.append(AnalysisGap(
|
|
148
156
|
area="entry_points",
|
|
149
|
-
reason="Entry points inferred from code patterns only
|
|
157
|
+
reason="Entry points inferred from code patterns only; no manifest script, CLI bin, or server bootstrap declaration found",
|
|
150
158
|
impact="medium",
|
|
151
159
|
))
|
|
152
160
|
|
|
@@ -196,12 +204,17 @@ class ConfidenceAnalyzer:
|
|
|
196
204
|
# Entry points: only consider production EPs for confidence scoring.
|
|
197
205
|
# Benchmark/example/dev-only entries are not evidence of production readiness.
|
|
198
206
|
production_eps = [
|
|
199
|
-
ep for ep in
|
|
200
|
-
if ep
|
|
207
|
+
ep for ep in normalized_entry_points
|
|
208
|
+
if is_production_entry_point(ep)
|
|
201
209
|
]
|
|
202
210
|
ep_conf = _max_confidence([ep.confidence for ep in production_eps] or ["low"])
|
|
203
211
|
overall = _min_confidence([stack_conf, ep_conf])
|
|
204
212
|
|
|
213
|
+
if normalized_entry_points and not production_eps:
|
|
214
|
+
overall = "low"
|
|
215
|
+
elif production_eps and all(ep.runtime_relevance == "low" for ep in production_eps):
|
|
216
|
+
overall = _min_confidence([overall, "low"])
|
|
217
|
+
|
|
205
218
|
# Factor in architecture confidence when available
|
|
206
219
|
arch = sm.architecture
|
|
207
220
|
if arch is not None and arch.requested:
|
sourcecode/detectors/nodejs.py
CHANGED
|
@@ -58,7 +58,7 @@ class NodejsDetector(AbstractDetector):
|
|
|
58
58
|
|
|
59
59
|
from sourcecode.detectors.hybrid import merge_framework_detections, scan_for_frameworks
|
|
60
60
|
|
|
61
|
-
dependency_names = self._collect_dependency_names(package_json)
|
|
61
|
+
dependency_names = self._collect_dependency_names(package_json, runtime_only=True)
|
|
62
62
|
seen_fw: set[str] = set()
|
|
63
63
|
manifest_frameworks = []
|
|
64
64
|
for pkg_name, label in _FRAMEWORK_MAP.items():
|
|
@@ -98,9 +98,17 @@ class NodejsDetector(AbstractDetector):
|
|
|
98
98
|
signals.append("monorepo:npm-workspaces")
|
|
99
99
|
return signals
|
|
100
100
|
|
|
101
|
-
def _collect_dependency_names(
|
|
101
|
+
def _collect_dependency_names(
|
|
102
|
+
self,
|
|
103
|
+
package_json: dict[str, Any],
|
|
104
|
+
*,
|
|
105
|
+
runtime_only: bool = False,
|
|
106
|
+
) -> set[str]:
|
|
102
107
|
names: set[str] = set()
|
|
103
|
-
|
|
108
|
+
fields = ("dependencies", "peerDependencies", "optionalDependencies")
|
|
109
|
+
if not runtime_only:
|
|
110
|
+
fields = fields + ("devDependencies",)
|
|
111
|
+
for field in fields:
|
|
104
112
|
raw = package_json.get(field, {})
|
|
105
113
|
if isinstance(raw, dict):
|
|
106
114
|
names.update(str(name) for name in raw)
|
|
@@ -125,6 +133,9 @@ class NodejsDetector(AbstractDetector):
|
|
|
125
133
|
"playground", "playgrounds",
|
|
126
134
|
"fixture", "fixtures",
|
|
127
135
|
"sandbox", "e2e", "docs",
|
|
136
|
+
"test", "tests", "__tests__", "spec", "specs",
|
|
137
|
+
"scripts", "script", "tools", "tooling", "ci",
|
|
138
|
+
".storybook", "storybook",
|
|
128
139
|
})
|
|
129
140
|
|
|
130
141
|
def _collect_entry_points(
|
|
@@ -144,19 +155,20 @@ class NodejsDetector(AbstractDetector):
|
|
|
144
155
|
continue
|
|
145
156
|
# Extract file path from script command
|
|
146
157
|
path = self._extract_script_path(script_cmd, context)
|
|
158
|
+
if path is None:
|
|
159
|
+
path = self._infer_tool_script_path(script_name, script_cmd, context)
|
|
147
160
|
if path and path not in seen and path_exists_in_tree(context.file_tree, path):
|
|
148
161
|
seen.add(path)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
))
|
|
162
|
+
entry_points.append(EntryPoint(
|
|
163
|
+
path=path,
|
|
164
|
+
stack="nodejs",
|
|
165
|
+
kind=kind,
|
|
166
|
+
source="package.json#scripts",
|
|
167
|
+
confidence="high",
|
|
168
|
+
reason=f"script:{script_name}",
|
|
169
|
+
evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
|
|
170
|
+
entrypoint_type=self._path_entrypoint_type(path, fallback=ep_type),
|
|
171
|
+
))
|
|
160
172
|
|
|
161
173
|
# Priority 2: package.json bin — CLI production entry points
|
|
162
174
|
bin_field = package_json.get("bin")
|
|
@@ -233,7 +245,7 @@ class NodejsDetector(AbstractDetector):
|
|
|
233
245
|
def _classify_script(self, script_name: str) -> tuple[str | None, str]:
|
|
234
246
|
"""Map script name → (entrypoint_type, kind). Returns (None, '') to skip."""
|
|
235
247
|
lower = script_name.lower()
|
|
236
|
-
if lower in ("start", "serve"):
|
|
248
|
+
if lower in ("start", "serve", "server"):
|
|
237
249
|
return "production", "server"
|
|
238
250
|
if lower in ("dev", "develop", "watch"):
|
|
239
251
|
return "development", "server"
|
|
@@ -243,6 +255,12 @@ class NodejsDetector(AbstractDetector):
|
|
|
243
255
|
return "benchmark", "script"
|
|
244
256
|
if lower.startswith("example") or lower.startswith("demo"):
|
|
245
257
|
return "example", "script"
|
|
258
|
+
if lower in {"docs", "doc", "storybook", "playground"} or any(
|
|
259
|
+
marker in lower for marker in ("rspress", "vite", "storybook", "playground")
|
|
260
|
+
):
|
|
261
|
+
return "development", "server"
|
|
262
|
+
if lower in {"test", "e2e", "spec", "lint", "format", "typecheck", "build"}:
|
|
263
|
+
return "development", "script"
|
|
246
264
|
return None, ""
|
|
247
265
|
|
|
248
266
|
def _extract_script_path(self, cmd: str, context: DetectionContext) -> str | None:
|
|
@@ -264,12 +282,36 @@ class NodejsDetector(AbstractDetector):
|
|
|
264
282
|
return p
|
|
265
283
|
return None
|
|
266
284
|
|
|
285
|
+
def _infer_tool_script_path(
    self,
    script_name: str,
    script_cmd: str,
    context: DetectionContext,
) -> str | None:
    """Infer a config-file path from tool markers in a script's name and command.

    The script name and command are joined and lowercased, then searched for
    tool markers. Each matching marker group contributes its well-known
    config paths, in declaration order; the first path that exists in
    ``context.file_tree`` is returned. Returns ``None`` when no marker
    matches or none of the candidate paths exist.
    """
    haystack = f"{script_name} {script_cmd}".lower()

    # NOTE(review): markers are raw substrings. "doc" already subsumes "docs"
    # and also matches unrelated text such as "docker" -- confirm that this
    # breadth is intended.
    marker_groups = (
        (("rspress", "docs", "doc"), ("docs/rspress.mjs", "docs/rspress.config.mjs")),
        (("storybook",), (".storybook/main.js", ".storybook/main.ts")),
        (("vite", "playground"), ("playground/vite.config.ts", "vite.config.ts")),
    )

    for markers, config_paths in marker_groups:
        if not any(marker in haystack for marker in markers):
            continue
        for config_path in config_paths:
            if path_exists_in_tree(context.file_tree, config_path):
                return config_path
    return None
|
|
303
|
+
|
|
267
304
|
def _is_auxiliary_path(self, path: str) -> bool:
    """Return True when any segment of *path* is listed in ``self._AUXILIARY_DIRS``.

    Backslashes are treated as path separators and segments are compared
    in lowercase.
    """
    for segment in path.replace("\\", "/").split("/"):
        if segment.lower() in self._AUXILIARY_DIRS:
            return True
    return False
|
|
271
308
|
|
|
272
|
-
def _path_entrypoint_type(self, path: str) -> str:
|
|
273
|
-
|
|
309
|
+
def _path_entrypoint_type(self, path: str, *, fallback: str = "production") -> str:
|
|
310
|
+
parts = {p.lower() for p in path.replace("\\", "/").split("/")}
|
|
311
|
+
if parts & {"benchmark", "benchmarks", "bench", "benches"}:
|
|
312
|
+
return "benchmark"
|
|
313
|
+
if parts & {"example", "examples", "demo", "demos", "fixture", "fixtures"}:
|
|
274
314
|
return "example"
|
|
275
|
-
|
|
315
|
+
if self._is_auxiliary_path(path):
|
|
316
|
+
return "development"
|
|
317
|
+
return fallback
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import replace
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from sourcecode.schema import EntryPoint
|
|
7
|
+
|
|
8
|
+
# Operational class returned by classify_entry_point().
Classification = Literal["production", "development", "auxiliary"]
# Relevance level returned by runtime_relevance().
RuntimeRelevance = Literal["high", "medium", "low"]

# Lowercased path segments that make classify_entry_point() return "auxiliary".
_AUXILIARY_DIRS = frozenset({
    "benchmark", "benchmarks", "bench", "benches",
    "example", "examples", "demo", "demos",
    "fixture", "fixtures", "__fixtures__", "testdata", "test_data",
    "test", "tests", "__tests__", "spec", "specs", "e2e",
    "script", "scripts", "tool", "tools", "tooling", "ci",
    "mock", "mocks", "sandbox",
})

# Lowercased path segments that make classify_entry_point() return "development".
_DEVELOPMENT_DIRS = frozenset({
    "docs", "doc", "documentation", "wiki",
    "playground", "playgrounds", ".storybook", "storybook",
})

# Substrings searched for in the lowercased path, reason and evidence of an
# entry point; any hit classifies it as "development".
_DEV_MARKERS = ("rspress", "vite", "storybook", "playground", "dev-server")

# Entry-point reasons treated as production launchers. Immutable, like the
# other lookup tables in this module, so it cannot be mutated by accident.
_PRODUCTION_SCRIPT_REASONS = frozenset({"script:start", "script:serve", "script:server"})
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def classify_entry_point(ep: EntryPoint) -> Classification:
    """Classify an entry point as production, development, or auxiliary.

    Exclusion is preferred over weak inclusion: development and auxiliary
    evidence found in the path (and, for development markers, in the reason
    and evidence text) overrides a production label supplied by the detector.
    """
    normalized_path = ep.path.replace("\\", "/").lower()
    segments = set(normalized_path.split("/"))
    reason = (ep.reason or "").lower()
    evidence = (ep.evidence or "").lower()
    haystack = f"{normalized_path} {reason} {evidence}"

    # Path and marker evidence is checked before any detector-provided label.
    if segments & _DEVELOPMENT_DIRS:
        return "development"
    # NOTE(review): markers are matched as raw substrings, so "vite" also
    # matches e.g. "invite" in a path -- confirm this breadth is intended.
    for marker in _DEV_MARKERS:
        if marker in haystack:
            return "development"
    if segments & _AUXILIARY_DIRS:
        return "auxiliary"

    declared_type = ep.entrypoint_type
    if declared_type in {"benchmark", "example"}:
        return "auxiliary"
    if declared_type == "development":
        return "development"

    if ep.source in {"heuristic", "convention"}:
        # Conventional binaries/applications of these stacks are trusted as
        # production; every other heuristic/convention entry is auxiliary.
        trusted_convention = (
            ep.source == "convention"
            and ep.kind in {"binary", "application"}
            and ep.stack in {"go", "rust", "java", "dotnet", "kotlin", "scala"}
        )
        return "production" if trusted_convention else "auxiliary"

    # Everything left (explicit production labels, package.json bin entries,
    # start/serve/server scripts, and any other source) is production.
    return "production"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def runtime_relevance(ep: EntryPoint, classification: Classification | None = None) -> RuntimeRelevance:
    """Rate how relevant an entry point is at runtime: high, medium, or low.

    Args:
        ep: Entry point to rate.
        classification: Precomputed class for *ep*; computed with
            ``classify_entry_point`` when omitted.

    Returns:
        "low" for anything that is not production. For production entries:
        "high" for package.json bin entries and production script reasons,
        "medium" for package.json main/module entries and conventional
        binaries/applications, "low" for other heuristic/convention or
        low-confidence entries, and "medium" otherwise.
    """
    resolved = classification or classify_entry_point(ep)
    if resolved != "production":
        return "low"

    reason = (ep.reason or "").lower()
    source = ep.source

    declared_launcher = (
        source == "package.json#bin"
        or reason == "bin"
        or reason in _PRODUCTION_SCRIPT_REASONS
    )
    if declared_launcher:
        return "high"

    if source == "package.json" and reason in {"main", "module"}:
        return "medium"
    if source == "convention" and ep.kind in {"binary", "application"}:
        return "medium"

    weak_evidence = source in {"heuristic", "convention"} or ep.confidence == "low"
    return "low" if weak_evidence else "medium"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def normalize_entry_point(ep: EntryPoint) -> EntryPoint:
    """Return a copy of *ep* with classification and runtime relevance filled in.

    The legacy ``entrypoint_type`` is kept, with two adjustments: a
    "production" label on a heuristic/convention entry classified as
    auxiliary is cleared, and a missing type is filled with the
    classification when that is "production" or "development".
    """
    classification = classify_entry_point(ep)
    relevance = runtime_relevance(ep, classification)

    entry_type = ep.entrypoint_type
    contradicted_label = (
        classification == "auxiliary"
        and entry_type == "production"
        and ep.source in {"heuristic", "convention"}
    )
    if contradicted_label:
        entry_type = None

    # Auxiliary entries without a type stay untyped; there is no legacy
    # "auxiliary" entrypoint_type value assigned here.
    if entry_type is None and classification in ("production", "development"):
        entry_type = classification

    return replace(
        ep,
        classification=classification,
        runtime_relevance=relevance,
        entrypoint_type=entry_type,
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def is_production_entry_point(ep: EntryPoint) -> bool:
    """Return True when *ep* normalizes to a production entry of high or medium relevance."""
    candidate = normalize_entry_point(ep)
    if candidate.classification != "production":
        return False
    return candidate.runtime_relevance in {"high", "medium"}
|