sourcecode 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Genera mapas de contexto estructurado para agentes IA."""
2
2
 
3
- __version__ = "0.29.0"
3
+ __version__ = "0.30.0"
@@ -5,6 +5,7 @@ import re
5
5
  from pathlib import Path
6
6
  from typing import Any
7
7
 
8
+ from sourcecode.entrypoint_classifier import is_production_entry_point
8
9
  from sourcecode.schema import EntryPoint, SourceMap, StackDetection
9
10
  from sourcecode.tree_utils import flatten_file_tree
10
11
 
@@ -63,11 +64,8 @@ class ArchitectureSummarizer:
63
64
  entry for entry in sm.entry_points
64
65
  if not self._is_tooling_path(entry.path)
65
66
  and not self._is_auxiliary_path(entry.path)
66
- and entry.entrypoint_type not in ("benchmark", "example")
67
+ and is_production_entry_point(entry)
67
68
  ]
68
- if not entry_points:
69
- fallback = self._infer_fallback_entry_points(file_paths, sm.stacks)
70
- entry_points = fallback[:1]
71
69
 
72
70
  lang_lines: list[str] = []
73
71
  if entry_points:
@@ -280,8 +278,7 @@ class ArchitectureSummarizer:
280
278
  if modules:
281
279
  formatted = self._format_module_list([self._module_label(module) for module in modules])
282
280
  if formatted:
283
- lines.append(f"Orquesta modulos internos: {formatted}.")
284
- lines.append("Produce la salida principal del entry point JavaScript/TypeScript detectado.")
281
+ lines.append(f"Imports internos del entry point: {formatted}.")
285
282
  return lines
286
283
 
287
284
  def _summarize_java_entry(self, path: str, content: str, stacks: list[StackDetection]) -> list[str]:
@@ -344,8 +341,7 @@ class ArchitectureSummarizer:
344
341
  if internal:
345
342
  formatted = self._format_module_list([self._module_label(module) for module in internal])
346
343
  if formatted:
347
- lines.append(f"Orquesta paquetes internos: {formatted}.")
348
- lines.append("Produce la salida principal del binario Go detectado.")
344
+ lines.append(f"Imports internos del binario Go: {formatted}.")
349
345
  return lines
350
346
 
351
347
  def _describe_entry_point(self, entry_point: EntryPoint, project_type: str | None) -> str:
sourcecode/classifier.py CHANGED
@@ -45,8 +45,12 @@ class TypeClassifier:
45
45
  primary_stack = self._select_primary_stack(enriched, project_type)
46
46
 
47
47
  final_stacks: list[StackDetection] = []
48
+ primary_assigned = False
48
49
  for stack in enriched:
49
- final_stacks.append(replace(stack, primary=(stack.stack == primary_stack)))
50
+ is_primary = stack.stack == primary_stack and not primary_assigned
51
+ if is_primary:
52
+ primary_assigned = True
53
+ final_stacks.append(replace(stack, primary=is_primary))
50
54
  return final_stacks, project_type
51
55
 
52
56
  def _enrich_stack(
sourcecode/cli.py CHANGED
@@ -6,9 +6,10 @@ import time
6
6
  from pathlib import Path
7
7
  from typing import Any, Optional, cast
8
8
 
9
- import typer
10
-
11
- from sourcecode import __version__
9
+ import typer
10
+
11
+ from sourcecode import __version__
12
+ from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
12
13
 
13
14
 
14
15
  # ---------------------------------------------------------------------------
@@ -117,11 +118,11 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
117
118
  )
118
119
 
119
120
  # overall:high requires at least one production entry point
120
- if cs.overall == "high":
121
- prod_eps = [
122
- ep for ep in sm.entry_points
123
- if ep.entrypoint_type in ("production", None)
124
- ]
121
+ if cs.overall == "high":
122
+ prod_eps = [
123
+ ep for ep in sm.entry_points
124
+ if is_production_entry_point(ep)
125
+ ]
125
126
  if not prod_eps and sm.entry_points:
126
127
  issues.append(
127
128
  "[coherence] overall=high but no production entry points exist — "
@@ -134,21 +135,7 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]: # type: ignore[nam
134
135
  "[coherence] entry_point_confidence=high but entry_points is empty"
135
136
  )
136
137
 
137
- # Contradictory EP classification: EPs with entrypoint_type=benchmark must not
138
- # appear in agent_view output (checked post-facto via produced_by + type)
139
- benchmark_eps = [
140
- ep for ep in sm.entry_points
141
- if ep.entrypoint_type in ("benchmark", "example")
142
- ]
143
- if benchmark_eps and sm.entry_points and all(
144
- ep.entrypoint_type in ("benchmark", "example") for ep in sm.entry_points
145
- ):
146
- issues.append(
147
- f"[coherence] all {len(sm.entry_points)} entry point(s) are benchmark/example — "
148
- "no production entry detected; analysis_gaps should reflect impact=high"
149
- )
150
-
151
- return issues
138
+ return issues
152
139
 
153
140
  _HELP = """\
154
141
  Deterministic codebase context for AI coding agents.
@@ -993,12 +980,13 @@ def main(
993
980
  "example", "examples", "docs", "doc", "fixtures", "fixture",
994
981
  })
995
982
  for _ep in sm.entry_points:
996
- _ep_type = _ep.entrypoint_type
997
- _path_parts = _ep.path.replace("\\", "/").lower().split("/")
998
- _filtered = (
999
- _ep_type in ("benchmark", "example")
1000
- or any(p in _aux_parts for p in _path_parts)
1001
- )
983
+ _normalized_ep = normalize_entry_point(_ep)
984
+ _ep_type = _normalized_ep.entrypoint_type
985
+ _path_parts = _ep.path.replace("\\", "/").lower().split("/")
986
+ _filtered = (
987
+ _normalized_ep.classification != "production"
988
+ or any(p in _aux_parts for p in _path_parts)
989
+ )
1002
990
  if _filtered:
1003
991
  _trace.emit("output", "agent_view", "filter_ep",
1004
992
  target=_ep.path,
@@ -12,6 +12,7 @@ from __future__ import annotations
12
12
  from pathlib import Path
13
13
  from typing import TYPE_CHECKING
14
14
 
15
+ from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
15
16
  from sourcecode.schema import AnalysisGap, ConfidenceSummary, SourceMap
16
17
 
17
18
  if TYPE_CHECKING:
@@ -59,8 +60,15 @@ class ConfidenceAnalyzer:
59
60
  hard_signals.append(sig)
60
61
 
61
62
  # ── Entry point signals ───────────────────────────────────────────────
62
- for ep in sm.entry_points:
63
- if ep.source in _HARD_SOURCES or ep.reason == "console_script":
63
+ normalized_entry_points = [normalize_entry_point(ep) for ep in sm.entry_points]
64
+
65
+ for ep in normalized_entry_points:
66
+ if ep.classification != "production":
67
+ sig = f"entry:{ep.path} ({ep.classification}, {ep.reason or ep.source})"
68
+ if sig not in ignored_signals:
69
+ ignored_signals.append(sig)
70
+ continue
71
+ if ep.source in _HARD_SOURCES or ep.reason == "console_script" or ep.runtime_relevance == "high":
64
72
  sig = f"entry:{ep.path} ({ep.reason or ep.source})"
65
73
  if sig not in hard_signals:
66
74
  hard_signals.append(sig)
@@ -95,13 +103,13 @@ class ConfidenceAnalyzer:
95
103
  anomalies.append("All stacks detected via heuristic only — no manifest found")
96
104
 
97
105
  # ── Anomaly: entry points all low-confidence ──────────────────────────
98
- if sm.entry_points and all(ep.confidence == "low" for ep in sm.entry_points):
106
+ if normalized_entry_points and all(ep.confidence == "low" for ep in normalized_entry_points):
99
107
  anomalies.append("All entry points are low-confidence (heuristic/code_signal only)")
100
108
 
101
109
  # ── Anomaly: all production EPs are convention-only (no manifest evidence) ──
102
110
  production_eps_check = [
103
- ep for ep in sm.entry_points
104
- if ep.entrypoint_type in ("production", None)
111
+ ep for ep in normalized_entry_points
112
+ if is_production_entry_point(ep)
105
113
  ]
106
114
  if production_eps_check and all(
107
115
  ep.source in ("convention", "heuristic") or ep.reason in ("convention", "entry_file_pattern")
@@ -113,37 +121,37 @@ class ConfidenceAnalyzer:
113
121
  )
114
122
 
115
123
  # ── Anomaly: no production entry points ───────────────────────────────
116
- if sm.entry_points:
124
+ if normalized_entry_points:
117
125
  production_eps = [
118
- ep for ep in sm.entry_points
119
- if ep.entrypoint_type in ("production", None)
126
+ ep for ep in normalized_entry_points
127
+ if is_production_entry_point(ep)
120
128
  ]
121
129
  if not production_eps:
122
130
  anomalies.append(
123
- "No production entry points — all detected entries are dev/benchmark/example"
131
+ "No production entry points — all detected entries are development/auxiliary"
124
132
  )
125
133
 
126
134
  # ── Gaps ──────────────────────────────────────────────────────────────
127
- if not sm.entry_points:
135
+ if not normalized_entry_points:
128
136
  gaps.append(AnalysisGap(
129
137
  area="entry_points",
130
138
  reason="No entry point detected — project may use non-standard structure or be a library",
131
139
  impact="high",
132
140
  ))
133
141
  elif all(
134
- ep.entrypoint_type in ("benchmark", "example", "development")
135
- for ep in sm.entry_points
142
+ ep.classification in ("development", "auxiliary")
143
+ for ep in normalized_entry_points
136
144
  ):
137
145
  gaps.append(AnalysisGap(
138
146
  area="entry_points",
139
147
  reason=(
140
- "All detected entry points are auxiliary (benchmark/example/dev) — "
148
+ "All detected entry points are development or auxiliary — "
141
149
  "no production entry point found. Verify project has a 'start'/'serve' "
142
150
  "script or production binary."
143
151
  ),
144
152
  impact="high",
145
153
  ))
146
- elif all(ep.confidence == "low" for ep in sm.entry_points):
154
+ elif all(ep.confidence == "low" for ep in normalized_entry_points):
147
155
  gaps.append(AnalysisGap(
148
156
  area="entry_points",
149
157
  reason="Entry points inferred from code patterns only, no manifest declaration found",
@@ -196,12 +204,17 @@ class ConfidenceAnalyzer:
196
204
  # Entry points: only consider production EPs for confidence scoring.
197
205
  # Benchmark/example/dev-only entries are not evidence of production readiness.
198
206
  production_eps = [
199
- ep for ep in sm.entry_points
200
- if ep.entrypoint_type in ("production", None)
207
+ ep for ep in normalized_entry_points
208
+ if is_production_entry_point(ep)
201
209
  ]
202
210
  ep_conf = _max_confidence([ep.confidence for ep in production_eps] or ["low"])
203
211
  overall = _min_confidence([stack_conf, ep_conf])
204
212
 
213
+ if normalized_entry_points and not production_eps:
214
+ overall = "low"
215
+ elif production_eps and all(ep.runtime_relevance == "low" for ep in production_eps):
216
+ overall = _min_confidence([overall, "low"])
217
+
205
218
  # Factor in architecture confidence when available
206
219
  arch = sm.architecture
207
220
  if arch is not None and arch.requested:
@@ -125,6 +125,9 @@ class NodejsDetector(AbstractDetector):
125
125
  "playground", "playgrounds",
126
126
  "fixture", "fixtures",
127
127
  "sandbox", "e2e", "docs",
128
+ "test", "tests", "__tests__", "spec", "specs",
129
+ "scripts", "script", "tools", "tooling", "ci",
130
+ ".storybook", "storybook",
128
131
  })
129
132
 
130
133
  def _collect_entry_points(
@@ -144,19 +147,20 @@ class NodejsDetector(AbstractDetector):
144
147
  continue
145
148
  # Extract file path from script command
146
149
  path = self._extract_script_path(script_cmd, context)
150
+ if path is None:
151
+ path = self._infer_tool_script_path(script_name, script_cmd, context)
147
152
  if path and path not in seen and path_exists_in_tree(context.file_tree, path):
148
153
  seen.add(path)
149
- if not self._is_auxiliary_path(path):
150
- entry_points.append(EntryPoint(
151
- path=path,
152
- stack="nodejs",
153
- kind=kind,
154
- source="package.json#scripts",
155
- confidence="high",
156
- reason=f"script:{script_name}",
157
- evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
158
- entrypoint_type=ep_type,
159
- ))
154
+ entry_points.append(EntryPoint(
155
+ path=path,
156
+ stack="nodejs",
157
+ kind=kind,
158
+ source="package.json#scripts",
159
+ confidence="high",
160
+ reason=f"script:{script_name}",
161
+ evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
162
+ entrypoint_type=self._path_entrypoint_type(path, fallback=ep_type),
163
+ ))
160
164
 
161
165
  # Priority 2: package.json bin — CLI production entry points
162
166
  bin_field = package_json.get("bin")
@@ -233,7 +237,7 @@ class NodejsDetector(AbstractDetector):
233
237
  def _classify_script(self, script_name: str) -> tuple[str | None, str]:
234
238
  """Map script name → (entrypoint_type, kind). Returns (None, '') to skip."""
235
239
  lower = script_name.lower()
236
- if lower in ("start", "serve"):
240
+ if lower in ("start", "serve", "server"):
237
241
  return "production", "server"
238
242
  if lower in ("dev", "develop", "watch"):
239
243
  return "development", "server"
@@ -243,6 +247,12 @@ class NodejsDetector(AbstractDetector):
243
247
  return "benchmark", "script"
244
248
  if lower.startswith("example") or lower.startswith("demo"):
245
249
  return "example", "script"
250
+ if lower in {"docs", "doc", "storybook", "playground"} or any(
251
+ marker in lower for marker in ("rspress", "vite", "storybook", "playground")
252
+ ):
253
+ return "development", "server"
254
+ if lower in {"test", "e2e", "spec", "lint", "format", "typecheck", "build"}:
255
+ return "development", "script"
246
256
  return None, ""
247
257
 
248
258
  def _extract_script_path(self, cmd: str, context: DetectionContext) -> str | None:
@@ -264,12 +274,36 @@ class NodejsDetector(AbstractDetector):
264
274
  return p
265
275
  return None
266
276
 
277
+ def _infer_tool_script_path(
278
+ self,
279
+ script_name: str,
280
+ script_cmd: str,
281
+ context: DetectionContext,
282
+ ) -> str | None:
283
+ text = f"{script_name} {script_cmd}".lower()
284
+ candidates: list[str] = []
285
+ if "rspress" in text or "docs" in text or "doc" in text:
286
+ candidates.extend(["docs/rspress.mjs", "docs/rspress.config.mjs"])
287
+ if "storybook" in text:
288
+ candidates.extend([".storybook/main.js", ".storybook/main.ts"])
289
+ if "vite" in text or "playground" in text:
290
+ candidates.extend(["playground/vite.config.ts", "vite.config.ts"])
291
+ for candidate in candidates:
292
+ if path_exists_in_tree(context.file_tree, candidate):
293
+ return candidate
294
+ return None
295
+
267
296
  def _is_auxiliary_path(self, path: str) -> bool:
268
297
  norm = path.replace("\\", "/")
269
298
  parts = norm.split("/")
270
299
  return any(p.lower() in self._AUXILIARY_DIRS for p in parts)
271
300
 
272
- def _path_entrypoint_type(self, path: str) -> str:
273
- if self._is_auxiliary_path(path):
301
+ def _path_entrypoint_type(self, path: str, *, fallback: str = "production") -> str:
302
+ parts = {p.lower() for p in path.replace("\\", "/").split("/")}
303
+ if parts & {"benchmark", "benchmarks", "bench", "benches"}:
304
+ return "benchmark"
305
+ if parts & {"example", "examples", "demo", "demos", "fixture", "fixtures"}:
274
306
  return "example"
275
- return "production"
307
+ if self._is_auxiliary_path(path):
308
+ return "development"
309
+ return fallback
@@ -0,0 +1,106 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import replace
4
+ from typing import Literal
5
+
6
+ from sourcecode.schema import EntryPoint
7
+
8
+ Classification = Literal["production", "development", "auxiliary"]
9
+ RuntimeRelevance = Literal["high", "medium", "low"]
10
+
11
+ _AUXILIARY_DIRS = frozenset({
12
+ "benchmark", "benchmarks", "bench", "benches",
13
+ "example", "examples", "demo", "demos",
14
+ "fixture", "fixtures", "__fixtures__", "testdata", "test_data",
15
+ "test", "tests", "__tests__", "spec", "specs", "e2e",
16
+ "script", "scripts", "tool", "tools", "tooling", "ci",
17
+ "mock", "mocks", "sandbox",
18
+ })
19
+
20
+ _DEVELOPMENT_DIRS = frozenset({
21
+ "docs", "doc", "documentation", "wiki",
22
+ "playground", "playgrounds", ".storybook", "storybook",
23
+ })
24
+
25
+ _DEV_MARKERS = ("rspress", "vite", "storybook", "playground", "dev-server")
26
+ _PRODUCTION_SCRIPT_REASONS = {"script:start", "script:serve", "script:server"}
27
+
28
+
29
+ def classify_entry_point(ep: EntryPoint) -> Classification:
30
+ """Return the operational class for an entry point.
31
+
32
+ The rules intentionally prefer exclusion over weak inclusion. Development
33
+ and auxiliary path evidence wins over detector-provided production labels.
34
+ """
35
+ path = ep.path.replace("\\", "/").lower()
36
+ parts = set(path.split("/"))
37
+ reason = (ep.reason or "").lower()
38
+ evidence = (ep.evidence or "").lower()
39
+ marker_text = f"{path} {reason} {evidence}"
40
+
41
+ if parts & _DEVELOPMENT_DIRS or any(marker in marker_text for marker in _DEV_MARKERS):
42
+ return "development"
43
+ if parts & _AUXILIARY_DIRS:
44
+ return "auxiliary"
45
+ if ep.entrypoint_type in {"benchmark", "example"}:
46
+ return "auxiliary"
47
+ if ep.entrypoint_type == "development":
48
+ return "development"
49
+ if (
50
+ ep.source == "convention"
51
+ and ep.kind in {"binary", "application"}
52
+ and ep.stack in {"go", "rust", "java", "dotnet", "kotlin", "scala"}
53
+ ):
54
+ return "production"
55
+ if ep.source in {"heuristic", "convention"}:
56
+ return "auxiliary"
57
+ if ep.entrypoint_type == "production":
58
+ return "production"
59
+ if ep.source == "package.json#bin" or reason == "bin":
60
+ return "production"
61
+ if reason in _PRODUCTION_SCRIPT_REASONS:
62
+ return "production"
63
+ return "production"
64
+
65
+
66
+ def runtime_relevance(ep: EntryPoint, classification: Classification | None = None) -> RuntimeRelevance:
67
+ classification = classification or classify_entry_point(ep)
68
+ if classification != "production":
69
+ return "low"
70
+ reason = (ep.reason or "").lower()
71
+ if ep.source == "package.json#bin" or reason == "bin" or reason in _PRODUCTION_SCRIPT_REASONS:
72
+ return "high"
73
+ if ep.source == "package.json" and reason in {"main", "module"}:
74
+ return "medium"
75
+ if ep.source == "convention" and ep.kind in {"binary", "application"}:
76
+ return "medium"
77
+ if ep.source in {"heuristic", "convention"} or ep.confidence == "low":
78
+ return "low"
79
+ return "medium"
80
+
81
+
82
+ def normalize_entry_point(ep: EntryPoint) -> EntryPoint:
83
+ classification = classify_entry_point(ep)
84
+ relevance = runtime_relevance(ep, classification)
85
+ legacy_type = ep.entrypoint_type
86
+ if classification == "auxiliary" and legacy_type == "production" and ep.source in {"heuristic", "convention"}:
87
+ legacy_type = None
88
+ if legacy_type is None:
89
+ if classification == "production":
90
+ legacy_type = "production"
91
+ elif classification == "development":
92
+ legacy_type = "development"
93
+ return replace(
94
+ ep,
95
+ classification=classification,
96
+ runtime_relevance=relevance,
97
+ entrypoint_type=legacy_type,
98
+ )
99
+
100
+
101
+ def is_production_entry_point(ep: EntryPoint) -> bool:
102
+ normalized = normalize_entry_point(ep)
103
+ return (
104
+ normalized.classification == "production"
105
+ and normalized.runtime_relevance in {"high", "medium"}
106
+ )
sourcecode/schema.py CHANGED
@@ -72,10 +72,12 @@ class EntryPoint:
72
72
  kind: str = "entry"
73
73
  source: str = "manifest"
74
74
  confidence: Literal["high", "medium", "low"] = "high"
75
- reason: Optional[str] = None # console_script | entry_file_pattern | main_guard | typer_app | heuristic | convention
76
- evidence: Optional[str] = None # brief evidence string
77
- entrypoint_type: Optional[Literal["production", "development", "benchmark", "example"]] = None
78
- produced_by: Optional[str] = None # which detector emitted this
75
+ reason: Optional[str] = None # console_script | entry_file_pattern | main_guard | typer_app | heuristic | convention
76
+ evidence: Optional[str] = None # brief evidence string
77
+ entrypoint_type: Optional[Literal["production", "development", "benchmark", "example"]] = None
78
+ classification: Optional[Literal["production", "development", "auxiliary"]] = None
79
+ runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
80
+ produced_by: Optional[str] = None # which detector emitted this
79
81
 
80
82
 
81
83
  @dataclass
sourcecode/serializer.py CHANGED
@@ -13,9 +13,10 @@ import sys
13
13
  from dataclasses import asdict, dataclass, is_dataclass, replace
14
14
  from io import StringIO
15
15
  from pathlib import Path
16
- from typing import Any, Optional
17
-
18
- from sourcecode.schema import (
16
+ from typing import Any, Optional
17
+
18
+ from sourcecode.entrypoint_classifier import normalize_entry_point, is_production_entry_point
19
+ from sourcecode.schema import (
19
20
  ArchitectureAnalysis,
20
21
  ModuleGraph,
21
22
  ModuleGraphSummary,
@@ -34,7 +35,7 @@ def to_json(sm: SourceMap | dict[str, Any], indent: int = 2) -> str:
34
35
  return json.dumps(data, indent=indent, ensure_ascii=False)
35
36
 
36
37
 
37
- def to_yaml(sm: SourceMap) -> str:
38
+ def to_yaml(sm: SourceMap) -> str:
38
39
  """Serializa SourceMap a YAML usando ruamel.yaml.
39
40
 
40
41
  ruamel.yaml preserva el orden de claves y serializa None como null
@@ -50,11 +51,42 @@ def to_yaml(sm: SourceMap) -> str:
50
51
  lambda dumper, data: dumper.represent_scalar("tag:yaml.org,2002:null", "null"),
51
52
  )
52
53
  stream = StringIO()
53
- yaml.dump(asdict(sm), stream)
54
- return stream.getvalue()
55
-
56
-
57
- def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
54
+ yaml.dump(asdict(sm), stream)
55
+ return stream.getvalue()
56
+
57
+
58
+ def _clean_entry_point(ep: Any) -> dict[str, Any]:
59
+ normalized = normalize_entry_point(ep)
60
+ return {
61
+ k: v
62
+ for k, v in asdict(normalized).items()
63
+ if v is not None and v != "" and k != "workspace"
64
+ }
65
+
66
+
67
+ def _entry_point_groups(entry_points: list[Any]) -> dict[str, list[dict[str, Any]]]:
68
+ groups: dict[str, list[dict[str, Any]]] = {
69
+ "production": [],
70
+ "development": [],
71
+ "auxiliary": [],
72
+ }
73
+ for ep in entry_points:
74
+ normalized = normalize_entry_point(ep)
75
+ item = _clean_entry_point(normalized)
76
+ if is_production_entry_point(normalized):
77
+ groups["production"].append(item)
78
+ elif normalized.classification == "development":
79
+ groups["development"].append(item)
80
+ else:
81
+ groups["auxiliary"].append(item)
82
+
83
+ groups["production"].sort(key=lambda ep: (ep.get("runtime_relevance") != "high", ep.get("path", "")))
84
+ groups["development"].sort(key=lambda ep: ep.get("path", ""))
85
+ groups["auxiliary"].sort(key=lambda ep: ep.get("path", ""))
86
+ return groups
87
+
88
+
89
+ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
58
90
  """Context package ready for prompt or handoff (~600-800 tokens).
59
91
 
60
92
  Answers: what it is, where it enters, what depends on what,
@@ -85,13 +117,12 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
85
117
  if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
86
118
  code_notes_summary_dict = asdict(sm.code_notes_summary)
87
119
 
88
- # Entry points: strip None fields for compactness
89
- entry_points_compact = [
90
- {k: v for k, v in asdict(ep).items() if v is not None and v != ""}
91
- for ep in sm.entry_points
92
- ]
93
- if not entry_points_compact:
94
- entry_points_compact = None # type: ignore[assignment] # signal: not detected
120
+ # Entry points: production runtime only. Auxiliary and development entries
121
+ # are exposed separately so agents do not mix tooling with execution paths.
122
+ ep_groups = _entry_point_groups(sm.entry_points)
123
+ entry_points_compact = ep_groups["production"]
124
+ if not entry_points_compact:
125
+ entry_points_compact = [] # truth signal: no production runtime detected
95
126
 
96
127
  # Confidence summary
97
128
  conf_dict: Any = None
@@ -116,9 +147,11 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
116
147
  "project_summary": sm.project_summary,
117
148
  "architecture_summary": sm.architecture_summary,
118
149
  "context_summary": context_summary_dict,
119
- "stacks": [asdict(stack) for stack in sm.stacks],
120
- "entry_points": entry_points_compact,
121
- "dependency_summary": dep_summary_dict,
150
+ "stacks": [asdict(stack) for stack in sm.stacks],
151
+ "entry_points": entry_points_compact,
152
+ "development_entry_points": ep_groups["development"] or None,
153
+ "auxiliary_entry_points": ep_groups["auxiliary"] or None,
154
+ "dependency_summary": dep_summary_dict,
122
155
  "key_dependencies": key_deps,
123
156
  "env_summary": env_summary_dict,
124
157
  "code_notes_summary": code_notes_summary_dict,
@@ -163,10 +196,14 @@ def normalize_source_map(sm: SourceMap) -> SourceMap:
163
196
 
164
197
  # dependencies is already list[DependencyRecord] by default_factory, but
165
198
  # guard against any future refactor that could accidentally set it to None
166
- if sm.dependencies is None: # type: ignore[comparison-overlap]
167
- changes["dependencies"] = []
168
-
169
- return replace(sm, **changes) if changes else sm
199
+ if sm.dependencies is None: # type: ignore[comparison-overlap]
200
+ changes["dependencies"] = []
201
+
202
+ normalized_eps = [normalize_entry_point(ep) for ep in sm.entry_points]
203
+ if normalized_eps != sm.entry_points:
204
+ changes["entry_points"] = normalized_eps
205
+
206
+ return replace(sm, **changes) if changes else sm
170
207
 
171
208
 
172
209
  def validate_source_map(sm: SourceMap) -> None:
@@ -410,37 +447,13 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
410
447
 
411
448
  result: dict[str, Any] = {"project": project}
412
449
 
413
- # ── 2. Entry points: production/runtime first; benchmark/example always excluded ──
414
- # Never fall back to auxiliary-only EPs when no operational EP exists the
415
- # confidence_summary anomaly and analysis_gaps explain the gap instead.
416
- if sm.entry_points:
417
- _ep_skip = {"workspace"}
418
- _aux_parts = frozenset({
419
- "benchmark", "benchmarks", "bench", "demo", "demos",
420
- "example", "examples", "docs", "doc", "fixtures", "fixture",
421
- })
422
-
423
- def _ep_priority(ep_dict: dict[str, Any]) -> int:
424
- ep_type = ep_dict.get("entrypoint_type")
425
- if ep_type in ("benchmark", "example"):
426
- return 10
427
- path_parts = ep_dict.get("path", "").replace("\\", "/").lower().split("/")
428
- if any(p in _aux_parts for p in path_parts):
429
- return 5
430
- if ep_type == "development":
431
- return 3
432
- return 0
433
-
434
- all_ep = [
435
- {k: v for k, v in asdict(ep).items() if v is not None and v != "" and k not in _ep_skip}
436
- for ep in sm.entry_points
437
- ]
438
- all_ep.sort(key=_ep_priority)
439
- operational_ep = [ep for ep in all_ep if _ep_priority(ep) < 5]
440
- if operational_ep:
441
- result["entry_points"] = operational_ep
442
- # When operational_ep is empty: omit key entirely.
443
- # confidence_summary.anomalies + analysis_gaps carry the explanation.
450
+ # ── 2. Entry points: production/runtime only in the primary field ─────────
451
+ # Development and auxiliary entries are explicit side channels. A missing
452
+ # production runtime is represented as entry_points=[], never by fallback.
453
+ ep_groups = _entry_point_groups(sm.entry_points)
454
+ result["entry_points"] = ep_groups["production"]
455
+ result["development_entry_points"] = ep_groups["development"]
456
+ result["auxiliary_entry_points"] = ep_groups["auxiliary"]
444
457
 
445
458
  # ── 3. Architecture ───────────────────────────────────────────────────────
446
459
  if sm.architecture_summary:
@@ -550,7 +563,7 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
550
563
  return result
551
564
 
552
565
 
553
- def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any]:
566
+ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any]:
554
567
  """Default output — three signal layers.
555
568
 
556
569
  Layer A (always):
@@ -568,14 +581,18 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
568
581
  Full dependencies list is never included — use key_dependencies instead.
569
582
  Empty unrequested analyzer fields are omitted entirely.
570
583
  """
571
- result: dict[str, Any] = {
572
- "metadata": asdict(sm.metadata),
573
- "project_type": sm.project_type,
574
- "project_summary": sm.project_summary,
575
- "architecture_summary": sm.architecture_summary,
576
- "stacks": [asdict(s) for s in sm.stacks],
577
- "entry_points": [asdict(ep) for ep in sm.entry_points],
578
- }
584
+ ep_groups = _entry_point_groups(sm.entry_points)
585
+
586
+ result: dict[str, Any] = {
587
+ "metadata": asdict(sm.metadata),
588
+ "project_type": sm.project_type,
589
+ "project_summary": sm.project_summary,
590
+ "architecture_summary": sm.architecture_summary,
591
+ "stacks": [asdict(s) for s in sm.stacks],
592
+ "entry_points": ep_groups["production"],
593
+ "development_entry_points": ep_groups["development"],
594
+ "auxiliary_entry_points": ep_groups["auxiliary"],
595
+ }
579
596
 
580
597
  # Layer B — signals (only when the corresponding analyzer ran)
581
598
  if sm.dependency_summary is not None and sm.dependency_summary.requested:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.29.0
3
+ Version: 0.30.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,14 +1,15 @@
1
- sourcecode/__init__.py,sha256=n13VPZwhm9Du2TeJhJzTRI5m_7dP6mgpjdq-5NmlVIw,100
1
+ sourcecode/__init__.py,sha256=MU2HxHzhdlDeES-MGTUNA1df0X4nB3GWAvjTRWUEoys,100
2
2
  sourcecode/architecture_analyzer.py,sha256=SBRMWJN70M2qeNLkm9oCG_1rw2UOVuNgikyeAHJsXKw,22859
3
- sourcecode/architecture_summary.py,sha256=qolHmn6MWUIQHzY9WeHcfN41EJkQdnPQ5F_Z8pqQasA,20251
4
- sourcecode/classifier.py,sha256=Ft_RfYS-KOe0t7vjgUx04OoCJd1-DXK7k9-I0CFDSnU,6934
5
- sourcecode/cli.py,sha256=4yXSF9UkQsMWU26ySO4-7Zs0B224AxbrdYGr2Urag1k,51120
3
+ sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
4
+ sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
5
+ sourcecode/cli.py,sha256=LKtus6aETNZv70fkp5LrjTfvu5w9jsB4go-7MCoDnzg,50611
6
6
  sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
7
- sourcecode/confidence_analyzer.py,sha256=pzWeHTMz9ZYCuogYcTagpIJuLY-1SOSPTjsiE77rPek,11336
7
+ sourcecode/confidence_analyzer.py,sha256=B48lCuz_t_qsyjPQdLbKUj2kJ0Wu4Sq5ZnO18F_v3eU,12069
8
8
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
9
9
  sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
10
10
  sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
11
11
  sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
12
+ sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
12
13
  sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
13
14
  sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
14
15
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
@@ -18,9 +19,9 @@ sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
18
19
  sourcecode/relevance_scorer.py,sha256=2yvxDFnz9YGrHEJubgx9soiVIDZHKv_pntOtTARtKow,5928
19
20
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
20
21
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
21
- sourcecode/schema.py,sha256=yCBhYKbF6axqMgl6F1XGk7xLl6uDJNZafwSSNMtxSa8,20222
22
+ sourcecode/schema.py,sha256=wylO5aKFBHBUAvMh4AH6hKKcN8p5yt6XRkyRvZRjV-4,20378
22
23
  sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
23
- sourcecode/serializer.py,sha256=ZTnMbVnLSdplLK-NOdqH6GSi4v6zLgxm5i69VR9vS2Q,27199
24
+ sourcecode/serializer.py,sha256=VksZokFUG3GLWz_eUtVqNdkddkeV-tBY2lzfa8ociAc,27898
24
25
  sourcecode/summarizer.py,sha256=YfBixsN1zWHHXdOEqaf793BylbJrsj75ST7FN6jcqRU,15424
25
26
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
26
27
  sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
@@ -35,7 +36,7 @@ sourcecode/detectors/heuristic.py,sha256=Hab_Uiuxtq-WBs_wCnzETBS5hhaxeEtf-GOGMH6
35
36
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
36
37
  sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
37
38
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
38
- sourcecode/detectors/nodejs.py,sha256=cqLw3SJ1UcImZD5_DVobByacWRD4ftIlep_mYo9z-n8,11260
39
+ sourcecode/detectors/nodejs.py,sha256=LN-m3bERpijlBMl1TNVOH_cJDhfDYRhn8K8lsNzztVc,12923
39
40
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
40
41
  sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
41
42
  sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
@@ -51,8 +52,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
51
52
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
52
53
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
53
54
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
54
- sourcecode-0.29.0.dist-info/METADATA,sha256=FeC2-4TQTNZRw7iXfUDelfEv7Art515YgQkW4ZB68AA,25020
55
- sourcecode-0.29.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
56
- sourcecode-0.29.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
57
- sourcecode-0.29.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
58
- sourcecode-0.29.0.dist-info/RECORD,,
55
+ sourcecode-0.30.0.dist-info/METADATA,sha256=wjMQ_CyxnBDjQ6G_7PLE5crhTdh2sl6wd6Bkdy3t48o,25020
56
+ sourcecode-0.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
57
+ sourcecode-0.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
58
+ sourcecode-0.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
59
+ sourcecode-0.30.0.dist-info/RECORD,,