sourcecode 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Genera mapas de contexto estructurado para agentes IA."""
2
2
 
3
- __version__ = "0.30.0"
3
+ __version__ = "0.31.0"
@@ -215,18 +215,22 @@ class ArchitectureAnalyzer:
215
215
  if pattern not in (None, "unknown", "flat"):
216
216
  if all_layers_weak:
217
217
  # Layers came from file-naming heuristic only, not directory structure
218
- confidence = "medium"
218
+ confidence = "low"
219
219
  limitations.append(
220
- "Patron inferido de nombres de archivo sin estructura de directorios confirmatoria"
220
+ "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
221
221
  )
222
222
  else:
223
- confidence = "high" if len(strong_domains) >= 3 else "medium"
223
+ confidence = "medium" if len(strong_domains) >= 3 else "low"
224
+ if graph is None:
225
+ limitations.append(
226
+ "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
227
+ )
224
228
  elif len(strong_domains) >= 1:
225
229
  confidence = "medium"
226
230
  else:
227
231
  confidence = "low"
228
232
 
229
- method = "graph+heuristic" if graph is not None else "heuristic"
233
+ method = "graph+structure" if graph is not None else "filesystem_inference"
230
234
 
231
235
  return ArchitectureAnalysis(
232
236
  requested=True,
@@ -339,7 +343,7 @@ class ArchitectureAnalyzer:
339
343
  best_matched = matched
340
344
 
341
345
  if best_score >= 2:
342
- layer_confidence: Literal["high", "medium", "low"] = "high" if best_score >= 3 else "medium"
346
+ layer_confidence: Literal["high", "medium", "low"] = "medium" if best_score >= 3 else "low"
343
347
  layers: list[ArchitectureLayer] = []
344
348
  for layer_key, matched_dirs in best_matched.items():
345
349
  matched_files = [
sourcecode/cli.py CHANGED
@@ -896,11 +896,13 @@ def main(
896
896
  if dependency_analyzer is not None:
897
897
  from sourcecode.dependency_analyzer import _ROLE_PRIORITY
898
898
 
899
- primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
900
- direct_deps = [
901
- d for d in sm.dependencies
902
- if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
903
- ]
899
+ primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
900
+ direct_deps = [
901
+ d for d in sm.dependencies
902
+ if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
903
+ and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
904
+ and d.scope not in {"dev"}
905
+ ]
904
906
 
905
907
  def _dep_sort_key(d: Any) -> tuple[int, int, str]:
906
908
  role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)
@@ -135,7 +135,7 @@ class ConfidenceAnalyzer:
135
135
  if not normalized_entry_points:
136
136
  gaps.append(AnalysisGap(
137
137
  area="entry_points",
138
- reason="No entry point detected project may use non-standard structure or be a library",
138
+ reason="Critical: no runtime entrypoint detected; system cannot be executed without manual inference",
139
139
  impact="high",
140
140
  ))
141
141
  elif all(
@@ -145,16 +145,16 @@ class ConfidenceAnalyzer:
145
145
  gaps.append(AnalysisGap(
146
146
  area="entry_points",
147
147
  reason=(
148
- "All detected entry points are development or auxiliary "
149
- "no production entry point found. Verify project has a 'start'/'serve' "
150
- "script or production binary."
148
+ "Critical: no production runtime entrypoint detected; detected entries are "
149
+ "development or auxiliary only. Add/verify a start/serve script, CLI bin, "
150
+ "or server bootstrap before using this context for automation."
151
151
  ),
152
152
  impact="high",
153
153
  ))
154
154
  elif all(ep.confidence == "low" for ep in normalized_entry_points):
155
155
  gaps.append(AnalysisGap(
156
156
  area="entry_points",
157
- reason="Entry points inferred from code patterns only, no manifest declaration found",
157
+ reason="Entry points inferred from code patterns only; no manifest script, CLI bin, or server bootstrap declaration found",
158
158
  impact="medium",
159
159
  ))
160
160
 
@@ -58,7 +58,7 @@ class NodejsDetector(AbstractDetector):
58
58
 
59
59
  from sourcecode.detectors.hybrid import merge_framework_detections, scan_for_frameworks
60
60
 
61
- dependency_names = self._collect_dependency_names(package_json)
61
+ dependency_names = self._collect_dependency_names(package_json, runtime_only=True)
62
62
  seen_fw: set[str] = set()
63
63
  manifest_frameworks = []
64
64
  for pkg_name, label in _FRAMEWORK_MAP.items():
@@ -98,9 +98,17 @@ class NodejsDetector(AbstractDetector):
98
98
  signals.append("monorepo:npm-workspaces")
99
99
  return signals
100
100
 
101
- def _collect_dependency_names(self, package_json: dict[str, Any]) -> set[str]:
101
+ def _collect_dependency_names(
102
+ self,
103
+ package_json: dict[str, Any],
104
+ *,
105
+ runtime_only: bool = False,
106
+ ) -> set[str]:
102
107
  names: set[str] = set()
103
- for field in ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies"):
108
+ fields = ("dependencies", "peerDependencies", "optionalDependencies")
109
+ if not runtime_only:
110
+ fields = fields + ("devDependencies",)
111
+ for field in fields:
104
112
  raw = package_json.get(field, {})
105
113
  if isinstance(raw, dict):
106
114
  names.update(str(name) for name in raw)
@@ -0,0 +1,215 @@
1
+ from __future__ import annotations
2
+
3
+ """Evidence-based file classification for agent context.
4
+
5
+ This module intentionally avoids assigning runtime/application roles from a
6
+ directory name alone. Runtime roles require execution evidence, imports,
7
+ definitions, or manifest/config evidence. Tests/tooling/build classifications
8
+ can be structural because their purpose is explicitly encoded by conventional
9
+ locations and config filenames.
10
+ """
11
+
12
+ import re
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Literal
16
+
17
+ from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
18
+ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
19
+
20
# Closed set of labels a file can receive. The first seven describe
# runtime/application roles; the last three are the structural categories
# (tests, tooling, build) that can be decided from a path alone.
FileCategory = Literal[
    "runtime_core", "application_logic", "domain_model", "infrastructure",
    "database_layer", "api_layer", "cli_entrypoint",
    "tests", "tooling", "build_system",
]
32
+
33
+
34
+ @dataclass
35
+ class FileClassification:
36
+ path: str
37
+ category: FileCategory
38
+ confidence: Literal["high", "medium", "low"]
39
+ relevance: float
40
+ reason: str
41
+ evidence: list[str] = field(default_factory=list)
42
+
43
+
44
# File suffixes treated as source code.
_CODE_EXTENSIONS = {
    ".py",
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx",
    ".go", ".rs", ".java", ".kt", ".scala", ".rb", ".php", ".cs",
}

# Directory names that mark test code and tooling.
_TEST_DIRS = {"test", "tests", "__tests__", "spec", "specs", "e2e"}
_TOOLING_DIRS = {
    "scripts", "script",
    "tools", "tool", "tooling",
    "ci", ".github", ".vscode",
}

# Exact filenames recognized as build/package manifests.
_BUILD_FILES = {
    "package.json", "pnpm-workspace.yaml", "turbo.json", "nx.json",
    "tsconfig.json",
    "vite.config.ts", "vite.config.js",
    "webpack.config.js", "rollup.config.js",
    "pyproject.toml",
    "go.mod",
    "Cargo.toml",
    "pom.xml", "build.gradle", "settings.gradle",
    "Makefile", "Dockerfile",
}

# Exact filenames recognized as linter/formatter/test-runner configuration.
_TOOLING_FILES = {
    ".eslintrc", "eslint.config.js", "eslint.config.ts",
    ".prettierrc", "prettier.config.js",
    "jest.config.js", "jest.config.ts",
    "vitest.config.ts", "vitest.config.js",
    ".editorconfig",
}

# Import names grouped by the kind of dependency they indicate.
_API_IMPORTS = {
    "fastapi", "flask", "django",
    "express", "koa", "fastify", "hono",
    "@nestjs/core", "@apollo/server", "graphql",
    "springframework",
}
_DB_IMPORTS = {
    "sqlalchemy", "psycopg2", "asyncpg", "pymongo",
    "mongoose", "prisma", "@prisma/client", "typeorm", "sequelize",
    "pg", "mysql2", "redis",
}
_INFRA_IMPORTS = {
    "boto3", "botocore", "kubernetes",
    "celery", "dramatiq", "bullmq",
    "kafkajs", "amqplib",
    "firebase-admin", "@aws-sdk/",
}

# One capture group per import form, in this order:
#   1. ``from X import``   2. ``import X``
#   3. ``require('X')``    4. ``from 'X'``
# Exactly one group is non-empty for any given match.
_IMPORT_RE = re.compile(
    r"(?:from\s+([A-Za-z0-9_@./-]+)\s+import|import\s+([A-Za-z0-9_@./-]+)|"
    r"require\(['\"]([^'\"]+)['\"]\)|from\s+['\"]([^'\"]+)['\"])",
    re.MULTILINE,
)

# Matches a definition keyword followed by the start of an identifier.
_DEF_RE = re.compile(
    r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]",
    re.MULTILINE,
)
80
+
81
+
82
class FileClassifier:
    """Assign an evidence-backed category to repository files.

    Tests, build manifests and tooling are recognized from the path alone.
    Every other category needs evidence: a declared production entry point,
    a matching import, a workspace package role, or code definitions found in
    the file content.
    """

    def __init__(
        self,
        root: Path,
        entry_points: list[EntryPoint],
        monorepo_packages: list[MonorepoPackageInfo] | None = None,
    ) -> None:
        """Prepare lookup tables used by :meth:`classify`.

        Args:
            root: Directory that relative file paths are resolved against.
            entry_points: Detected entry points; each one is normalized first.
            monorepo_packages: Optional workspace packages whose
                ``architectural_role`` is applied to files under their path.
        """
        self.root = root
        self.entry_points = [normalize_entry_point(ep) for ep in entry_points]
        production = [ep for ep in self.entry_points if is_production_entry_point(ep)]
        self.production_entry_paths = {ep.path for ep in production}
        self.cli_entry_paths = {ep.path for ep in production if ep.kind == "cli"}
        # Keys always end with "/" so prefix matching cannot stop mid-directory
        # name (e.g. "packages/api" must not match "packages/api-docs/...").
        self._pkg_roles = {
            pkg.path.rstrip("/") + "/": pkg.architectural_role
            for pkg in (monorepo_packages or [])
        }

    def classify_paths(self, paths: list[str], *, limit: int = 20) -> list[FileClassification]:
        """Classify *paths* and return the top *limit* results.

        Unclassifiable paths are dropped. Results are ordered by descending
        relevance, ties broken by path.
        """
        classified = [
            item
            for item in (self.classify(path) for path in paths)
            if item is not None
        ]
        classified.sort(key=lambda item: (-item.relevance, item.path))
        return classified[:limit]

    def classify(self, path: str) -> FileClassification | None:
        """Return the classification for *path*, or ``None`` without evidence.

        Checks run from cheapest to most expensive; the file is only read once
        every path-only rule has failed.
        """
        norm = path.replace("\\", "/").lstrip("/")
        parts = norm.split("/")
        pure = Path(norm)
        filename = pure.name
        suffix = pure.suffix.lower()
        parent_dirs = {part.lower() for part in parts[:-1]}

        # Structural categories: decided from the path alone.
        if parent_dirs & _TEST_DIRS or self._is_test_file(norm):
            return FileClassification(norm, "tests", "high", 0.35, "test file by path/suffix convention", [norm])

        if filename in _BUILD_FILES:
            return FileClassification(norm, "build_system", "high", 0.45, "build or package manifest", [filename])

        if filename in _TOOLING_FILES or parent_dirs & _TOOLING_DIRS:
            return FileClassification(norm, "tooling", "high", 0.25, "tooling/config path", [norm])

        if suffix not in _CODE_EXTENSIONS:
            return None

        # Declared entry points need no file content, so they are resolved
        # before the file is read.
        if norm in self.cli_entry_paths:
            return FileClassification(norm, "cli_entrypoint", "high", 1.0, "declared production CLI entrypoint", ["entry_points"])

        if norm in self.production_entry_paths:
            return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])

        content = self._read(norm)
        imports = self._imports(content)
        has_defs = bool(_DEF_RE.search(content))

        api_hits = self._matched_imports(imports, _API_IMPORTS)
        if api_hits:
            return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", api_hits)

        db_hits = self._matched_imports(imports, _DB_IMPORTS)
        if db_hits:
            return FileClassification(norm, "database_layer", "high", 0.78, "imports database/persistence dependency", db_hits)

        infra_hits = self._matched_imports(imports, _INFRA_IMPORTS)
        if infra_hits:
            return FileClassification(norm, "infrastructure", "high", 0.72, "imports infrastructure dependency", infra_hits)

        role = self._package_role(norm)
        if role in {"runtime_core", "backend_runtime", "frontend_runtime", "plugin_host"} and has_defs:
            return FileClassification(norm, "application_logic", "medium", 0.65, "code definitions inside runtime package", [f"workspace_role:{role}"])

        if self._looks_like_domain_model(norm, content, has_defs):
            return FileClassification(norm, "domain_model", "medium", 0.58, "model/entity definitions detected", ["class/type definition"])

        if has_defs and imports:
            return FileClassification(norm, "application_logic", "medium", 0.52, "code definitions with imports", self._sample(imports))

        return None

    def _read(self, path: str) -> str:
        """Return up to the first 12000 characters of *path*, or ``""`` on I/O error."""
        try:
            # Read only the prefix that is inspected instead of loading the
            # whole file and slicing it afterwards.
            with (self.root / path).open(encoding="utf-8", errors="replace") as handle:
                return handle.read(12000)
        except OSError:
            return ""

    def _imports(self, content: str) -> list[str]:
        """Extract imported module/package names from *content*, in order."""
        imports: list[str] = []
        for match in _IMPORT_RE.findall(content):
            # Each match is a tuple with one group per import form; keep the
            # first non-empty one.
            value = next((part for part in match if part), "")
            if value:
                imports.append(value)
        return imports

    def _has_any_import(self, imports: list[str], needles: set[str]) -> bool:
        """Return ``True`` when at least one import matches a needle."""
        return bool(self._matched_imports(imports, needles))

    def _matched_imports(self, imports: list[str], needles: set[str]) -> list[str]:
        """Return up to four ``import:<name>`` evidence strings for matching imports.

        A needle matches an import that equals it or extends it with a ``/`` or
        ``.`` separator. A needle that already ends with a separator (such as
        ``"@aws-sdk/"``) is a pure prefix; appending another separator to it
        would make it unmatchable.

        NOTE(review): matching is anchored at the start of the import, so a
        bare needle such as "springframework" cannot match a fully-qualified
        name like "org.springframework.web".
        """
        matched: list[str] = []
        for imp in imports:
            low = imp.lower()
            for needle in needles:
                if needle.endswith(("/", ".")):
                    hit = low.startswith(needle)
                else:
                    hit = low == needle or low.startswith((needle + "/", needle + "."))
                if hit:
                    matched.append(f"import:{imp}")
                    break
        return matched[:4]

    def _package_role(self, path: str) -> str:
        """Return the role of the first workspace package containing *path*, else ``""``."""
        for prefix, role in self._pkg_roles.items():
            if path.startswith(prefix):
                return role
        return ""

    def _is_test_file(self, path: str) -> bool:
        """Return ``True`` when the filename follows a test naming convention."""
        name = Path(path).name.lower()
        return (
            name.startswith("test_")
            or ".test." in name
            or ".spec." in name
            # Python and Go both use the ``_test`` filename suffix.
            or name.endswith(("_test.py", "_test.go"))
        )

    def _looks_like_domain_model(self, path: str, content: str, has_defs: bool) -> bool:
        """Return ``True`` for files with definitions that look like models/entities.

        Definitions are required. Beyond that, either a parent directory has a
        model-like name, or the content mentions ``@dataclass`` or pydantic.
        """
        if not has_defs:
            return False
        parts = {part.lower() for part in path.split("/")[:-1]}
        if parts & {"domain", "models", "model", "entities", "entity"}:
            return True
        return "@dataclass" in content or "pydantic" in content.lower()

    def _sample(self, imports: list[str]) -> list[str]:
        """Return the first four imports formatted as evidence strings."""
        return [f"import:{imp}" for imp in imports[:4]]
215
+
@@ -410,6 +410,8 @@ class TaskContextBuilder:
410
410
  direct = [
411
411
  d for d in dep_records
412
412
  if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
413
+ and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
414
+ and d.scope not in {"dev"}
413
415
  ]
414
416
  direct.sort(key=lambda d: (0 if d.ecosystem == primary_eco else 1, d.name.lower()))
415
417
  key_dependencies = [asdict(d) for d in direct[:15]]
@@ -626,7 +628,12 @@ class TaskContextBuilder:
626
628
  uncommitted_files: Optional[set[str]] = None,
627
629
  ) -> list[RelevantFile]:
628
630
  from sourcecode.relevance_scorer import RelevanceScorer
631
+ from sourcecode.file_classifier import FileClassifier
629
632
  scorer = RelevanceScorer(monorepo_packages or [])
633
+ file_classifier = FileClassifier(self.root, [
634
+ # _rank_files only needs production path evidence; EntryPoint objects
635
+ # are not available here, so category evidence is best-effort below.
636
+ ], monorepo_packages or [])
630
637
 
631
638
  # Auxiliary entry points (benchmark, docs, examples) must not get
632
639
  # the production entry boost — they are not runtime signals.
@@ -660,12 +667,10 @@ class TaskContextBuilder:
660
667
  score += 3.0
661
668
  reasons.append("entry point")
662
669
 
663
- path_lower = path.lower()
664
- for keyword in spec.ranking_boosts:
665
- if keyword in path_lower:
666
- score += 1.5
667
- reasons.append(f"matches '{keyword}'")
668
- break
670
+ file_class = file_classifier.classify(path)
671
+ if file_class is not None:
672
+ score += file_class.relevance * 2.0
673
+ reasons.append(f"{file_class.category}: {file_class.reason}")
669
674
 
670
675
  if is_test:
671
676
  score += 2.0
@@ -673,7 +678,7 @@ class TaskContextBuilder:
673
678
  elif self._is_source(path):
674
679
  score += 0.5
675
680
  if not reasons:
676
- reasons.append("source file")
681
+ reasons.append("source file with supported extension")
677
682
 
678
683
  # Operational relevance boost/penalty from package role
679
684
  rel = scorer.score(path)
sourcecode/serializer.py CHANGED
@@ -16,6 +16,7 @@ from pathlib import Path
16
16
  from typing import Any, Optional
17
17
 
18
18
  from sourcecode.entrypoint_classifier import normalize_entry_point, is_production_entry_point
19
+ from sourcecode.file_classifier import FileClassifier
19
20
  from sourcecode.schema import (
20
21
  ArchitectureAnalysis,
21
22
  ModuleGraph,
@@ -86,6 +87,146 @@ def _entry_point_groups(entry_points: list[Any]) -> dict[str, list[dict[str, Any
86
87
  return groups
87
88
 
88
89
 
90
# Dependency roles grouped by the operational bucket they are reported under.
_PRODUCTION_DEP_ROLES = {
    "runtime",
    "parsing",
    "serialization",
    "observability",
    "infra",
}
_DEV_DEP_ROLES = {"devtool"}
_TEST_DEP_ROLES = {"testtool"}
_BUILD_DEP_ROLES = {"buildtool"}
94
+
95
+
96
def _dependency_groups(sm: SourceMap) -> dict[str, list[dict[str, Any]]]:
    """Split non-transitive dependencies into operational buckets.

    Every bucket key is always present. All buckets are empty when dependency
    analysis was not requested. A production dependency declared in a manifest
    whose normalized name never appears among the statically observed imports
    is additionally copied into ``suspicious_dependencies`` with a ``reason``.

    NOTE(review): the suspicious check compares declared names with import
    names, so a package whose import name differs from its declared name would
    presumably be reported as suspicious; confirm before treating it as dead.
    """
    groups: dict[str, list[dict[str, Any]]] = {
        bucket: []
        for bucket in (
            "production_dependencies",
            "dev_tools",
            "test_utilities",
            "build_tooling",
            "noise_dependencies",
            "suspicious_dependencies",
        )
    }
    summary = sm.dependency_summary
    if summary is None or not summary.requested:
        return groups

    analyzed = sm.metadata.analyzed_path
    root = Path(analyzed) if analyzed else Path(".")
    observed_imports = _dependency_import_index(root, sm.file_paths)

    for dep in summary.dependencies:
        if dep.scope == "transitive":
            continue

        entry = {
            key: value
            for key, value in asdict(dep).items()
            if value is not None and key not in {"parent"}
        }
        role = dep.role or "unknown"
        scope = dep.scope
        import_key = _dep_import_key(dep.name)

        if role in _PRODUCTION_DEP_ROLES and scope not in {"dev"}:
            groups["production_dependencies"].append(entry)
            if dep.source == "manifest" and import_key not in observed_imports:
                flagged = dict(entry)
                flagged["reason"] = "declared as production dependency but no static import observed"
                groups["suspicious_dependencies"].append(flagged)
            continue

        # Role-specific buckets take precedence over the scope-based fallback.
        if role in _TEST_DEP_ROLES:
            bucket = "test_utilities"
        elif role in _BUILD_DEP_ROLES:
            bucket = "build_tooling"
        elif role in _DEV_DEP_ROLES or scope in {"dev", "optional"}:
            bucket = "dev_tools"
        else:
            bucket = "noise_dependencies"
        groups[bucket].append(entry)

    for entries in groups.values():
        entries.sort(key=lambda d: (d.get("ecosystem", ""), d.get("name", "")))
    return groups
140
+
141
+
142
def _dependency_import_index(root: Path, file_paths: list[str]) -> set[str]:
    """Collect normalized names of packages imported by Python/JS/TS sources.

    Args:
        root: Directory that *file_paths* are relative to.
        file_paths: Candidate paths; only files with a Python or
            JavaScript/TypeScript suffix are read.

    Returns:
        The set of import keys produced by ``_dep_import_key`` for every
        non-relative import found. For a dotted import (``pkg.sub``), the
        top-level package name (``pkg``) is included as well. Unreadable files
        are skipped.
    """
    import re
    from itertools import islice

    scanned_suffixes = {".py", ".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs"}
    import_re = re.compile(
        r"(?:from\s+([A-Za-z0-9_@./-]+)\s+import|import\s+([A-Za-z0-9_@./-]+)|"
        r"require\(['\"]([^'\"]+)['\"]\)|from\s+['\"]([^'\"]+)['\"])",
        re.MULTILINE,
    )

    # Filter by suffix before applying the cap, so the 2000-file budget is
    # spent on files that can contain imports, not on whatever paths happen to
    # come first in the listing.
    candidates = (
        path for path in file_paths
        if Path(path).suffix.lower() in scanned_suffixes
    )

    index: set[str] = set()
    for path in islice(candidates, 2000):
        try:
            content = (root / path).read_text(encoding="utf-8", errors="replace")[:20000]
        except OSError:
            continue
        for match in import_re.findall(content):
            raw = next((part for part in match if part), "")
            if not raw or raw.startswith("."):
                # Relative imports refer to project files, not dependencies.
                continue
            key = _dep_import_key(raw)
            index.add(key)
            if not key.startswith("@"):
                # Importing "pkg.sub" also uses the top-level package "pkg",
                # so a dependency named "pkg" must count as imported.
                index.add(key.split(".", 1)[0])
    return index
163
+
164
+
165
def _dep_import_key(name: str) -> str:
    """Normalize a dependency or import name to a comparable package key.

    The name is lower-cased. Scoped names (``@scope/pkg/...``) keep their
    first two path segments. Any other name keeps only its first path segment,
    with underscores turned into hyphens.
    """
    segments = name.lower().split("/")
    head = segments[0]
    if head.startswith("@"):
        return "/".join(segments[:2])
    return head.replace("_", "-")
171
+
172
+
173
def _file_relevance(sm: SourceMap, *, limit: int = 15) -> list[dict[str, Any]]:
    """Return the top *limit* file classifications as plain dictionaries.

    Paths are resolved against the analyzed path recorded in the metadata, or
    the current directory when none is recorded.
    """
    analyzed = sm.metadata.analyzed_path
    base_dir = Path(analyzed) if analyzed else Path(".")
    ranked = FileClassifier(
        base_dir, sm.entry_points, sm.monorepo_packages
    ).classify_paths(sm.file_paths, limit=limit)
    return list(map(asdict, ranked))
178
+
179
+
180
def _architecture_context(sm: SourceMap) -> dict[str, Any]:
    """Build the architecture section of the agent view.

    When the architecture analyzer ran, its pattern, confidence, method,
    per-layer summary and limitations are reported; a missing, "unknown" or
    "flat" pattern is replaced by an explicit "not confirmed" label. When it
    did not run, a low-confidence placeholder is returned instead.
    """
    unconfirmed = "no confirmed architecture pattern; inferred partial layering"
    arch = sm.architecture

    if arch is None or not arch.requested:
        return {
            "summary": sm.architecture_summary,
            "pattern": unconfirmed,
            "confidence": "low",
            "method": "not_requested",
            "limitations": [
                "architecture analyzer not requested; summary limited to stack, filesystem and entrypoint evidence"
            ],
        }

    pattern = unconfirmed if arch.pattern in (None, "unknown", "flat") else arch.pattern
    layer_rows = []
    for layer in arch.layers:
        layer_rows.append(
            {
                "name": layer.name,
                "confidence": layer.confidence,
                "file_count": len(layer.files),
            }
        )
    return {
        "summary": sm.architecture_summary,
        "pattern": pattern,
        "confidence": arch.confidence,
        "method": arch.method,
        "layers": layer_rows,
        "limitations": arch.limitations,
    }
208
+
209
+
210
def _section_confidence(sm: SourceMap) -> dict[str, str]:
    """Report one confidence level per section of the agent view.

    Dependencies are "low" when not analyzed, "high" when the analysis has
    sources and a positive total count, and "medium" otherwise. Architecture
    uses the analyzer's own confidence when it ran. File relevance is "medium"
    whenever any file paths are known. Stack and entry points come from the
    confidence summary when one exists.
    """
    deps = sm.dependency_summary
    if deps is None or not deps.requested:
        dependencies = "low"
    elif deps.sources and deps.total_count > 0:
        dependencies = "high"
    else:
        dependencies = "medium"

    arch = sm.architecture
    architecture = arch.confidence if arch is not None and arch.requested else "low"

    summary = sm.confidence_summary
    return {
        "stack": summary.stack_confidence if summary else "low",
        "entrypoints": summary.entry_point_confidence if summary else "low",
        "dependencies": dependencies,
        "architecture": architecture,
        "file_relevance": "medium" if sm.file_paths else "low",
    }
228
+
229
+
89
230
  def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
90
231
  """Context package ready for prompt or handoff (~600-800 tokens).
91
232
 
@@ -102,10 +243,13 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
102
243
  """
103
244
  dep_summary_dict: Any = None
104
245
  key_deps: Any = None
105
- if sm.dependency_summary is not None and sm.dependency_summary.requested:
106
- dep_summary_dict = asdict(sm.dependency_summary)
107
- dep_summary_dict.pop("dependencies", None)
108
- key_deps = [asdict(d) for d in sm.key_dependencies]
246
+ if sm.dependency_summary is not None and sm.dependency_summary.requested:
247
+ dep_summary_dict = asdict(sm.dependency_summary)
248
+ dep_summary_dict.pop("dependencies", None)
249
+ key_deps = [
250
+ asdict(d) for d in sm.key_dependencies
251
+ if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
252
+ ]
109
253
  elif sm.dependency_summary is None or not sm.dependency_summary.requested:
110
254
  dep_summary_dict = None # "not analyzed" — agent should add --dependencies
111
255
 
@@ -455,9 +599,13 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
455
599
  result["development_entry_points"] = ep_groups["development"]
456
600
  result["auxiliary_entry_points"] = ep_groups["auxiliary"]
457
601
 
458
- # ── 3. Architecture ───────────────────────────────────────────────────────
459
- if sm.architecture_summary:
460
- result["architecture"] = sm.architecture_summary
602
+ # ── 3. Architecture ───────────────────────────────────────────────────────
603
+ result["architecture"] = _architecture_context(sm)
604
+
605
+ # ── 3a. File relevance: evidence-backed categories, not keyword matches ──
606
+ relevant_files = _file_relevance(sm)
607
+ if relevant_files:
608
+ result["file_relevance"] = relevant_files
461
609
 
462
610
  # ── 3b. Monorepo package roles (when available) ───────────────────────────
463
611
  if sm.monorepo_packages:
@@ -470,13 +618,25 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
470
618
  if operational_pkgs:
471
619
  result["runtime_packages"] = operational_pkgs
472
620
 
473
- # ── 4. Key dependencies (role-sorted, already computed) ───────────────────
474
- if sm.dependency_summary and sm.dependency_summary.requested and sm.key_dependencies:
475
- _dep_skip = {"parent", "manifest_path", "workspace", "source", "ecosystem"}
476
- result["key_dependencies"] = [
477
- {k: v for k, v in asdict(d).items() if v is not None and k not in _dep_skip}
478
- for d in sm.key_dependencies
479
- ]
621
+ # ── 4. Dependencies: separated by operational role ───────────────────────
622
+ dep_groups = _dependency_groups(sm)
623
+ if dep_groups["production_dependencies"]:
624
+ result["production_dependencies"] = dep_groups["production_dependencies"][:15]
625
+ for dep_key in ("dev_tools", "test_utilities", "build_tooling", "noise_dependencies", "suspicious_dependencies"):
626
+ if dep_groups[dep_key]:
627
+ result[dep_key] = dep_groups[dep_key][:15]
628
+
629
+ # Backward-compatible compact list, now production-only.
630
+ production_key_deps = [
631
+ d for d in sm.key_dependencies
632
+ if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
633
+ ]
634
+ if sm.dependency_summary and sm.dependency_summary.requested and production_key_deps:
635
+ _dep_skip = {"parent", "manifest_path", "workspace", "source", "ecosystem"}
636
+ result["key_dependencies"] = [
637
+ {k: v for k, v in asdict(d).items() if v is not None and k not in _dep_skip}
638
+ for d in production_key_deps[:15]
639
+ ]
480
640
 
481
641
  # ── 5. Signals — compact operational context ─────────────────────────────
482
642
  signals: dict[str, Any] = {}
@@ -509,11 +669,12 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
509
669
  # ── 6. Confidence summary ─────────────────────────────────────────────────
510
670
  if sm.confidence_summary is not None:
511
671
  cs = sm.confidence_summary
512
- conf: dict[str, Any] = {
513
- "overall": cs.overall,
514
- "stack": cs.stack_confidence,
515
- "entry_points": cs.entry_point_confidence,
516
- }
672
+ conf: dict[str, Any] = {
673
+ "overall": cs.overall,
674
+ "stack": cs.stack_confidence,
675
+ "entry_points": cs.entry_point_confidence,
676
+ "sections": _section_confidence(sm),
677
+ }
517
678
  if cs.hard_signals:
518
679
  conf["hard_signals"] = cs.hard_signals
519
680
  if cs.soft_signals:
@@ -596,10 +757,13 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
596
757
 
597
758
  # Layer B — signals (only when the corresponding analyzer ran)
598
759
  if sm.dependency_summary is not None and sm.dependency_summary.requested:
599
- dep_dict = asdict(sm.dependency_summary)
600
- dep_dict.pop("dependencies", None) # avoid duplication with key_dependencies
601
- result["dependency_summary"] = dep_dict
602
- result["key_dependencies"] = [asdict(d) for d in sm.key_dependencies]
760
+ dep_dict = asdict(sm.dependency_summary)
761
+ dep_dict.pop("dependencies", None) # avoid duplication with key_dependencies
762
+ result["dependency_summary"] = dep_dict
763
+ result["key_dependencies"] = [
764
+ asdict(d) for d in sm.key_dependencies
765
+ if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
766
+ ]
603
767
 
604
768
  if sm.env_summary is not None and sm.env_summary.requested:
605
769
  result["env_summary"] = asdict(sm.env_summary)
sourcecode/summarizer.py CHANGED
@@ -9,6 +9,7 @@ from pathlib import Path
9
9
  from typing import Any
10
10
 
11
11
  from sourcecode.detectors.parsers import load_json_file, load_toml_file
12
+ from sourcecode.entrypoint_classifier import is_production_entry_point
12
13
  from sourcecode.schema import MonorepoPackageInfo, SourceMap
13
14
 
14
15
  _TOOLING_PREFIXES = (".claude/", ".vscode/", "bin/")
@@ -31,6 +32,8 @@ _ARCH_LAYER_NAMES = {
31
32
  "schemas", "types",
32
33
  "migrations", "seeds",
33
34
  "scripts", "tools",
35
+ "docs", "doc", "documentation", "examples", "example", "benchmarks",
36
+ "benchmark", "playground", "playgrounds", "fixtures", "fixture",
34
37
  }
35
38
 
36
39
  _CODE_EXTENSIONS = {
@@ -108,7 +111,6 @@ class ProjectSummarizer:
108
111
  frameworks = [f.name for f in primary.frameworks]
109
112
  fw_part = f" ({', '.join(frameworks[:3])})" if frameworks else ""
110
113
 
111
- arch_pattern = self._detect_architecture_pattern(sm.file_paths)
112
114
  domains = self._extract_business_domains(sm.file_paths)
113
115
  dep_part = self._build_dep_part(sm)
114
116
 
@@ -122,13 +124,16 @@ class ProjectSummarizer:
122
124
  domains_part = f" Dominios: {', '.join(domains)}." if domains else ""
123
125
  return f"Monorepo{ws_part} en {stacks_desc}.{domains_part}{dep_part}"
124
126
 
125
- arch_suffix = f" con arquitectura {arch_pattern}" if arch_pattern else ""
127
+ arch_suffix = ""
126
128
  base = f"{type_label} en {stack_name}{fw_part}{arch_suffix}."
127
129
 
128
130
  if domains:
129
131
  extra = f" Dominios: {', '.join(domains)}."
130
132
  else:
131
- ep_paths = [ep.path for ep in sm.entry_points if not self._is_tooling_path(ep.path)][:3]
133
+ ep_paths = [
134
+ ep.path for ep in sm.entry_points
135
+ if not self._is_tooling_path(ep.path) and is_production_entry_point(ep)
136
+ ][:3]
132
137
  extra = f" Entry points: {', '.join(ep_paths)}." if ep_paths else ""
133
138
 
134
139
  return f"{base}{extra}{dep_part}"
@@ -210,12 +215,10 @@ class ProjectSummarizer:
210
215
  if non_tooling_stacks:
211
216
  primary = self._select_summary_primary_stack(non_tooling_stacks)
212
217
  frameworks = [fw.name for fw in primary.frameworks[:2]]
213
- arch_pattern = self._detect_architecture_pattern(sm.file_paths)
214
- arch_str = f" con arquitectura {arch_pattern}" if arch_pattern else ""
215
218
  if frameworks:
216
- parts.append(f"Stack: {primary.stack.capitalize()} ({', '.join(frameworks)}){arch_str}")
219
+ parts.append(f"Stack: {primary.stack.capitalize()} ({', '.join(frameworks)})")
217
220
  else:
218
- parts.append(f"Stack: {primary.stack.capitalize()}{arch_str}")
221
+ parts.append(f"Stack: {primary.stack.capitalize()}")
219
222
 
220
223
  # Business domains only — skip entry_points (too technical for product summary)
221
224
  domains = self._extract_business_domains(sm.file_paths)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.30.0
3
+ Version: 0.31.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,28 +1,29 @@
1
- sourcecode/__init__.py,sha256=MU2HxHzhdlDeES-MGTUNA1df0X4nB3GWAvjTRWUEoys,100
2
- sourcecode/architecture_analyzer.py,sha256=SBRMWJN70M2qeNLkm9oCG_1rw2UOVuNgikyeAHJsXKw,22859
1
+ sourcecode/__init__.py,sha256=lB4qjieACxD90qahkCtPTDiGAgKIQbJhcHbLxOgO4lc,100
2
+ sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
3
3
  sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
4
4
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
5
- sourcecode/cli.py,sha256=LKtus6aETNZv70fkp5LrjTfvu5w9jsB4go-7MCoDnzg,50611
5
+ sourcecode/cli.py,sha256=weX1vbYuzcSJ8Ny-6HWXevB9ZvNbu-8qrdh6Sxgl9JQ,50752
6
6
  sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
7
- sourcecode/confidence_analyzer.py,sha256=B48lCuz_t_qsyjPQdLbKUj2kJ0Wu4Sq5ZnO18F_v3eU,12069
7
+ sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
8
8
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
9
9
  sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
10
10
  sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
11
11
  sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
12
12
  sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
13
13
  sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
14
+ sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
14
15
  sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
15
16
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
16
17
  sourcecode/metrics_analyzer.py,sha256=4uh11v-Q0gdrN87BOxuFWUym3N3AOkOuy21K5N8peB8,20126
17
- sourcecode/prepare_context.py,sha256=--lD2dhNkBYI8kwb14d1DlFmEN8XF1Ygtf0Qk7-Y1Bs,30911
18
+ sourcecode/prepare_context.py,sha256=vxEzr8czS3MFbdTx4hBJQlJLrl9cuvbHdL3ZokxFkvo,31384
18
19
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
19
20
  sourcecode/relevance_scorer.py,sha256=2yvxDFnz9YGrHEJubgx9soiVIDZHKv_pntOtTARtKow,5928
20
21
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
21
22
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
22
23
  sourcecode/schema.py,sha256=wylO5aKFBHBUAvMh4AH6hKKcN8p5yt6XRkyRvZRjV-4,20378
23
24
  sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
24
- sourcecode/serializer.py,sha256=VksZokFUG3GLWz_eUtVqNdkddkeV-tBY2lzfa8ociAc,27898
25
- sourcecode/summarizer.py,sha256=YfBixsN1zWHHXdOEqaf793BylbJrsj75ST7FN6jcqRU,15424
25
+ sourcecode/serializer.py,sha256=c6q0rdrxeVpVfMF_yYK_1xRp6jtfR2UWePBKG9dx6-o,34315
26
+ sourcecode/summarizer.py,sha256=NJiq8zzL9qsvMkIQxqvv0oGBSuFTc5OwplrK_blJV4o,15409
26
27
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
27
28
  sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
28
29
  sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -36,7 +37,7 @@ sourcecode/detectors/heuristic.py,sha256=Hab_Uiuxtq-WBs_wCnzETBS5hhaxeEtf-GOGMH6
36
37
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
37
38
  sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
38
39
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
39
- sourcecode/detectors/nodejs.py,sha256=LN-m3bERpijlBMl1TNVOH_cJDhfDYRhn8K8lsNzztVc,12923
40
+ sourcecode/detectors/nodejs.py,sha256=7fsyAmrGkkguX6U80HUQpIe9MRaYyi_A7zbaRtmFmGc,13097
40
41
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
41
42
  sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
42
43
  sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
@@ -52,8 +53,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
52
53
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
53
54
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
54
55
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
55
- sourcecode-0.30.0.dist-info/METADATA,sha256=wjMQ_CyxnBDjQ6G_7PLE5crhTdh2sl6wd6Bkdy3t48o,25020
56
- sourcecode-0.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
57
- sourcecode-0.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
58
- sourcecode-0.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
59
- sourcecode-0.30.0.dist-info/RECORD,,
56
+ sourcecode-0.31.0.dist-info/METADATA,sha256=hWhgC_eeLe8eKsxxIIp9iAtJNmXT3yKIeVn2il4MBB8,25020
57
+ sourcecode-0.31.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
58
+ sourcecode-0.31.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
59
+ sourcecode-0.31.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
60
+ sourcecode-0.31.0.dist-info/RECORD,,