vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,586 @@
1
+ """Generic structural map builder -- Map 1.
2
+
3
+ Scans any target project directory and builds a dependency graph:
4
+ - imports_out / imports_in per file
5
+ - cycle detection (iterative Tarjan SCC)
6
+ - auto-tags: large_file, high_fan_in, high_fan_out, cycle_member, unparseable
7
+ - symbols_defined (class / function names)
8
+
9
+ Multi-language: Python files are parsed via AST (Pass 1); all other languages
10
+ registered in source_adapters.ADAPTERS (TypeScript, JavaScript, Go, Java) are
11
+ processed via regex-based adapters in Pass 1b (_collect_non_python_raw_data).
12
+ Both passes contribute StructuralEntry records to the same result list.
13
+
14
+ Remaining gap: contracts/authority/runtime maps (Maps 2-4) are Python-AST-only
15
+ today; non-Python adapters return [] stubs for those passes.
16
+
17
+ Generic design: operates on any project_dir, no Vigil-specific assumptions.
18
+ Self-diagnosis: pass project_dir=Path(".") to run against Vigil itself.
19
+
20
+ Public API:
21
+ build_structural_map(project_dir, include_roots, time_budget_s) -> list[StructuralEntry]
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import ast
26
+ import logging
27
+ import time
28
+ from collections.abc import Mapping
29
+
30
+ from pathlib import Path
31
+ from typing import Any, Sequence
32
+
33
+ from .map_common import STRUCTURAL_THRESHOLDS, iter_py_files, iter_source_files
34
+ from .map_errors import MapBuilderError
35
+ from .map_models import StructuralEntry
36
+ from .source_adapters import get_adapter_for_file
37
+ from ._extract_imports_impl import _extract_imports # noqa: PLC2701
38
+
39
# Public surface of this module: only the builder entry point is exported;
# every other name defined here carries a leading underscore.
__all__ = ["build_structural_map"]

# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Internal helpers
45
+ # ---------------------------------------------------------------------------
46
+
47
+ from .runtime_builder import _freshness_now
48
+
49
+
50
def _rel_posix(path: Path, project_dir: Path) -> str:
    """Express ``path`` relative to ``project_dir`` using forward slashes.

    When ``path`` is not located under ``project_dir`` the path itself is
    returned (still in POSIX form) instead of raising.
    """
    try:
        relative = path.relative_to(project_dir)
    except ValueError:
        # Not expected in practice: callers pass paths produced by the
        # project file iterators, which are presumed to lie below project_dir.
        return path.as_posix()
    return relative.as_posix()
57
+
58
+
59
def _is_parseable(source: str) -> bool:
    """Report whether ``source`` can be parsed as a Python module.

    Args:
        source: Full text of a Python source file.

    Returns:
        True when ``ast.parse`` accepts the text, False when it rejects it.
        Besides SyntaxError this also covers ValueError (raised for embedded
        NUL bytes on some Python versions) and RecursionError (pathologically
        nested code), so that a single odd file is reported as unparseable
        instead of aborting the caller with an unexpected exception.
    """
    try:
        ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        return False
    return True
66
+
67
+
68
def _extract_symbols_defined(source: str) -> "list[str]":
    """List the names of every class and function defined in ``source``.

    Definitions at any nesting depth are included (methods, inner functions,
    async functions), in the order ``ast.walk`` visits them.

    An empty list is returned when the source raises SyntaxError; the caller
    is expected to have tagged such a file as unparseable already.
    """
    try:
        module = ast.parse(source)
    except SyntaxError:
        return []
    definition_types = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    return [item.name for item in ast.walk(module) if isinstance(item, definition_types)]
82
+
83
+
84
def _collect_from_import_candidates(source: str) -> "list[str]":
    """Produce ``module.name`` candidates for absolute ``from module import name``.

    The regular import extraction records only ``module`` for such a
    statement. Emitting ``module.name`` as well lets a sub-module import such
    as ``from pkg import submod`` resolve to ``pkg/submod.py``.

    Relative imports (level > 0) and star imports are ignored here. The result
    may contain duplicates; de-duplication is left to the caller. A source
    that raises SyntaxError yields an empty list.
    """
    try:
        module = ast.parse(source)
    except SyntaxError:
        return []
    return [
        f"{stmt.module}.{imported.name}"
        for stmt in ast.walk(module)
        if isinstance(stmt, ast.ImportFrom) and stmt.level == 0 and stmt.module
        for imported in stmt.names
        if imported.name != "*"
    ]
105
+
106
+
107
# Extensions used by the TS/JS import resolver: stripped from a specifier that
# spells one out, then tried in this order when probing the known files.
_TS_EXTS = (".ts", ".tsx", ".js", ".jsx")
108
+
109
+
110
def _collect_non_python_raw_data(
    project_dir: Path,
    include_roots: "Sequence[str] | None",
    max_file_bytes: float = float("inf"),
    oversized_files: "list[dict] | None" = None,
    cancel_event: "Any | None" = None,
) -> "dict[str, dict]":
    """Collect structural raw data for non-Python source files (Pass 1b).

    Every file yielded by ``iter_source_files`` is matched to a source
    adapter. Files without an adapter, Python files, and files whose adapter
    does not support structural extraction are skipped.

    Args:
        project_dir: Project root; result keys are paths relative to it.
        include_roots: Forwarded to ``iter_source_files`` to restrict the scan.
        max_file_bytes: Files larger than this are skipped with a warning.
        oversized_files: Optional list that receives one
            ``{"path", "size_mb"}`` record per skipped oversized file.
        cancel_event: Optional object exposing ``is_set()``; polled before
            each file, and the loop stops as soon as it reports True.

    Returns:
        Mapping of relative POSIX path to a dict with the keys
        ``imports_out``, ``symbols_defined``, ``size_lines``, ``unparseable``
        and ``language``.
    """
    collected: dict[str, dict] = {}

    for source_path in iter_source_files(project_dir, include_roots=include_roots):
        if cancel_event is not None and cancel_event.is_set():
            _log.info("_collect_non_python_raw_data: cancelled, stopping early")
            break

        adapter = get_adapter_for_file(source_path)
        if adapter is None:
            continue
        if adapter.language == "python" or not adapter.supports_structural:
            continue

        rel_key = _rel_posix(source_path, project_dir)

        # Size guard: a file whose size cannot be determined counts as 0 bytes
        # and is therefore still attempted below.
        try:
            byte_count = source_path.stat().st_size
        except OSError:
            byte_count = 0
        if byte_count > max_file_bytes:
            mebibytes = byte_count / (1024 * 1024)
            _log.warning(
                "_collect_non_python_raw_data: skipping oversized file %s (%.1f MiB)",
                source_path, mebibytes,
            )
            if oversized_files is not None:
                oversized_files.append({"path": str(source_path), "size_mb": round(mebibytes, 3)})
            continue

        try:
            text = source_path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.warning("_collect_non_python_raw_data: cannot read %s: %s", source_path, exc)
            continue

        # Count lines, including a final line that lacks a trailing newline.
        line_total = text.count("\n")
        if text and not text.endswith("\n"):
            line_total += 1

        # Any adapter failure marks the file unparseable instead of aborting the scan.
        try:
            edges = adapter.extract_imports(text, source_path)
            symbols = adapter.extract_symbols(text, source_path)
        except Exception as exc:
            _log.warning("_collect_non_python_raw_data: adapter error on %s: %s", rel_key, exc)
            edges, symbols, failed = [], [], True
        else:
            failed = False

        # Ordered de-duplication of the non-empty import targets.
        unique_targets: dict = {}
        for edge in edges:
            if edge.to_module:
                unique_targets.setdefault(edge.to_module, None)

        collected[rel_key] = {
            "imports_out": list(unique_targets),
            "symbols_defined": [symbol.name for symbol in symbols],
            "size_lines": line_total,
            "unparseable": failed,
            "language": adapter.language,
        }

    _log.debug("_collect_non_python_raw_data: %d non-Python files", len(collected))
    return collected
162
+
163
+
164
def _resolve_ts_import_to_rel(
    import_str: str,
    importer_rel: str,
    known_files: "Mapping[str, object]",
) -> "str | None":
    """Resolve a TS/JS import specifier to a key of ``known_files``.

    Supported specifier forms:
      - relative (``./x``, ``../x``): resolved against the importer's directory;
      - ``@/x``: treated as relative to the project root;
      - anything else is considered an external package and yields None.

    For a resolved base path, plain files (``base.ts``, ``base.tsx``,
    ``base.js``, ``base.jsx``) are tried first and directory indexes
    (``base/index.<ext>``) only afterwards, mirroring the file-before-directory
    order of TypeScript/Node module resolution.

    Args:
        import_str: Import specifier exactly as written in the source file.
        importer_rel: Project-relative POSIX path of the importing file.
        known_files: Mapping whose keys are project-relative POSIX paths.

    Returns:
        The matching key, or None when the specifier is empty, refers to an
        external package, climbs above the project root, or matches no
        known file.
    """
    if not import_str:
        return None

    def _probe(base: str) -> "str | None":
        # Drop an explicitly written extension (e.g. ESM-style "./foo.js")
        # so that every known extension can be tried for the same base.
        for ext in _TS_EXTS:
            if base.endswith(ext):
                base = base[: -len(ext)]
                break
        # Plain files take precedence over directory index files.
        if base:
            for ext in _TS_EXTS:
                if (base + ext) in known_files:
                    return base + ext
        # An empty base denotes the project root, whose index has no
        # leading slash in the key space.
        index_stem = (base + "/index") if base else "index"
        for ext in _TS_EXTS:
            if (index_stem + ext) in known_files:
                return index_stem + ext
        return None

    if import_str.startswith(("./", "../")):
        importer_dir = "/".join(importer_rel.split("/")[:-1])
        raw = (importer_dir + "/" + import_str) if importer_dir else import_str
        parts: list[str] = []
        for segment in raw.split("/"):
            if segment == "..":
                if not parts:
                    # The specifier points outside the project root, so it
                    # cannot name a known file.
                    return None
                parts.pop()
            elif segment and segment != ".":
                parts.append(segment)
        return _probe("/".join(parts))
    if import_str.startswith("@/"):
        return _probe(import_str[2:])
    return None  # bare package import -- external
194
+
195
+
196
def _resolve_any_import(
    import_str: str,
    importer_rel: str,
    known_files: "Mapping[str, object]",
) -> "str | None":
    """Resolve an import using the resolver that fits the importing file.

    Importers whose relative path ends in ``.py`` go through the Python
    dotted-name resolver; every other importer goes through the TS/JS
    specifier resolver.
    """
    resolver = (
        _resolve_import_to_rel
        if importer_rel.endswith(".py")
        else _resolve_ts_import_to_rel
    )
    return resolver(import_str, importer_rel, known_files)
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # Tarjan SCC (iterative) — cycle detection
204
+ # ---------------------------------------------------------------------------
205
+
206
def _tarjan_sccs(graph: "dict[str, list[str]]") -> "list[list[str]]":
    """Find the cyclic strongly connected components of ``graph``.

    Tarjan's algorithm is run with an explicit frame stack instead of
    recursion.

    Args:
        graph: Adjacency mapping ``node -> successors``. Successors that are
            not keys of the mapping are treated as nodes without outgoing
            edges.

    Returns:
        One list per component that contains a cycle: every component with
        more than one node, plus single nodes that list themselves as a
        successor. Acyclic single-node components are omitted.
    """
    order: dict[str, int] = {}      # discovery index of each visited node
    low: dict[str, int] = {}        # smallest discovery index reachable from the node
    pending: list[str] = []         # Tarjan stack of nodes not yet assigned to a component
    pending_set: set[str] = set()   # membership view of `pending`
    cycles: list[list[str]] = []
    counter = 0

    for root in list(graph):
        if root in order:
            continue

        order[root] = low[root] = counter
        counter += 1
        pending.append(root)
        pending_set.add(root)

        # Each frame is [node, successor list, position of the next successor].
        frames: list[list] = [[root, list(graph.get(root, [])), 0]]

        while frames:
            frame = frames[-1]
            current, succs, cursor = frame

            if cursor < len(succs):
                frame[2] = cursor + 1
                nxt = succs[cursor]
                if nxt not in order:
                    # Tree edge: descend into the unvisited successor.
                    order[nxt] = low[nxt] = counter
                    counter += 1
                    pending.append(nxt)
                    pending_set.add(nxt)
                    frames.append([nxt, list(graph.get(nxt, [])), 0])
                elif nxt in pending_set:
                    # Edge back into the component currently being built.
                    low[current] = min(low[current], order[nxt])
                continue

            # All successors handled: leave this frame and pass the low-link
            # value up to the parent frame, if any.
            frames.pop()
            if frames:
                parent = frames[-1][0]
                low[parent] = min(low[parent], low[current])

            if low[current] != order[current]:
                continue

            # `current` is the root of a component: unwind the pending stack.
            component: list[str] = []
            while True:
                member = pending.pop()
                pending_set.discard(member)
                component.append(member)
                if member == current:
                    break

            # Keep real cycles only: multi-node components or self-loops.
            if len(component) > 1 or current in graph.get(current, []):
                cycles.append(component)

    return cycles
276
+
277
+
278
+ # ---------------------------------------------------------------------------
279
+ # Main builder
280
+ # ---------------------------------------------------------------------------
281
+
282
def build_structural_map(
    project_dir: Path,
    include_roots: "Sequence[str] | None" = None,
    time_budget_s: float = 30.0,
    parse_cache: "Any | None" = None,
    cancel_event: "Any | None" = None,
) -> "list[StructuralEntry]":
    """Build Map 1 (structural) for a target project directory.

    Args:
        project_dir: Root of the target project to scan.
        include_roots: Optional list of subdirectory names (relative to
            project_dir) to restrict the scan. None = whole project.
        time_budget_s: Soft time limit in seconds. Emits a warning if
            exceeded but does NOT truncate results.
        parse_cache: Optional cache object. When given, every Python file is
            obtained through ``parse_cache.get_or_parse(abs_path, project_dir)``
            instead of being read and parsed here, and its
            ``_max_file_bytes`` / ``oversized_files`` attributes (when
            present) drive the size guard of the non-Python pass.
        cancel_event: Optional object exposing ``is_set()``. It is polled
            before each file; once set, scanning stops early and the map is
            built from the files collected so far.

    Returns:
        Sorted list of StructuralEntry, one per source file found. Includes
        Python (.py), TypeScript (.ts/.tsx), JavaScript (.js/.jsx), Go (.go),
        and Java (.java) -- all languages registered in source_adapters.ADAPTERS.

        Coverage note: the structural map (imports_out, symbols_defined) is
        multi-language. The contracts, authority, and runtime maps are
        Python-AST-only today; non-Python adapters return empty stubs for
        those passes.

    Raises:
        MapBuilderError: On unexpected errors during scan (not SyntaxError --
            those are caught and tagged as unparseable).
    """
    project_dir = project_dir.resolve()
    _log.info(
        "build_structural_map: scanning project_dir=%s include_roots=%s",
        project_dir,
        include_roots,
    )
    t_start = time.monotonic()

    # ------------------------------------------------------------------
    # Pass 1: parse each file -> collect raw data
    # ------------------------------------------------------------------
    # raw_data[rel_posix] = {imports_out, symbols_defined, size_lines,
    #                        unparseable, language}
    raw_data: dict[str, dict] = {}

    try:
        py_files = list(iter_py_files(project_dir, include_roots))
    except Exception as exc:
        raise MapBuilderError(
            "build_structural_map: iter_py_files failed: %s" % exc
        ) from exc

    # Derive max_file_bytes from parse_cache if available (keeps the limit consistent)
    _max_file_bytes: float = getattr(parse_cache, "_max_file_bytes", float("inf"))
    _oversized: list[dict] = getattr(parse_cache, "oversized_files", [])

    for abs_path in py_files:
        if cancel_event is not None and cancel_event.is_set():
            _log.info("build_structural_map: cancelled, stopping py_files loop early")
            break
        rel = _rel_posix(abs_path, project_dir)

        # --- Fast path: use parse_cache if available ---
        if parse_cache is not None:
            parsed = parse_cache.get_or_parse(abs_path, project_dir)
            raw_data[rel] = {
                "imports_out": parsed.imports_out,
                "symbols_defined": parsed.symbols_defined,
                "size_lines": parsed.size_lines,
                "unparseable": not parsed.is_parseable,
                "language": "python",
            }
            continue

        # --- Slow path: direct read + parse (backward-compat, parse_cache=None) ---
        unparseable = False
        imports_out: list[str] = []
        symbols_defined: list[str] = []
        size_lines = 0

        try:
            source = abs_path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            raise MapBuilderError(
                "build_structural_map: cannot read %s: %s" % (abs_path, exc)
            ) from exc

        # Line count, including a final line that lacks a trailing newline.
        size_lines = source.count("\n") + (1 if source and not source.endswith("\n") else 0)

        # Check parseability first (ast.parse directly, before _extract_imports
        # which silently swallows SyntaxError and returns an empty ModuleNode)
        if not _is_parseable(source):
            unparseable = True
        else:
            # AST donor: _extract_imports returns ModuleNode
            try:
                module_node = _extract_imports(source, rel)
            except Exception as exc:
                raise MapBuilderError(
                    "build_structural_map: unexpected error parsing %s: %s" % (rel, exc)
                ) from exc

            # Combine top-level + lazy + dynamic imports -- deduplicated
            seen: set[str] = set()
            for imp in (
                module_node.imports
                + module_node.lazy_imports
                + module_node.dynamic_imports
            ):
                if imp and imp not in seen:
                    seen.add(imp)
                    imports_out.append(imp)

            # Also add "module.name" candidates from "from module import name"
            # so that sub-module imports resolve correctly (e.g. from pkg import sub
            # -> candidate "pkg.sub" -> resolves to "pkg/sub.py")
            for candidate in _collect_from_import_candidates(source):
                if candidate and candidate not in seen:
                    seen.add(candidate)
                    imports_out.append(candidate)

            # symbols_defined: class/function names
            symbols_defined = _extract_symbols_defined(source)

        raw_data[rel] = {
            "imports_out": imports_out,
            "symbols_defined": symbols_defined,
            "size_lines": size_lines,
            "unparseable": unparseable,
            "language": "python",
        }

    # Pass 1b: non-Python structural extraction via registered adapters
    non_py_raw = _collect_non_python_raw_data(
        project_dir,
        include_roots,
        max_file_bytes=_max_file_bytes,
        oversized_files=_oversized,
        cancel_event=cancel_event,
    )
    raw_data.update(non_py_raw)

    _log.debug("build_structural_map: pass 1 done, %d files", len(raw_data))

    # ------------------------------------------------------------------
    # Pass 2: resolve every import once, feeding both the reverse index
    # (imports_in) and the dependency graph used for cycle detection
    # ------------------------------------------------------------------
    # imports_in[file] = set of files that import it
    imports_in: dict[str, set[str]] = {rel: set() for rel in raw_data}
    # graph[file] = resolved import targets, in import order
    graph: dict[str, list[str]] = {}

    for rel, data in raw_data.items():
        resolved_targets: list[str] = []
        for imp in data["imports_out"]:
            # imports_out holds import strings as written (e.g. dotted module
            # names such as "BRAIN.foo.bar" or ".bar"); map each one to a
            # known relative path where possible.
            target_rel = _resolve_any_import(imp, rel, raw_data)
            if target_rel is None:
                continue
            resolved_targets.append(target_rel)
            if target_rel in imports_in:
                imports_in[target_rel].add(rel)
        graph[rel] = resolved_targets

    _log.debug("build_structural_map: pass 2 done (reverse index built)")

    # ------------------------------------------------------------------
    # Cycle detection (Tarjan SCC)
    # ------------------------------------------------------------------
    try:
        sccs = _tarjan_sccs(graph)
    except Exception as exc:
        raise MapBuilderError(
            "build_structural_map: cycle detection failed: %s" % exc
        ) from exc

    # Map each file to its cycle members (excluding itself)
    cycle_map: dict[str, list[str]] = {}
    for scc in sccs:
        scc_set = set(scc)
        for member in scc:
            cycle_map[member] = sorted(scc_set - {member})

    _log.debug(
        "build_structural_map: cycle detection done, %d SCCs with cycles",
        len(sccs),
    )

    # ------------------------------------------------------------------
    # Build StructuralEntry list
    # ------------------------------------------------------------------
    large_file_threshold = STRUCTURAL_THRESHOLDS["large_file_lines"]
    high_fan_in_threshold = STRUCTURAL_THRESHOLDS["high_fan_in"]
    high_fan_out_threshold = STRUCTURAL_THRESHOLDS["high_fan_out"]

    freshness = _freshness_now()
    entries: list[StructuralEntry] = []

    for rel in sorted(raw_data.keys()):
        data = raw_data[rel]
        size_lines = data["size_lines"]
        imports_out_list = data["imports_out"]
        symbols_defined = data["symbols_defined"]
        unparseable = data["unparseable"]
        imports_in_list = sorted(imports_in.get(rel, set()))
        cycles_list = cycle_map.get(rel, [])

        tags: list[str] = []
        if unparseable:
            tags.append("unparseable")
        if size_lines > large_file_threshold:
            tags.append("large_file")
        if len(imports_in_list) > high_fan_in_threshold:
            tags.append("high_fan_in")
        if len(imports_out_list) > high_fan_out_threshold:
            tags.append("high_fan_out")
        if cycles_list:
            tags.append("cycle_member")

        entry = StructuralEntry(
            file=rel,
            language=data.get("language", "unknown"),
            size_lines=size_lines,
            imports_out=tuple(imports_out_list),
            imports_in=tuple(imports_in_list),
            symbols_defined=tuple(symbols_defined),
            symbols_used_external=(),
            cycles=tuple(cycles_list),
            tags=tuple(sorted(tags)),
            source="static_scan",
            evidence=(rel,),
            confidence=0.95,
            freshness=freshness,
            status="inferred",
        )
        entries.append(entry)

    elapsed = time.monotonic() - t_start
    if elapsed > time_budget_s:
        _log.warning(
            "build_structural_map: SLA exceeded -- %.1fs > %.1fs budget (%d files)",
            elapsed,
            time_budget_s,
            len(entries),
        )
    else:
        _log.info(
            "build_structural_map: done in %.2fs, %d entries",
            elapsed,
            len(entries),
        )

    return entries
538
+
539
+
540
+ # ---------------------------------------------------------------------------
541
+ # Import resolution helper
542
+ # ---------------------------------------------------------------------------
543
+
544
def _resolve_import_to_rel(
    import_name: str,
    importer_rel: str,
    known_files: "Mapping[str, object]",
) -> "str | None":
    """Try to map a dotted import name to a known relative file path.

    Handles:
      - Absolute dotted names: "foo.bar.baz" -> "foo/bar/baz.py" or
        "foo/bar/baz/__init__.py"
      - Relative imports: ".foo" or "..foo" (resolved relative to the
        importer's package directory)

    Args:
        import_name: Dotted module name, optionally with leading dots.
        importer_rel: Project-relative POSIX path of the importing file.
        known_files: Mapping whose keys are project-relative POSIX paths.

    Returns:
        The matching key from known_files, or None if the name is empty,
        climbs above the project root, or matches no known file.
    """
    if not import_name:
        return None

    if import_name.startswith("."):
        # Relative import: one dot strips the importer's filename, every
        # additional dot goes up one more package level.
        rest = import_name.lstrip(".")
        dots = len(import_name) - len(rest)
        parts = importer_rel.split("/")
        if dots > len(parts):
            # More dots than path components: the import points above the
            # project root, so it cannot name a known file.
            return None
        pkg_parts = parts[:-dots]
        if rest:
            pkg_parts = pkg_parts + rest.split(".")
        candidate_module = "/".join(pkg_parts)
    else:
        candidate_module = "/".join(import_name.split("."))

    if not candidate_module:
        return None

    # A plain module file takes precedence over a package directory.
    candidate_py = candidate_module + ".py"
    if candidate_py in known_files:
        return candidate_py

    candidate_init = candidate_module + "/__init__.py"
    if candidate_init in known_files:
        return candidate_init

    return None
vigil_mcp/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """vigil_mcp — FastMCP stdio servers wrapping vigil_mapper and vigil_forensic."""