vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,586 @@
|
|
|
1
|
+
"""Generic structural map builder -- Map 1.
|
|
2
|
+
|
|
3
|
+
Scans any target project directory and builds a dependency graph:
|
|
4
|
+
- imports_out / imports_in per file
|
|
5
|
+
- cycle detection (iterative Tarjan SCC)
|
|
6
|
+
- auto-tags: large_file, high_fan_in, high_fan_out, cycle_member, unparseable
|
|
7
|
+
- symbols_defined (class / function names)
|
|
8
|
+
|
|
9
|
+
Multi-language: Python files are parsed via AST (Pass 1); all other languages
|
|
10
|
+
registered in source_adapters.ADAPTERS (TypeScript, JavaScript, Go, Java) are
|
|
11
|
+
processed via regex-based adapters in Pass 1b (_collect_non_python_raw_data).
|
|
12
|
+
Both passes contribute StructuralEntry records to the same result list.
|
|
13
|
+
|
|
14
|
+
Remaining gap: contracts/authority/runtime maps (Maps 2-4) are Python-AST-only
|
|
15
|
+
today; non-Python adapters return [] stubs for those passes.
|
|
16
|
+
|
|
17
|
+
Generic design: operates on any project_dir, no Vigil-specific assumptions.
|
|
18
|
+
Self-diagnosis: pass project_dir=Path(".") to run against Vigil itself.
|
|
19
|
+
|
|
20
|
+
Public API:
|
|
21
|
+
build_structural_map(project_dir, include_roots, time_budget_s) -> list[StructuralEntry]
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import ast
|
|
26
|
+
import logging
|
|
27
|
+
import time
|
|
28
|
+
from collections.abc import Mapping
|
|
29
|
+
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Sequence
|
|
32
|
+
|
|
33
|
+
from .map_common import STRUCTURAL_THRESHOLDS, iter_py_files, iter_source_files
|
|
34
|
+
from .map_errors import MapBuilderError
|
|
35
|
+
from .map_models import StructuralEntry
|
|
36
|
+
from .source_adapters import get_adapter_for_file
|
|
37
|
+
from ._extract_imports_impl import _extract_imports # noqa: PLC2701
|
|
38
|
+
|
|
39
|
+
__all__ = ["build_structural_map"]
|
|
40
|
+
|
|
41
|
+
_log = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Internal helpers
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
from .runtime_builder import _freshness_now
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _rel_posix(path: Path, project_dir: Path) -> str:
|
|
51
|
+
"""Return path relative to project_dir as forward-slash string."""
|
|
52
|
+
try:
|
|
53
|
+
return path.relative_to(project_dir).as_posix()
|
|
54
|
+
except ValueError:
|
|
55
|
+
# Fallback: shouldn't happen since iter_py_files already resolves
|
|
56
|
+
return path.as_posix()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _is_parseable(source: str) -> bool:
|
|
60
|
+
"""Return False if source has a SyntaxError, True otherwise."""
|
|
61
|
+
try:
|
|
62
|
+
ast.parse(source)
|
|
63
|
+
return True
|
|
64
|
+
except SyntaxError:
|
|
65
|
+
return False
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _extract_symbols_defined(source: str) -> list[str]:
|
|
69
|
+
"""Return class and function names defined in source (all scopes).
|
|
70
|
+
|
|
71
|
+
Returns empty list on SyntaxError (caller already tagged unparseable).
|
|
72
|
+
"""
|
|
73
|
+
try:
|
|
74
|
+
tree = ast.parse(source)
|
|
75
|
+
except SyntaxError:
|
|
76
|
+
return []
|
|
77
|
+
names: list[str] = []
|
|
78
|
+
for node in ast.walk(tree):
|
|
79
|
+
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
80
|
+
names.append(node.name)
|
|
81
|
+
return names
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _collect_from_import_candidates(source: str) -> list[str]:
|
|
85
|
+
"""Collect additional dotted candidates from 'from X import Y' statements.
|
|
86
|
+
|
|
87
|
+
_extract_imports adds only the module X for 'from X import Y'.
|
|
88
|
+
Here we also produce 'X.Y' as a candidate, so that sub-module imports
|
|
89
|
+
like 'from pkg import submod' resolve to 'pkg/submod.py'.
|
|
90
|
+
|
|
91
|
+
Returns list of dotted strings (may contain duplicates -- dedup is done by caller).
|
|
92
|
+
Silently returns empty list on SyntaxError.
|
|
93
|
+
"""
|
|
94
|
+
candidates: list[str] = []
|
|
95
|
+
try:
|
|
96
|
+
tree = ast.parse(source)
|
|
97
|
+
except SyntaxError:
|
|
98
|
+
return candidates
|
|
99
|
+
for node in ast.walk(tree):
|
|
100
|
+
if isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
|
|
101
|
+
for alias in node.names:
|
|
102
|
+
if alias.name != "*":
|
|
103
|
+
candidates.append(f"{node.module}.{alias.name}")
|
|
104
|
+
return candidates
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# File extensions tried, in this order, when resolving TS/JS import specifiers.
_TS_EXTS = (".ts", ".tsx", ".js", ".jsx")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _collect_non_python_raw_data(
    project_dir: Path,
    include_roots: "Sequence[str] | None",
    max_file_bytes: float = float("inf"),
    oversized_files: "list[dict] | None" = None,
    cancel_event: "Any | None" = None,
) -> "dict[str, dict]":
    """Gather raw structural data for non-Python source files (Pass 1b).

    Every file yielded by ``iter_source_files`` is handed to its registered
    adapter. Files are skipped when no adapter exists, when the adapter is
    the Python one, when the adapter does not support structural extraction,
    when the file is larger than *max_file_bytes*, or when it cannot be read.

    Args:
        project_dir: Root directory of the scanned project.
        include_roots: Optional sub-directories to restrict the scan to.
        max_file_bytes: Size limit in bytes; larger files are skipped.
        oversized_files: Optional list that receives one
            ``{"path", "size_mb"}`` record per skipped oversized file.
        cancel_event: Optional object with ``is_set()``; once set, the scan
            stops and the data gathered so far is returned.

    Returns:
        Mapping from project-relative POSIX path to a dict with the keys
        ``imports_out``, ``symbols_defined``, ``size_lines``, ``unparseable``
        and ``language``.
    """
    collected: dict[str, dict] = {}

    for candidate in iter_source_files(project_dir, include_roots=include_roots):
        if cancel_event is not None and cancel_event.is_set():
            _log.info("_collect_non_python_raw_data: cancelled, stopping early")
            break

        adapter = get_adapter_for_file(candidate)
        if adapter is None:
            continue
        if adapter.language == "python" or not adapter.supports_structural:
            continue

        rel_key = _rel_posix(candidate, project_dir)

        # Size guard: a failed stat counts as zero bytes, so the file is
        # still attempted below.
        try:
            n_bytes = candidate.stat().st_size
        except OSError:
            n_bytes = 0
        if n_bytes > max_file_bytes:
            mebibytes = n_bytes / (1024 * 1024)
            _log.warning(
                "_collect_non_python_raw_data: skipping oversized file %s (%.1f MiB)",
                candidate, mebibytes,
            )
            if oversized_files is not None:
                oversized_files.append({"path": str(candidate), "size_mb": round(mebibytes, 3)})
            continue

        try:
            text = candidate.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.warning("_collect_non_python_raw_data: cannot read %s: %s", candidate, exc)
            continue

        # Count lines, including a final line without a trailing newline.
        line_count = text.count("\n")
        if text and not text.endswith("\n"):
            line_count += 1

        # Best effort: any adapter failure marks the file unparseable
        # instead of aborting the whole scan.
        try:
            edges = adapter.extract_imports(text, candidate)
            symbols = adapter.extract_symbols(text, candidate)
            failed = False
        except Exception as exc:
            _log.warning("_collect_non_python_raw_data: adapter error on %s: %s", rel_key, exc)
            edges, symbols, failed = [], [], True

        # dict.fromkeys removes duplicate targets while keeping first-seen order.
        unique_targets = list(dict.fromkeys(e.to_module for e in edges if e.to_module))
        collected[rel_key] = {
            "imports_out": unique_targets,
            "symbols_defined": [s.name for s in symbols],
            "size_lines": line_count,
            "unparseable": failed,
            "language": adapter.language,
        }

    _log.debug("_collect_non_python_raw_data: %d non-Python files", len(collected))
    return collected
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _resolve_ts_import_to_rel(import_str: str, importer_rel: str, known_files: Mapping[str, object]) -> str | None:
    """Resolve TS/JS import specifier to known relative file key; returns None for packages.

    Supported specifier forms:
    - Relative: ``"."``, ``".."``, ``"./x"``, ``"../x"`` -- resolved against
      the directory of *importer_rel*.
    - ``"@/x"`` -- resolved against the project root.
      NOTE(review): projects commonly map ``@/`` to a source sub-directory;
      that mapping is not consulted here.

    Anything else is treated as an external package.

    Args:
        import_str: Import specifier as written in the source.
        importer_rel: Project-relative POSIX path of the importing file.
        known_files: Mapping whose keys are project-relative POSIX paths.

    Returns:
        The matching key of *known_files*, or None when the specifier is
        empty, external, points above the project root, or matches no file.
    """
    if not import_str:
        return None

    def _probe(base: str) -> "str | None":
        # Drop an explicit extension so that "./foo.js" can match "foo.ts".
        for ext in _TS_EXTS:
            if base.endswith(ext):
                base = base[: -len(ext)]
                break
        # An empty base is the project root: its index file is "index.<ext>",
        # not "/index.<ext>" (known_files keys carry no leading slash).
        index_base = base + "/index" if base else "index"
        for ext in _TS_EXTS:
            if (base + ext) in known_files:
                return base + ext
            if (index_base + ext) in known_files:
                return index_base + ext
        return None

    if import_str in (".", "..") or import_str.startswith(("./", "../")):
        importer_dir = importer_rel.rpartition("/")[0]
        raw = (importer_dir + "/" + import_str) if importer_dir else import_str
        parts: list[str] = []
        for segment in raw.split("/"):
            if segment == "..":
                if not parts:
                    # Climbs above the project root: the target cannot be a
                    # known file. Clamping here would wrongly match an
                    # unrelated in-project path.
                    return None
                parts.pop()
            elif segment and segment != ".":
                parts.append(segment)
        return _probe("/".join(parts))

    if import_str.startswith("@/"):
        return _probe(import_str[2:])

    return None  # bare package import -- external
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _resolve_any_import(import_str: str, importer_rel: str, known_files: Mapping[str, object]) -> str | None:
    """Resolve an import using the resolver that fits the importing file.

    Importers whose path ends in ``.py`` use the Python dotted-name resolver;
    every other importer goes through the TS/JS specifier resolver.

    Args:
        import_str: Import name or specifier as recorded for the importer.
        importer_rel: Project-relative POSIX path of the importing file.
        known_files: Mapping whose keys are project-relative POSIX paths.

    Returns:
        Whatever the selected resolver returns: a key of *known_files* or None.
    """
    resolver = _resolve_import_to_rel if importer_rel.endswith(".py") else _resolve_ts_import_to_rel
    return resolver(import_str, importer_rel, known_files)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
# Tarjan SCC (iterative) — cycle detection
|
|
204
|
+
# ---------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
def _tarjan_sccs(graph: dict[str, list[str]]) -> list[list[str]]:
|
|
207
|
+
"""Compute all SCCs using iterative Tarjan algorithm.
|
|
208
|
+
|
|
209
|
+
Returns list of SCCs where len > 1 (i.e., cycles only).
|
|
210
|
+
Single-node SCCs without self-loops are excluded.
|
|
211
|
+
"""
|
|
212
|
+
index_counter = [0]
|
|
213
|
+
stack: list[str] = []
|
|
214
|
+
lowlink: dict[str, int] = {}
|
|
215
|
+
index: dict[str, int] = {}
|
|
216
|
+
on_stack: dict[str, bool] = {}
|
|
217
|
+
sccs: list[list[str]] = []
|
|
218
|
+
|
|
219
|
+
nodes = list(graph.keys())
|
|
220
|
+
|
|
221
|
+
for start in nodes:
|
|
222
|
+
if start in index:
|
|
223
|
+
continue
|
|
224
|
+
# Iterative DFS with explicit call stack
|
|
225
|
+
# Each frame: (node, iterator-over-neighbours, was-just-pushed)
|
|
226
|
+
call_stack: list[tuple[str, list[str], int]] = []
|
|
227
|
+
call_stack.append((start, list(graph.get(start, [])), 0))
|
|
228
|
+
index[start] = lowlink[start] = index_counter[0]
|
|
229
|
+
index_counter[0] += 1
|
|
230
|
+
stack.append(start)
|
|
231
|
+
on_stack[start] = True
|
|
232
|
+
|
|
233
|
+
while call_stack:
|
|
234
|
+
node, neighbours, ni = call_stack[-1]
|
|
235
|
+
|
|
236
|
+
if ni < len(neighbours):
|
|
237
|
+
# Advance to next neighbour
|
|
238
|
+
call_stack[-1] = (node, neighbours, ni + 1)
|
|
239
|
+
w = neighbours[ni]
|
|
240
|
+
if w not in index:
|
|
241
|
+
# Tree edge — recurse
|
|
242
|
+
index[w] = lowlink[w] = index_counter[0]
|
|
243
|
+
index_counter[0] += 1
|
|
244
|
+
stack.append(w)
|
|
245
|
+
on_stack[w] = True
|
|
246
|
+
call_stack.append((w, list(graph.get(w, [])), 0))
|
|
247
|
+
elif on_stack.get(w, False):
|
|
248
|
+
# Back edge
|
|
249
|
+
if lowlink[node] > index[w]:
|
|
250
|
+
lowlink[node] = index[w]
|
|
251
|
+
else:
|
|
252
|
+
# Done with all neighbours — pop frame
|
|
253
|
+
call_stack.pop()
|
|
254
|
+
if call_stack:
|
|
255
|
+
parent, _, _ = call_stack[-1]
|
|
256
|
+
if lowlink[parent] > lowlink[node]:
|
|
257
|
+
lowlink[parent] = lowlink[node]
|
|
258
|
+
|
|
259
|
+
# Check if node is root of an SCC
|
|
260
|
+
if lowlink[node] == index[node]:
|
|
261
|
+
scc: list[str] = []
|
|
262
|
+
while True:
|
|
263
|
+
w = stack.pop()
|
|
264
|
+
on_stack[w] = False
|
|
265
|
+
scc.append(w)
|
|
266
|
+
if w == node:
|
|
267
|
+
break
|
|
268
|
+
# Only keep SCCs with actual cycles
|
|
269
|
+
if len(scc) > 1:
|
|
270
|
+
sccs.append(scc)
|
|
271
|
+
elif scc and scc[0] in graph and scc[0] in graph.get(scc[0], []):
|
|
272
|
+
# Self-loop
|
|
273
|
+
sccs.append(scc)
|
|
274
|
+
|
|
275
|
+
return sccs
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# ---------------------------------------------------------------------------
|
|
279
|
+
# Main builder
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
|
|
282
|
+
def build_structural_map(
    project_dir: Path,
    include_roots: Sequence[str] | None = None,
    time_budget_s: float = 30.0,
    parse_cache: "Any | None" = None,
    cancel_event: "Any | None" = None,
) -> list[StructuralEntry]:
    """Build Map 1 (structural) for a target project directory.

    Args:
        project_dir: Root of the target project to scan.
        include_roots: Optional list of subdirectory names (relative to
            project_dir) to restrict the scan. None = whole project.
        time_budget_s: Soft time limit in seconds. Emits a warning if
            exceeded but does NOT truncate results.
        parse_cache: Optional cache object. When given, each Python file is
            obtained through ``parse_cache.get_or_parse(abs_path, project_dir)``
            and its ``imports_out``, ``symbols_defined``, ``size_lines`` and
            ``is_parseable`` attributes are used instead of reading and
            parsing the file here. Its ``_max_file_bytes`` and
            ``oversized_files`` attributes, when present, are forwarded to
            the non-Python pass.
        cancel_event: Optional object with ``is_set()``. Once set, file
            scanning stops early and the map is built from the files
            processed so far.

    Returns:
        Sorted list of StructuralEntry, one per source file found. Includes
        Python (.py), TypeScript (.ts/.tsx), JavaScript (.js/.jsx), Go (.go),
        and Java (.java) — all languages registered in source_adapters.ADAPTERS.

        Coverage note: the structural map (imports_out, symbols_defined) is
        multi-language. The contracts, authority, and runtime maps are
        Python-AST-only today; non-Python adapters return empty stubs for
        those passes.

    Raises:
        MapBuilderError: On unexpected errors during scan (not SyntaxError --
            those are caught and tagged as unparseable).
    """
    project_dir = project_dir.resolve()
    _log.info(
        "build_structural_map: scanning project_dir=%s include_roots=%s",
        project_dir,
        include_roots,
    )
    t_start = time.monotonic()

    # ------------------------------------------------------------------
    # Pass 1: parse each file → collect raw data
    # ------------------------------------------------------------------
    # raw_data[rel_posix] = {imports_out, symbols_defined, size_lines,
    #                        unparseable, language}
    raw_data: dict[str, dict] = {}

    try:
        py_files = list(iter_py_files(project_dir, include_roots))
    except Exception as exc:
        raise MapBuilderError(
            "build_structural_map: iter_py_files failed: %s" % exc
        ) from exc

    # Derive max_file_bytes from parse_cache if available (keeps the limit consistent)
    _max_file_bytes: float = getattr(parse_cache, "_max_file_bytes", float("inf"))
    _oversized: list[dict] = getattr(parse_cache, "oversized_files", [])

    for abs_path in py_files:
        if cancel_event is not None and cancel_event.is_set():
            _log.info("build_structural_map: cancelled, stopping py_files loop early")
            break
        rel = _rel_posix(abs_path, project_dir)

        # --- Fast path: use parse_cache if available ---
        if parse_cache is not None:
            parsed = parse_cache.get_or_parse(abs_path, project_dir)
            raw_data[rel] = {
                "imports_out": parsed.imports_out,
                "symbols_defined": parsed.symbols_defined,
                "size_lines": parsed.size_lines,
                "unparseable": not parsed.is_parseable,
                "language": "python",
            }
            continue

        # --- Slow path: direct read + parse (backward-compat, parse_cache=None) ---
        unparseable = False
        imports_out: list[str] = []
        symbols_defined: list[str] = []
        size_lines = 0

        try:
            source = abs_path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            raise MapBuilderError(
                "build_structural_map: cannot read %s: %s" % (abs_path, exc)
            ) from exc

        # Count lines, including a final line without a trailing newline.
        size_lines = source.count("\n") + (1 if source and not source.endswith("\n") else 0)

        # Check parseability first (ast.parse directly, before _extract_imports
        # which silently swallows SyntaxError and returns an empty ModuleNode)
        if not _is_parseable(source):
            unparseable = True
        else:
            # AST donor: _extract_imports returns ModuleNode
            try:
                module_node = _extract_imports(source, rel)
            except Exception as exc:
                raise MapBuilderError(
                    "build_structural_map: unexpected error parsing %s: %s" % (rel, exc)
                ) from exc

            # Combine top-level + lazy + dynamic imports — deduplicated
            seen: set[str] = set()
            for imp in (
                module_node.imports
                + module_node.lazy_imports
                + module_node.dynamic_imports
            ):
                if imp and imp not in seen:
                    seen.add(imp)
                    imports_out.append(imp)

            # Also add "module.name" candidates from "from module import name"
            # so that sub-module imports resolve correctly (e.g. from pkg import sub
            # → candidate "pkg.sub" → resolves to "pkg/sub.py")
            for candidate in _collect_from_import_candidates(source):
                if candidate and candidate not in seen:
                    seen.add(candidate)
                    imports_out.append(candidate)

            # symbols_defined: class/function names
            symbols_defined = _extract_symbols_defined(source)

        raw_data[rel] = {
            "imports_out": imports_out,
            "symbols_defined": symbols_defined,
            "size_lines": size_lines,
            "unparseable": unparseable,
            "language": "python",
        }

    # Pass 1b: non-Python structural extraction via registered adapters
    non_py_raw = _collect_non_python_raw_data(
        project_dir,
        include_roots,
        max_file_bytes=_max_file_bytes,
        oversized_files=_oversized,
        cancel_event=cancel_event,
    )
    raw_data.update(non_py_raw)

    _log.debug("build_structural_map: pass 1 done, %d files", len(raw_data))

    # ------------------------------------------------------------------
    # Pass 2: resolve imports once → reverse index (imports_in) + graph
    # ------------------------------------------------------------------
    # Each import is resolved a single time; the result feeds both the
    # reverse index and the adjacency lists used for cycle detection.
    # imports_in[file] = set of files that import it
    imports_in: dict[str, set[str]] = {rel: set() for rel in raw_data}
    graph: dict[str, list[str]] = {}

    for rel, data in raw_data.items():
        resolved_targets: list[str] = []
        for imp in data["imports_out"]:
            # Map the recorded import name/specifier to a known rel path.
            target_rel = _resolve_any_import(imp, rel, raw_data)
            if target_rel is None:
                continue
            resolved_targets.append(target_rel)
            if target_rel in imports_in:
                imports_in[target_rel].add(rel)
        graph[rel] = resolved_targets

    _log.debug("build_structural_map: pass 2 done (reverse index built)")

    # ------------------------------------------------------------------
    # Cycle detection (Tarjan SCC)
    # ------------------------------------------------------------------
    try:
        sccs = _tarjan_sccs(graph)
    except Exception as exc:
        raise MapBuilderError(
            "build_structural_map: cycle detection failed: %s" % exc
        ) from exc

    # Map each file to its cycle members (excluding itself)
    cycle_map: dict[str, list[str]] = {}
    for scc in sccs:
        scc_set = set(scc)
        for member in scc:
            cycle_map[member] = sorted(scc_set - {member})

    _log.debug(
        "build_structural_map: cycle detection done, %d SCCs with cycles",
        len(sccs),
    )

    # ------------------------------------------------------------------
    # Build StructuralEntry list
    # ------------------------------------------------------------------
    large_file_threshold = STRUCTURAL_THRESHOLDS["large_file_lines"]
    high_fan_in_threshold = STRUCTURAL_THRESHOLDS["high_fan_in"]
    high_fan_out_threshold = STRUCTURAL_THRESHOLDS["high_fan_out"]

    freshness = _freshness_now()
    entries: list[StructuralEntry] = []

    for rel in sorted(raw_data.keys()):
        data = raw_data[rel]
        size_lines = data["size_lines"]
        imports_out_list = data["imports_out"]
        symbols_defined = data["symbols_defined"]
        unparseable = data["unparseable"]
        imports_in_list = sorted(imports_in.get(rel, set()))
        cycles_list = cycle_map.get(rel, [])

        tags: list[str] = []
        if unparseable:
            tags.append("unparseable")
        if size_lines > large_file_threshold:
            tags.append("large_file")
        if len(imports_in_list) > high_fan_in_threshold:
            tags.append("high_fan_in")
        # Fan-out counts recorded import names, not resolved files.
        if len(imports_out_list) > high_fan_out_threshold:
            tags.append("high_fan_out")
        if cycles_list:
            tags.append("cycle_member")

        entry = StructuralEntry(
            file=rel,
            language=data.get("language", "unknown"),
            size_lines=size_lines,
            imports_out=tuple(imports_out_list),
            imports_in=tuple(imports_in_list),
            symbols_defined=tuple(symbols_defined),
            symbols_used_external=(),
            cycles=tuple(cycles_list),
            tags=tuple(sorted(tags)),
            source="static_scan",
            evidence=(rel,),
            confidence=0.95,
            freshness=freshness,
            status="inferred",
        )
        entries.append(entry)

    elapsed = time.monotonic() - t_start
    if elapsed > time_budget_s:
        _log.warning(
            "build_structural_map: SLA exceeded -- %.1fs > %.1fs budget (%d files)",
            elapsed,
            time_budget_s,
            len(entries),
        )
    else:
        _log.info(
            "build_structural_map: done in %.2fs, %d entries",
            elapsed,
            len(entries),
        )

    return entries
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
# ---------------------------------------------------------------------------
|
|
541
|
+
# Import resolution helper
|
|
542
|
+
# ---------------------------------------------------------------------------
|
|
543
|
+
|
|
544
|
+
def _resolve_import_to_rel(
|
|
545
|
+
import_name: str,
|
|
546
|
+
importer_rel: str,
|
|
547
|
+
known_files: Mapping[str, object],
|
|
548
|
+
) -> str | None:
|
|
549
|
+
"""Try to map a dotted import name to a known relative file path.
|
|
550
|
+
|
|
551
|
+
Handles:
|
|
552
|
+
- Absolute dotted names: "foo.bar.baz" -> "foo/bar/baz.py" or "foo/bar/baz/__init__.py"
|
|
553
|
+
- Relative imports: ".foo" or "..foo" (resolved relative to importer's package)
|
|
554
|
+
|
|
555
|
+
Returns the matching key from known_files or None if unresolvable.
|
|
556
|
+
"""
|
|
557
|
+
if not import_name:
|
|
558
|
+
return None
|
|
559
|
+
|
|
560
|
+
# Resolve relative imports
|
|
561
|
+
if import_name.startswith("."):
|
|
562
|
+
dots = len(import_name) - len(import_name.lstrip("."))
|
|
563
|
+
rest = import_name.lstrip(".")
|
|
564
|
+
# Importer's package dir (strip filename, go up `dots-1` levels)
|
|
565
|
+
parts = importer_rel.split("/")
|
|
566
|
+
pkg_parts = parts[:-dots] if dots <= len(parts) else []
|
|
567
|
+
if rest:
|
|
568
|
+
pkg_parts = pkg_parts + rest.split(".")
|
|
569
|
+
candidate_module = "/".join(pkg_parts)
|
|
570
|
+
else:
|
|
571
|
+
candidate_module = "/".join(import_name.split("."))
|
|
572
|
+
|
|
573
|
+
if not candidate_module:
|
|
574
|
+
return None
|
|
575
|
+
|
|
576
|
+
# Try direct .py file
|
|
577
|
+
candidate_py = candidate_module + ".py"
|
|
578
|
+
if candidate_py in known_files:
|
|
579
|
+
return candidate_py
|
|
580
|
+
|
|
581
|
+
# Try package __init__.py
|
|
582
|
+
candidate_init = candidate_module + "/__init__.py"
|
|
583
|
+
if candidate_init in known_files:
|
|
584
|
+
return candidate_init
|
|
585
|
+
|
|
586
|
+
return None
|
vigil_mcp/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""vigil_mcp — FastMCP stdio servers wrapping vigil_mapper and vigil_forensic."""
|