vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""AST visitor and utilities for runtime_builder.py (Map 2 static scanner).
|
|
2
|
+
|
|
3
|
+
Internal module -- not part of the public API.
|
|
4
|
+
Extracted from runtime_builder.py to keep each file under 400 lines.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import ast
|
|
9
|
+
import logging
|
|
10
|
+
_log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"_RuntimeVisitor",
|
|
14
|
+
"_call_name",
|
|
15
|
+
"_decorator_registry_tag",
|
|
16
|
+
"_background_task_tag",
|
|
17
|
+
"_env_var_from_call",
|
|
18
|
+
"_collect_env_vars_from_stmt",
|
|
19
|
+
"_collect_env_vars_from_expr",
|
|
20
|
+
"_ROUTE_DECORATOR_ATTRS",
|
|
21
|
+
"_BACKGROUND_TASK_CALLS",
|
|
22
|
+
"_SCANNED_FUNC_NAMES",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Pattern constants
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
# Decorator attribute chains that signal route/dispatch registration.
|
|
30
|
+
_ROUTE_DECORATOR_ATTRS: frozenset[tuple[str, str]] = frozenset({
|
|
31
|
+
("app", "route"),
|
|
32
|
+
("bp", "route"),
|
|
33
|
+
("blueprint", "route"),
|
|
34
|
+
("router", "get"),
|
|
35
|
+
("router", "post"),
|
|
36
|
+
("router", "put"),
|
|
37
|
+
("router", "delete"),
|
|
38
|
+
("router", "patch"),
|
|
39
|
+
("router", "head"),
|
|
40
|
+
("router", "options"),
|
|
41
|
+
("router", "route"),
|
|
42
|
+
("api", "route"),
|
|
43
|
+
("dispatch", "register"),
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
# Background task call patterns: (module_attr, func_name)
|
|
47
|
+
_BACKGROUND_TASK_CALLS: frozenset[tuple[str, str]] = frozenset({
|
|
48
|
+
("threading", "Thread"),
|
|
49
|
+
("asyncio", "create_task"),
|
|
50
|
+
("subprocess", "Popen"),
|
|
51
|
+
("subprocess", "run"),
|
|
52
|
+
("subprocess", "call"),
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
# Functions whose bodies are scanned for background task spawns.
|
|
56
|
+
_SCANNED_FUNC_NAMES: frozenset[str] = frozenset({
|
|
57
|
+
"__init__",
|
|
58
|
+
"bootstrap",
|
|
59
|
+
"setup",
|
|
60
|
+
"startup",
|
|
61
|
+
"start",
|
|
62
|
+
"initialize",
|
|
63
|
+
"init",
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# AST utility functions
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
def _call_name(call: ast.Call) -> str:
|
|
72
|
+
"""Return a best-effort string representation of a call target."""
|
|
73
|
+
func = call.func
|
|
74
|
+
if isinstance(func, ast.Name):
|
|
75
|
+
return func.id
|
|
76
|
+
if isinstance(func, ast.Attribute):
|
|
77
|
+
parts: list[str] = []
|
|
78
|
+
node: ast.expr = func
|
|
79
|
+
while isinstance(node, ast.Attribute):
|
|
80
|
+
parts.append(node.attr)
|
|
81
|
+
node = node.value
|
|
82
|
+
if isinstance(node, ast.Name):
|
|
83
|
+
parts.append(node.id)
|
|
84
|
+
return ".".join(reversed(parts))
|
|
85
|
+
return "<unknown>"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _decorator_registry_tag(decorator: ast.expr) -> str | None:
|
|
89
|
+
"""Return 'decorator_registry' if the decorator matches a known route/dispatch pattern."""
|
|
90
|
+
if isinstance(decorator, ast.Attribute):
|
|
91
|
+
attr = decorator.attr
|
|
92
|
+
value = decorator.value
|
|
93
|
+
if isinstance(value, ast.Name):
|
|
94
|
+
if (value.id, attr) in _ROUTE_DECORATOR_ATTRS:
|
|
95
|
+
return "decorator_registry"
|
|
96
|
+
elif isinstance(decorator, ast.Call):
|
|
97
|
+
return _decorator_registry_tag(decorator.func)
|
|
98
|
+
return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _is_main_guard(test: ast.expr) -> bool:
|
|
102
|
+
"""True iff *test* is the canonical ``__name__ == "__main__"`` comparison.
|
|
103
|
+
|
|
104
|
+
Matches either operand order (``__name__ == "__main__"`` and
|
|
105
|
+
``"__main__" == __name__``). A bare ``def main(): ...`` without this guard
|
|
106
|
+
is NOT an entrypoint -- this keeps the precision guard honest.
|
|
107
|
+
"""
|
|
108
|
+
if not isinstance(test, ast.Compare):
|
|
109
|
+
return False
|
|
110
|
+
if len(test.ops) != 1 or not isinstance(test.ops[0], ast.Eq):
|
|
111
|
+
return False
|
|
112
|
+
operands = [test.left, *test.comparators]
|
|
113
|
+
has_name = any(isinstance(o, ast.Name) and o.id == "__name__" for o in operands)
|
|
114
|
+
has_main = any(
|
|
115
|
+
isinstance(o, ast.Constant) and o.value == "__main__" for o in operands
|
|
116
|
+
)
|
|
117
|
+
return has_name and has_main
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _entry_calls_in_block(body: list[ast.stmt]) -> tuple[list[str], bool]:
|
|
121
|
+
"""Scan a ``__main__`` block body, returning (entry_call_names, is_async).
|
|
122
|
+
|
|
123
|
+
entry_call_names: best-effort call targets invoked from the block (e.g.
|
|
124
|
+
``main``, ``app.run``, ``asyncio.run``), deduplicated, source order.
|
|
125
|
+
is_async: True when an ``asyncio.run(...)`` call is present (async
|
|
126
|
+
entrypoint signal).
|
|
127
|
+
|
|
128
|
+
Only call expressions inside the block are inspected; a plain assignment or
|
|
129
|
+
import in the guard does not by itself name an entry function.
|
|
130
|
+
"""
|
|
131
|
+
names: list[str] = []
|
|
132
|
+
is_async = False
|
|
133
|
+
for stmt in body:
|
|
134
|
+
for node in ast.walk(stmt):
|
|
135
|
+
if not isinstance(node, ast.Call):
|
|
136
|
+
continue
|
|
137
|
+
cname = _call_name(node)
|
|
138
|
+
if cname and cname not in ("<unknown>",) and cname not in names:
|
|
139
|
+
names.append(cname)
|
|
140
|
+
if cname in ("asyncio.run", "asyncio.get_event_loop"):
|
|
141
|
+
is_async = True
|
|
142
|
+
return names, is_async
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _background_task_tag(call: ast.Call) -> str | None:
|
|
146
|
+
"""Return kind string if call is a known background task spawn, else None."""
|
|
147
|
+
func = call.func
|
|
148
|
+
if isinstance(func, ast.Attribute):
|
|
149
|
+
attr = func.attr
|
|
150
|
+
value = func.value
|
|
151
|
+
if isinstance(value, ast.Name):
|
|
152
|
+
pair = (value.id, attr)
|
|
153
|
+
if pair in _BACKGROUND_TASK_CALLS:
|
|
154
|
+
if attr == "Thread":
|
|
155
|
+
return "threading_thread"
|
|
156
|
+
if attr == "create_task":
|
|
157
|
+
return "asyncio_create_task"
|
|
158
|
+
if attr in ("Popen", "run", "call"):
|
|
159
|
+
return "subprocess_spawn"
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _env_var_from_call(call: ast.Call) -> list[str]:
|
|
164
|
+
"""Extract env var name from os.environ.get(X), os.getenv(X)."""
|
|
165
|
+
results: list[str] = []
|
|
166
|
+
func = call.func
|
|
167
|
+
if isinstance(func, ast.Attribute):
|
|
168
|
+
attr = func.attr
|
|
169
|
+
if attr in ("get", "getenv"):
|
|
170
|
+
if call.args and isinstance(call.args[0], ast.Constant):
|
|
171
|
+
var = call.args[0].value
|
|
172
|
+
if isinstance(var, str):
|
|
173
|
+
results.append(var)
|
|
174
|
+
return results
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _subscript_env_var(node: ast.Subscript) -> str | None:
|
|
178
|
+
"""Extract env var from os.environ['VAR'] subscript."""
|
|
179
|
+
if not isinstance(node.value, ast.Attribute):
|
|
180
|
+
return None
|
|
181
|
+
attr = node.value
|
|
182
|
+
if not (attr.attr == "environ" and isinstance(attr.value, ast.Name) and attr.value.id == "os"):
|
|
183
|
+
return None
|
|
184
|
+
slice_node = node.slice
|
|
185
|
+
if isinstance(slice_node, ast.Constant) and isinstance(slice_node.value, str):
|
|
186
|
+
return slice_node.value
|
|
187
|
+
# Python 3.8 ast.Index wrapper
|
|
188
|
+
if hasattr(slice_node, "value"): # ast.Index
|
|
189
|
+
inner = slice_node.value # type: ignore[attr-defined]
|
|
190
|
+
if isinstance(inner, ast.Constant) and isinstance(inner.value, str):
|
|
191
|
+
return inner.value
|
|
192
|
+
return None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _collect_env_vars_from_stmt(stmt: ast.stmt) -> list[str]:
|
|
196
|
+
"""Walk an assignment statement collecting os.environ reads."""
|
|
197
|
+
results: list[str] = []
|
|
198
|
+
for node in ast.walk(stmt):
|
|
199
|
+
if isinstance(node, ast.Call):
|
|
200
|
+
results.extend(_env_var_from_call(node))
|
|
201
|
+
elif isinstance(node, ast.Subscript):
|
|
202
|
+
var = _subscript_env_var(node)
|
|
203
|
+
if var:
|
|
204
|
+
results.append(var)
|
|
205
|
+
return results
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _collect_env_vars_from_expr(expr) -> list[str]:
|
|
209
|
+
"""Walk any expression collecting os.environ reads."""
|
|
210
|
+
if expr is None:
|
|
211
|
+
return []
|
|
212
|
+
results: list[str] = []
|
|
213
|
+
for node in ast.walk(expr):
|
|
214
|
+
if isinstance(node, ast.Call):
|
|
215
|
+
results.extend(_env_var_from_call(node))
|
|
216
|
+
elif isinstance(node, ast.Subscript):
|
|
217
|
+
var = _subscript_env_var(node)
|
|
218
|
+
if var:
|
|
219
|
+
results.append(var)
|
|
220
|
+
return results
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
# AST Visitor
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
|
|
227
|
+
class _RuntimeVisitor(ast.NodeVisitor):
    """Walk a module AST and collect runtime-relevant patterns.

    Findings are appended to ``self.results`` as dicts with the keys
    ``node``, ``kind``, ``tags``, ``env_vars``, ``side_effects`` and
    ``evidence`` (entrypoint records additionally carry ``calls``).

    Collected patterns:
      - module-level call statements            -> tag ``import_time_side_effects``
      - ``if __name__ == "__main__":`` blocks and
        the top-level functions they call       -> tag ``entrypoint``
      - route/dispatch decorators               -> tag ``decorator_registry``
      - background task spawns inside functions
        named in ``_SCANNED_FUNC_NAMES``        -> tag ``background_task``
      - ``os.environ`` / ``os.getenv`` reads      -> ``env_vars`` field
    """

    def __init__(self, rel: str) -> None:
        """Create a visitor for the file whose relative path is *rel*.

        *rel* is used as the prefix of every emitted node name and evidence
        string.
        """
        self._rel = rel
        self.results: list[dict] = []
        self._module_env_vars: list[str] = []
        # Names of functions invoked from a `__main__` block (entry functions).
        self._entry_func_names: set[str] = set()

    def _record(
        self,
        node_name: str,
        kind: str,
        tags: list[str],
        evidence: str,
        *,
        env_vars: list[str] | None = None,
        side_effects: list[str] | None = None,
        calls: list[str] | None = None,
    ) -> None:
        """Append one result record; ``calls`` is only stored when provided.

        List arguments are copied so that later in-place extension of a
        record's ``env_vars`` never aliases a caller's list.
        """
        record: dict = {
            "node": node_name,
            "kind": kind,
            "tags": list(tags),
            "env_vars": list(env_vars or ()),
            "side_effects": list(side_effects or ()),
        }
        if calls is not None:
            record["calls"] = list(calls)
        record["evidence"] = (evidence,)
        self.results.append(record)

    def visit_Module(self, node: ast.Module) -> None:
        """Visit top-level statements only (module-scope detection)."""
        # First pass: detect `if __name__ == "__main__":` entrypoint block(s)
        # so the names they call are known before module-level defs are
        # examined in the second pass.
        for stmt in node.body:
            if isinstance(stmt, ast.If) and _is_main_guard(stmt.test):
                self._handle_main_block(stmt)

        module_node = "%s:module" % self._rel
        for stmt in node.body:
            if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call):
                call = stmt.value
                call_name_str = _call_name(call)
                self._record(
                    module_node,
                    "import_time_side_effect",
                    ["import_time_side_effects"],
                    "%s:module-level-call" % self._rel,
                    side_effects=[call_name_str] if call_name_str else [],
                )
                # A module-level call may additionally be a background spawn.
                bg_tag = _background_task_tag(call)
                if bg_tag:
                    self._record(
                        module_node,
                        bg_tag,
                        ["background_task"],
                        "%s:module-level-bg" % self._rel,
                    )
            elif isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._check_function(stmt)
            elif isinstance(stmt, ast.ClassDef):
                self._check_class(stmt)

            # Module-level env reads are collected from assignments and
            # expression statements only.
            if isinstance(stmt, ast.Assign):
                self._module_env_vars.extend(_collect_env_vars_from_stmt(stmt))
            elif isinstance(stmt, ast.Expr):
                self._module_env_vars.extend(
                    _collect_env_vars_from_expr(getattr(stmt, "value", None))
                )

        self._flush_module_env_vars()
        # generic_visit is deliberately not called: class and function
        # bodies are handled explicitly above.

    def _flush_module_env_vars(self) -> None:
        """Attach collected module-level env vars to the module's records.

        When records for ``<rel>:module`` already exist, the env vars are
        appended to each of them; otherwise a single ``module_env_read``
        record is created.
        """
        if not self._module_env_vars:
            return
        node_name = "%s:module" % self._rel
        existing = [r for r in self.results if r["node"] == node_name]
        if existing:
            for r in existing:
                r["env_vars"].extend(self._module_env_vars)
            return
        self._record(
            node_name,
            "module_env_read",
            [],
            "%s:module-env" % self._rel,
            env_vars=self._module_env_vars,
        )

    def _handle_main_block(self, if_node: ast.If) -> None:
        """Emit a ``main_entrypoint`` record for a ``__main__`` guard block.

        Bare-name calls made in the block (e.g. ``main()``) are remembered so
        the matching module-level def can be surfaced as an entry function;
        the record is tagged ``async_entrypoint`` when
        ``_entry_calls_in_block`` reports an async signal.
        """
        entry_calls, is_async = _entry_calls_in_block(if_node.body)
        self._entry_func_names.update(c for c in entry_calls if "." not in c)

        tags = ["entrypoint"]
        if is_async:
            tags.append("async_entrypoint")
        self._record(
            "%s:__main__" % self._rel,
            "main_entrypoint",
            tags,
            "%s:%d" % (self._rel, getattr(if_node, "lineno", 0)),
            side_effects=entry_calls,
            calls=entry_calls,
        )

    def _check_class(self, class_node: ast.ClassDef) -> None:
        """Check every function definition found beneath *class_node*.

        ``ast.walk`` is used, so functions nested inside methods or inside
        nested classes are also checked and attributed to this class's name.
        """
        for item in ast.walk(class_node):
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._check_function(item, class_name=class_node.name)

    def _check_function(
        self,
        func_node: ast.FunctionDef | ast.AsyncFunctionDef,
        class_name: str | None = None,
    ) -> None:
        """Record entry-function, decorator, background-task and env findings.

        Args:
            func_node: The function or method definition to inspect.
            class_name: Name of the enclosing class, or ``None`` for a
                module-level function.
        """
        func_name = func_node.name
        qualified = "%s.%s" % (class_name, func_name) if class_name else func_name
        node_name = "%s:%s" % (self._rel, qualified)

        # Entry function: a module-level def invoked from a `__main__` block.
        # Methods never qualify; a method named `main` on some class is not
        # the script entrypoint.
        if class_name is None and func_name in self._entry_func_names:
            tags = ["entrypoint", "entry_function"]
            if isinstance(func_node, ast.AsyncFunctionDef):
                tags.append("async_entrypoint")
            self._record(
                node_name,
                "entry_function",
                tags,
                "%s:%d" % (self._rel, getattr(func_node, "lineno", 0)),
                calls=[],
            )

        # At most one decorator_registry record per function, however many
        # matching decorators it carries.
        if any(_decorator_registry_tag(d) for d in func_node.decorator_list):
            self._record(
                node_name,
                "decorator_registry",
                ["decorator_registry"],
                "%s:decorator" % node_name,
            )

        # Body scan only for functions with a startup-style name.
        if func_name not in _SCANNED_FUNC_NAMES:
            return

        # Background tasks: inspect expression statements and assignments
        # whose value is a call, including calls nested inside that call, so
        # that `threading.Thread(...).start()` is caught just like
        # `t = threading.Thread(...)`.
        for stmt in ast.walk(func_node):
            if not isinstance(stmt, (ast.Expr, ast.Assign)):
                continue
            if not isinstance(stmt.value, ast.Call):
                continue
            for call in ast.walk(stmt.value):
                if not isinstance(call, ast.Call):
                    continue
                bg_tag = _background_task_tag(call)
                if bg_tag:
                    self._record(
                        node_name,
                        bg_tag,
                        ["background_task"],
                        "%s:bg-task" % node_name,
                    )

        # Env vars: a single walk over the whole function visits each call
        # and subscript exactly once, so a read is neither counted twice nor
        # missed because it sits in a `return` or `if` rather than an
        # assignment.
        env_vars = _collect_env_vars_from_expr(func_node)
        if not env_vars:
            return
        existing = [r for r in self.results if r["node"] == node_name]
        if existing:
            for r in existing:
                r["env_vars"].extend(env_vars)
        else:
            self._record(
                node_name,
                "env_read",
                [],
                "%s:env" % node_name,
                env_vars=env_vars,
            )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Non-Python adapter runtime signal dispatch (TS/JS, Go, Java) -- Map 2 helper.
|
|
2
|
+
|
|
3
|
+
Converts TSRuntimeSignal -> RuntimeNode for all adapters with
|
|
4
|
+
supports_runtime_signals=True (excluding Python which uses the AST path).
|
|
5
|
+
Called by runtime_builder.build_runtime_map_static.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Callable
|
|
12
|
+
|
|
13
|
+
from .map_models import RuntimeNode
|
|
14
|
+
|
|
15
|
+
_log = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
_KIND_MAP: dict[str, dict] = {
|
|
18
|
+
"framework_route": {"node_kind": "api_route", "side_effects": ("http_handler",), "tags": ("nextjs", "framework_route")},
|
|
19
|
+
"middleware": {"node_kind": "middleware", "side_effects": ("request_intercept",), "tags": ("nextjs",)},
|
|
20
|
+
"module_init": {"node_kind": "init", "side_effects": (), "tags": ("module_init",)},
|
|
21
|
+
"background_job": {"node_kind": "worker", "side_effects": (), "tags": ("background_job",)},
|
|
22
|
+
"env_access": {"node_kind": "env_access", "side_effects": (), "tags": ("env",)},
|
|
23
|
+
# Go runtime kinds
|
|
24
|
+
"init_function": {"node_kind": "init", "side_effects": ("import_time",), "tags": ("go", "init")},
|
|
25
|
+
"goroutine_spawn": {"node_kind": "worker", "side_effects": ("concurrency",), "tags": ("go", "goroutine")},
|
|
26
|
+
"package_init": {"node_kind": "init", "side_effects": ("import_time",), "tags": ("go", "package_var")},
|
|
27
|
+
# Java runtime kinds
|
|
28
|
+
"static_block": {"node_kind": "init", "side_effects": ("import_time",), "tags": ("java", "static_block")},
|
|
29
|
+
"spring_component": {"node_kind": "init", "side_effects": ("di_registration",), "tags": ("java", "spring")},
|
|
30
|
+
"thread_spawn": {"node_kind": "worker", "side_effects": ("concurrency",), "tags": ("java", "thread")},
|
|
31
|
+
# JavaScript runtime kinds
|
|
32
|
+
"timer": {"node_kind": "worker", "side_effects": ("scheduled",), "tags": ("js", "timer")},
|
|
33
|
+
"event_listener": {"node_kind": "init", "side_effects": ("event_binding",), "tags": ("js", "event")},
|
|
34
|
+
"top_level_effect": {"node_kind": "init", "side_effects": ("import_time",), "tags": ("js", "top_level")},
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _signal_to_node(signal: object, freshness_fn: Callable[[], str]) -> RuntimeNode | None:
    """Build a RuntimeNode from one adapter runtime signal.

    Args:
        signal: Object read via ``getattr`` for ``kind``, ``file``, ``line``,
            ``confidence`` and ``payload``; missing attributes fall back to
            ``""``, ``""``, ``0``, ``0.7`` and ``{}`` respectively.
        freshness_fn: Called once to obtain the node's ``freshness`` value.

    Returns:
        The converted node, or ``None`` (after a debug log) when the signal's
        kind has no entry in ``_KIND_MAP``.
    """
    kind = getattr(signal, "kind", "")
    source_file = getattr(signal, "file", "")
    line_no = getattr(signal, "line", 0)
    score = getattr(signal, "confidence", 0.7)
    details = getattr(signal, "payload", {})

    spec = _KIND_MAP.get(kind)
    if spec is None:
        _log.debug("_signal_to_node: unknown kind %r -- skipping", kind)
        return None

    # Only env_access signals carry an environment dependency.
    env_deps: tuple[str, ...] = ()
    if kind == "framework_route":
        verbs = "|".join(details.get("http_methods", ["*"]))
        identifier = details.get("route_path", source_file) + ":" + verbs
    elif kind == "middleware":
        identifier = source_file + ":middleware"
    elif kind == "module_init":
        identifier = source_file + ":server_init"
    elif kind == "env_access":
        var_name = details.get("env_var", "")
        identifier = "env:" + var_name
        if var_name:
            env_deps = (var_name,)
    else:
        # Every remaining kind is identified by payload["call"];
        # background jobs default to "job", all others to the kind itself.
        fallback = "job" if kind == "background_job" else kind
        identifier = source_file + ":" + details.get("call", fallback)

    return RuntimeNode(
        node=identifier,
        defined_in=source_file,
        kind=spec["node_kind"],
        calls=(),
        side_effects=spec["side_effects"],
        depends_on_env=env_deps,
        order_constraints=(),
        hidden_runtime_dependencies=(),
        tags=spec["tags"],
        source="ts_regex_adapter",
        evidence=(f"{kind}:line{line_no}",),
        confidence=score,
        freshness=freshness_fn(),
        status="inferred",
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def collect_adapter_runtime_nodes(
    project_dir: Path,
    freshness_fn: Callable[[], str],
) -> list[RuntimeNode]:
    """Collect RuntimeNode objects from non-Python adapter runtime signals.

    Selects the adapters that declare ``supports_runtime_signals=True`` and
    whose language is not ``"python"``, iterates the project's source files
    for those languages, calls ``extract_runtime()`` on each file and
    converts the returned signals to RuntimeNodes.

    Args:
        project_dir: Root directory passed to ``iter_source_files``.
        freshness_fn: Forwarded to ``_signal_to_node`` for each signal.

    Returns:
        The converted nodes; signals of unknown kind are dropped.  Files
        whose read or extraction raises are logged and skipped.
    """
    from .map_common import iter_source_files  # noqa: PLC0415
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    nodes: list[RuntimeNode] = []

    runtime_adapters = {
        ext: adapter
        for ext, adapter in ADAPTERS.items()
        if getattr(adapter, "supports_runtime_signals", False)
        and getattr(adapter, "language", "") != "python"
    }
    if not runtime_adapters:
        return nodes

    languages = list({adapter.language for adapter in runtime_adapters.values()})
    _log.debug("collect_adapter_runtime_nodes: languages=%r", languages)

    for src_file in iter_source_files(project_dir, languages=languages):
        # Look up in the filtered table, not ADAPTERS, so the Python
        # exclusion holds even if a .py file is yielded.
        adapter = runtime_adapters.get(src_file.suffix.lower())
        if adapter is None:
            continue
        try:
            content = src_file.read_text(encoding="utf-8", errors="replace")
            signals = adapter.extract_runtime(content, src_file)
        except Exception as exc:  # noqa: BLE001
            # Best-effort scan: one unreadable or unparsable file must not
            # abort the whole collection.
            _log.error("collect_adapter_runtime_nodes: failed for %s: %s", src_file, exc)
            continue

        for sig in signals:
            node = _signal_to_node(sig, freshness_fn)
            if node is not None:
                nodes.append(node)

    _log.debug("collect_adapter_runtime_nodes: collected %d nodes", len(nodes))
    return nodes
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Shared helpers for seed bootstrap and adoption modules.
|
|
2
|
+
|
|
3
|
+
Extracted to avoid circular imports between seed_bootstrapper <-> seed_adoption.
|
|
4
|
+
Not a public API -- consumers are seed_bootstrapper.py and seed_adoption.py only.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .map_storage import _atomic_write_json
|
|
13
|
+
import logging
|
|
14
|
+
_log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Required top-level key per seed type (in addition to schema_version)
|
|
17
|
+
# Seed name -> the top-level key that seed's document must contain.
SEED_REQUIRED_KEY: dict[str, str] = dict(
    authority_domains="domains",
    sanctioned_assets="patterns",
    data_contract_priorities="priority_entities",
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def validate_seed_schema(seed_name: str, data: Any) -> bool:
    """Run the minimal structural check on a seed document.

    *data* passes when it is a dict, contains ``schema_version``, and -- if
    *seed_name* has an entry in ``SEED_REQUIRED_KEY`` -- also contains that
    required top-level key.  Seed names without an entry only need
    ``schema_version``.
    """
    if not (isinstance(data, dict) and "schema_version" in data):
        return False
    needed = SEED_REQUIRED_KEY.get(seed_name)
    return not needed or needed in data
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def gather_minimal_context(project_dir: Path) -> dict[str, str]:
    """Build a lightweight 2-level directory tree plus a pyproject.toml snippet.

    The whole function is best-effort: filesystem errors never propagate,
    they only shorten the result.

    Args:
        project_dir: Directory to describe.

    Returns:
        A dict with two string values:

        * ``"tree"`` -- newline-joined names of the visible top-level entries
          (directories suffixed with ``/``), each directory followed by its
          visible children indented two spaces.  Only the first 20 sorted
          children of a directory are considered.  Dot-prefixed names and
          common cache/venv directories are hidden.
        * ``"pyproject"`` -- the first 2000 characters of
          ``pyproject.toml``, or ``""`` when it is missing or unreadable.
          Undecodable bytes are replaced rather than raising.
    """
    skip_names = {"__pycache__", "node_modules", "venv", ".venv", ".git"}

    def _hidden(path: Path) -> bool:
        return path.name.startswith(".") or path.name in skip_names

    def _label(path: Path, indent: str = "") -> str:
        return indent + path.name + ("/" if path.is_dir() else "")

    tree_lines: list[str] = []
    try:
        for entry in sorted(project_dir.iterdir()):
            if _hidden(entry):
                continue
            tree_lines.append(_label(entry))
            if not entry.is_dir():
                continue
            try:
                # The 20-entry cap is applied before hidden names are
                # filtered out, so fewer than 20 children may be listed.
                for sub in sorted(entry.iterdir())[:20]:
                    if not _hidden(sub):
                        tree_lines.append(_label(sub, "  "))
            except OSError:
                # Unreadable subdirectory: keep its own line, skip children.
                continue
    except OSError:
        # Unreadable or missing project dir: keep whatever was gathered.
        pass

    # Read directly instead of exists()-then-read: a missing file, a
    # directory with that name, or a permission problem all yield "".
    try:
        pyproj = (project_dir / "pyproject.toml").read_text(
            encoding="utf-8", errors="replace"
        )[:2000]
    except OSError:
        pyproj = ""

    return {"tree": "\n".join(tree_lines), "pyproject": pyproj}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def load_seed_state(state_path: Path) -> dict[str, Any]:
    """Load .bootstrap_state.json; return an empty scaffold if missing or corrupt.

    Args:
        state_path: Path of the JSON state file.

    Returns:
        The parsed state when the file holds a JSON object.  In every other
        case (file missing or unreadable, bytes that are not valid UTF-8,
        invalid JSON, or JSON that is not an object) a fresh
        ``{"schema_version": "1.0.0", "seeds": {}}`` scaffold is returned.
    """
    # Built per call so callers can mutate the result without affecting
    # later loads.
    empty_state: dict[str, Any] = {"schema_version": "1.0.0", "seeds": {}}
    try:
        data = json.loads(state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return empty_state
    return data if isinstance(data, dict) else empty_state
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def save_seed_state(state_path: Path, state: dict[str, Any]) -> None:
    """Write the bootstrap/adoption *state* to *state_path*.

    Delegates the write entirely to ``_atomic_write_json``.
    """
    _atomic_write_json(state_path, state)
|