vigil_codeintel-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,438 @@
1
+ """AST visitor and utilities for runtime_builder.py (Map 2 static scanner).
2
+
3
+ Internal module -- not part of the public API.
4
+ Extracted from runtime_builder.py to keep each file under 400 lines.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import ast
9
+ import logging
10
+ _log = logging.getLogger(__name__)
11
+
12
+ __all__ = [
13
+ "_RuntimeVisitor",
14
+ "_call_name",
15
+ "_decorator_registry_tag",
16
+ "_background_task_tag",
17
+ "_env_var_from_call",
18
+ "_collect_env_vars_from_stmt",
19
+ "_collect_env_vars_from_expr",
20
+ "_ROUTE_DECORATOR_ATTRS",
21
+ "_BACKGROUND_TASK_CALLS",
22
+ "_SCANNED_FUNC_NAMES",
23
+ ]
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Pattern constants
27
+ # ---------------------------------------------------------------------------
28
+
29
# (receiver name, attribute) pairs that mark a route/dispatch registration
# when used as a decorator, e.g. ``@app.route(...)`` or ``@router.get(...)``.
_ROUTE_DECORATOR_ATTRS: frozenset[tuple[str, str]] = frozenset(
    [(owner, "route") for owner in ("app", "bp", "blueprint", "router", "api")]
    + [
        ("router", verb)
        for verb in ("get", "post", "put", "delete", "patch", "head", "options")
    ]
    + [("dispatch", "register")]
)
45
+
46
# (module name, callable name) pairs recognised as background-task spawns.
_BACKGROUND_TASK_CALLS: frozenset[tuple[str, str]] = frozenset(
    (
        ("threading", "Thread"),
        ("asyncio", "create_task"),
        *(("subprocess", spawner) for spawner in ("Popen", "run", "call")),
    )
)
54
+
55
# Names of functions whose bodies get scanned for background task spawns.
_SCANNED_FUNC_NAMES: frozenset[str] = frozenset(
    "__init__ bootstrap setup startup start initialize init".split()
)
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # AST utility functions
69
+ # ---------------------------------------------------------------------------
70
+
71
def _call_name(call: ast.Call) -> str:
    """Render the target of *call* as a dotted name, best effort.

    ``f()`` yields ``"f"`` and ``a.b.c()`` yields ``"a.b.c"``.  When an
    attribute chain is rooted in something other than a plain name (another
    call, a subscript, ...), only the attribute segments are rendered.  Any
    other kind of call target yields ``"<unknown>"``.
    """
    target = call.func
    if isinstance(target, ast.Name):
        return target.id
    if not isinstance(target, ast.Attribute):
        return "<unknown>"

    # Peel attributes from the outside in; prepend so the result reads
    # left-to-right without a final reversal.
    segments: list[str] = []
    cursor: ast.expr = target
    while isinstance(cursor, ast.Attribute):
        segments.insert(0, cursor.attr)
        cursor = cursor.value
    if isinstance(cursor, ast.Name):
        segments.insert(0, cursor.id)
    return ".".join(segments)
86
+
87
+
88
def _decorator_registry_tag(decorator: ast.expr) -> str | None:
    """Classify a decorator expression as a route/dispatch registration.

    Returns ``"decorator_registry"`` when the decorator, after stripping any
    call wrappers, is ``<name>.<attr>`` with ``(name, attr)`` listed in
    ``_ROUTE_DECORATOR_ATTRS``; otherwise returns ``None``.
    """
    # ``@app.route("/x")`` is a Call wrapping ``app.route``; unwrap to the
    # expression that is actually being called.
    target = decorator
    while isinstance(target, ast.Call):
        target = target.func

    if not isinstance(target, ast.Attribute):
        return None
    owner = target.value
    if isinstance(owner, ast.Name) and (owner.id, target.attr) in _ROUTE_DECORATOR_ATTRS:
        return "decorator_registry"
    return None
99
+
100
+
101
def _is_main_guard(test: ast.expr) -> bool:
    """Tell whether *test* is the ``__name__ == "__main__"`` comparison.

    Both operand orders are accepted (``__name__ == "__main__"`` and
    ``"__main__" == __name__``).  Only a single ``==`` comparison qualifies;
    chained comparisons and other operators are rejected.
    """
    single_eq = (
        isinstance(test, ast.Compare)
        and len(test.ops) == 1
        and isinstance(test.ops[0], ast.Eq)
    )
    if not single_eq:
        return False

    seen_name = False
    seen_main = False
    for operand in (test.left, *test.comparators):
        if isinstance(operand, ast.Name) and operand.id == "__name__":
            seen_name = True
        elif isinstance(operand, ast.Constant) and operand.value == "__main__":
            seen_main = True
    return seen_name and seen_main
118
+
119
+
120
def _entry_calls_in_block(body: list[ast.stmt]) -> tuple[list[str], bool]:
    """Summarise the calls made inside a ``__main__`` guard body.

    Returns ``(entry_call_names, is_async)``:

    entry_call_names: names of every call found anywhere in *body*, as
        rendered by ``_call_name`` (e.g. ``main``, ``app.run``,
        ``asyncio.run``), without duplicates and without ``"<unknown>"``.
    is_async: True when any call is named ``asyncio.run`` or
        ``asyncio.get_event_loop``.

    Only call expressions count; an assignment or import in the guard does
    not by itself name an entry function.
    """
    async_markers = ("asyncio.run", "asyncio.get_event_loop")
    found: list[str] = []
    saw_async = False

    call_nodes = (
        candidate
        for stmt in body
        for candidate in ast.walk(stmt)
        if isinstance(candidate, ast.Call)
    )
    for call in call_nodes:
        target = _call_name(call)
        if target in async_markers:
            saw_async = True
        if not target or target == "<unknown>" or target in found:
            continue
        found.append(target)
    return found, saw_async
143
+
144
+
145
def _background_task_tag(call: ast.Call) -> str | None:
    """Classify *call* as a background-task spawn.

    Only ``<name>.<attr>(...)`` calls whose ``(name, attr)`` pair is listed in
    ``_BACKGROUND_TASK_CALLS`` are recognised.  Returns ``"threading_thread"``,
    ``"asyncio_create_task"`` or ``"subprocess_spawn"``; ``None`` for
    everything else.
    """
    target = call.func
    if not (isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name)):
        return None
    if (target.value.id, target.attr) not in _BACKGROUND_TASK_CALLS:
        return None

    kind_by_attr = {
        "Thread": "threading_thread",
        "create_task": "asyncio_create_task",
        "Popen": "subprocess_spawn",
        "run": "subprocess_spawn",
        "call": "subprocess_spawn",
    }
    return kind_by_attr.get(target.attr)
161
+
162
+
163
def _env_var_from_call(call: ast.Call) -> list[str]:
    """Extract the env var name read by an ``os.getenv`` / ``os.environ.get`` call.

    Recognised shapes (first positional argument must be a string literal):

    - ``<anything>.getenv("X")``  -- e.g. ``os.getenv("X")``
    - ``environ.get("X")`` and ``<anything>.environ.get("X")``
      -- e.g. ``os.environ.get("X")``

    Returns a one-element list with the variable name, or an empty list when
    the call is not an env read or its name is not a string literal.
    """
    func = call.func
    if not isinstance(func, ast.Attribute):
        return []

    if func.attr == "get":
        # ``.get`` is ubiquitous (dicts, HTTP clients, config objects), so it
        # only counts when the receiver is literally named ``environ``.
        receiver = func.value
        if isinstance(receiver, ast.Name):
            receiver_name = receiver.id
        elif isinstance(receiver, ast.Attribute):
            receiver_name = receiver.attr
        else:
            receiver_name = None
        if receiver_name != "environ":
            return []
    elif func.attr != "getenv":
        return []

    if not call.args:
        return []
    first = call.args[0]
    if isinstance(first, ast.Constant) and isinstance(first.value, str):
        return [first.value]
    return []
175
+
176
+
177
def _subscript_env_var(node: ast.Subscript) -> str | None:
    """Return ``'VAR'`` for an ``os.environ['VAR']`` subscript, else ``None``.

    The subscripted value must be exactly ``os.environ`` and the key must be
    a string literal.
    """
    container = node.value
    is_os_environ = (
        isinstance(container, ast.Attribute)
        and container.attr == "environ"
        and isinstance(container.value, ast.Name)
        and container.value.id == "os"
    )
    if not is_os_environ:
        return None

    # The key is either the slice itself or, on Python 3.8, wrapped one level
    # down in ``ast.Index`` (reachable through its ``value`` attribute).
    key = node.slice
    for candidate in (key, getattr(key, "value", None)):
        if isinstance(candidate, ast.Constant) and isinstance(candidate.value, str):
            return candidate.value
    return None
193
+
194
+
195
def _collect_env_vars_from_stmt(stmt: ast.stmt) -> list[str]:
    """Collect env var names read anywhere inside *stmt*.

    Every node under the statement is inspected: calls go through
    ``_env_var_from_call`` and subscripts through ``_subscript_env_var``.
    Names are returned in traversal order; repeats are kept.
    """
    found: list[str] = []
    for child in ast.walk(stmt):
        if isinstance(child, ast.Call):
            found += _env_var_from_call(child)
            continue
        if isinstance(child, ast.Subscript):
            name = _subscript_env_var(child)
            if name:
                found.append(name)
    return found
206
+
207
+
208
def _collect_env_vars_from_expr(expr) -> list[str]:
    """Collect env var names read anywhere inside *expr*.

    ``None`` is accepted and yields an empty list.  Otherwise every node under
    the expression is inspected: calls go through ``_env_var_from_call`` and
    subscripts through ``_subscript_env_var``.  Names are returned in
    traversal order; repeats are kept.
    """
    if expr is None:
        return []

    names: list[str] = []
    for child in ast.walk(expr):
        if isinstance(child, ast.Subscript):
            key = _subscript_env_var(child)
            if key:
                names.append(key)
        elif isinstance(child, ast.Call):
            names.extend(_env_var_from_call(child))
    return names
221
+
222
+
223
+ # ---------------------------------------------------------------------------
224
+ # AST Visitor
225
+ # ---------------------------------------------------------------------------
226
+
227
class _RuntimeVisitor(ast.NodeVisitor):
    """Walk a module AST and collect runtime-relevant patterns.

    Every finding is appended to ``self.results`` as a dict with the keys
    ``node``, ``kind``, ``tags``, ``env_vars``, ``side_effects`` and
    ``evidence`` (entrypoint records additionally carry ``calls``).

    Collects:
    - ``if __name__ == "__main__":`` blocks          -> main_entrypoint
    - top-level functions called from such a block  -> entry_function
    - module-level call statements                  -> import_time_side_effects
    - route/dispatch decorators                     -> decorator_registry
    - background task spawns at module level and in
      scanned function bodies                       -> background_task
    - os.environ reads                              -> env_vars
    """

    def __init__(self, rel: str) -> None:
        """Create a visitor; *rel* is the path prefix used in node names."""
        self._rel = rel
        self.results: list[dict] = []
        self._module_env_vars: list[str] = []
        # Names of functions invoked from a `__main__` block (entry functions).
        self._entry_func_names: set[str] = set()

    def visit_Module(self, node: ast.Module) -> None:
        """Visit top-level statements only (module-scope detection)."""
        # First pass: detect `if __name__ == "__main__":` entrypoint block(s).
        # This must run before the main pass so that the entry-function names
        # are known when the corresponding module-level defs are checked.
        for stmt in node.body:
            if isinstance(stmt, ast.If) and _is_main_guard(stmt.test):
                self._handle_main_block(stmt)

        node_name = "%s:module" % self._rel
        for stmt in node.body:
            if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call):
                call = stmt.value
                call_name_str = _call_name(call)
                self.results.append({
                    "node": node_name,
                    "kind": "import_time_side_effect",
                    "tags": ["import_time_side_effects"],
                    "env_vars": [],
                    "side_effects": [call_name_str] if call_name_str else [],
                    "evidence": ("%s:module-level-call" % self._rel,),
                })
                # Background-task check covers the call itself and any call
                # nested in it, so ``threading.Thread(...).start()`` (where
                # the statement's outer call is ``.start()``) is found too.
                for inner in ast.walk(call):
                    if not isinstance(inner, ast.Call):
                        continue
                    bg_tag = _background_task_tag(inner)
                    if bg_tag:
                        self.results.append({
                            "node": node_name,
                            "kind": bg_tag,
                            "tags": ["background_task"],
                            "env_vars": [],
                            "side_effects": [],
                            "evidence": ("%s:module-level-bg" % self._rel,),
                        })
            elif isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._check_function(stmt)
            elif isinstance(stmt, ast.ClassDef):
                self._check_class(stmt)

            # Collect module-level env vars from assignments and expression
            # statements (independent of the branch taken above).
            if isinstance(stmt, ast.Assign):
                self._module_env_vars.extend(_collect_env_vars_from_stmt(stmt))
            elif isinstance(stmt, ast.Expr):
                self._module_env_vars.extend(
                    _collect_env_vars_from_expr(getattr(stmt, "value", None))
                )

        self._flush_module_env_vars()
        # Do NOT call generic_visit -- class/function bodies handled explicitly

    def _flush_module_env_vars(self) -> None:
        """Attach the collected module-level env vars to the results.

        The names are added to every existing ``<rel>:module`` record; when
        there is none, a dedicated ``module_env_read`` record is appended.
        """
        if not self._module_env_vars:
            return
        node_name = "%s:module" % self._rel
        existing = [r for r in self.results if r["node"] == node_name]
        if existing:
            for r in existing:
                r["env_vars"].extend(self._module_env_vars)
        else:
            self.results.append({
                "node": node_name,
                "kind": "module_env_read",
                "tags": [],
                # Copy so later changes to the accumulator do not leak in.
                "env_vars": self._module_env_vars[:],
                "side_effects": [],
                "evidence": ("%s:module-env" % self._rel,),
            })

    def _handle_main_block(self, if_node: ast.If) -> None:
        """Emit a main_entrypoint node for a `__main__` guard block.

        Records the invoked entry-function names (for cross-referencing with
        module-level defs) and flags async entrypoints.
        """
        entry_calls, is_async = _entry_calls_in_block(if_node.body)
        # Remember bare-name calls (e.g. `main()`) so the module-level def of
        # that function can be surfaced as an entry function too.
        for cname in entry_calls:
            if "." not in cname:
                self._entry_func_names.add(cname)
        line = getattr(if_node, "lineno", 0)
        node_name = "%s:__main__" % self._rel
        tags = ["entrypoint"]
        if is_async:
            tags.append("async_entrypoint")
        self.results.append({
            "node": node_name,
            "kind": "main_entrypoint",
            "tags": tags,
            "env_vars": [],
            "side_effects": list(entry_calls),
            "calls": list(entry_calls),
            "evidence": ("%s:%d" % (self._rel, line),),
        })

    def _check_class(self, class_node: ast.ClassDef) -> None:
        """Check every function definition found under *class_node*.

        ``ast.walk`` is used, so functions nested inside methods and methods
        of nested classes are included; all of them are attributed to
        ``class_node.name``.
        """
        for item in ast.walk(class_node):
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._check_function(item, class_name=class_node.name)

    def _check_function(
        self,
        func_node: ast.FunctionDef | ast.AsyncFunctionDef,
        class_name: str | None = None,
    ) -> None:
        """Record entry-function, decorator, background-task and env findings.

        The body is only scanned (background tasks, env reads) when the
        function's name is in ``_SCANNED_FUNC_NAMES``; the entry-function and
        decorator checks apply to every function.
        """
        func_name = func_node.name
        qualified = "%s.%s" % (class_name, func_name) if class_name else func_name
        node_name = "%s:%s" % (self._rel, qualified)

        # Entry-function: a module-level def invoked from a `__main__` block.
        # Only top-level functions qualify (class_name is None); a method named
        # `main` on some class is not the script entrypoint.
        if class_name is None and func_name in self._entry_func_names:
            tags = ["entrypoint", "entry_function"]
            if isinstance(func_node, ast.AsyncFunctionDef):
                tags.append("async_entrypoint")
            self.results.append({
                "node": node_name,
                "kind": "entry_function",
                "tags": tags,
                "env_vars": [],
                "side_effects": [],
                "calls": [],
                "evidence": ("%s:%d" % (self._rel, getattr(func_node, "lineno", 0)),),
            })

        # Decorators check: one record per function, however many of its
        # decorators match.
        if any(_decorator_registry_tag(d) for d in func_node.decorator_list):
            self.results.append({
                "node": node_name,
                "kind": "decorator_registry",
                "tags": ["decorator_registry"],
                "env_vars": [],
                "side_effects": [],
                "evidence": ("%s:decorator" % node_name,),
            })

        # Body scan for scanned functions
        if func_name not in _SCANNED_FUNC_NAMES:
            return

        env_vars: list[str] = []
        for node in ast.walk(func_node):
            # Background tasks: for every expression statement or assignment
            # whose value is a call, inspect that call and all calls nested in
            # it.  Statement values never overlap, so nothing is seen twice.
            if isinstance(node, (ast.Expr, ast.Assign)) and isinstance(node.value, ast.Call):
                for inner in ast.walk(node.value):
                    if not isinstance(inner, ast.Call):
                        continue
                    bg_tag = _background_task_tag(inner)
                    if bg_tag:
                        self.results.append({
                            "node": node_name,
                            "kind": bg_tag,
                            "tags": ["background_task"],
                            "env_vars": [],
                            "side_effects": [],
                            "evidence": ("%s:bg-task" % node_name,),
                        })

            # Env vars: ast.walk already reaches every node exactly once, so
            # each call/subscript is examined here directly rather than by
            # re-walking statement subtrees (which would count reads twice).
            if isinstance(node, ast.Call):
                env_vars.extend(_env_var_from_call(node))
            elif isinstance(node, ast.Subscript):
                var = _subscript_env_var(node)
                if var:
                    env_vars.append(var)

        # Report each variable once, in first-seen order.
        env_vars = list(dict.fromkeys(env_vars))
        if env_vars:
            existing = [r for r in self.results if r["node"] == node_name]
            if existing:
                for r in existing:
                    r["env_vars"].extend(env_vars)
            else:
                self.results.append({
                    "node": node_name,
                    "kind": "env_read",
                    "tags": [],
                    "env_vars": env_vars,
                    "side_effects": [],
                    "evidence": ("%s:env" % node_name,),
                })
@@ -0,0 +1,137 @@
1
+ """TS/JS adapter runtime signal dispatch -- Map 2 helper.
2
+
3
+ Converts TSRuntimeSignal -> RuntimeNode for all adapters with
4
+ supports_runtime_signals=True (excluding Python which uses the AST path).
5
+ Called by runtime_builder.build_runtime_map_static.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from pathlib import Path
11
+ from typing import Callable
12
+
13
+ from .map_models import RuntimeNode
14
+
15
+ _log = logging.getLogger(__name__)
16
+
17
# Signal kind -> RuntimeNode fields, built from
# (signal kind, node kind, side effects, tags) rows.
_KIND_MAP: dict[str, dict] = {
    signal_kind: {"node_kind": node_kind, "side_effects": effects, "tags": tags}
    for signal_kind, node_kind, effects, tags in (
        ("framework_route", "api_route", ("http_handler",), ("nextjs", "framework_route")),
        ("middleware", "middleware", ("request_intercept",), ("nextjs",)),
        ("module_init", "init", (), ("module_init",)),
        ("background_job", "worker", (), ("background_job",)),
        ("env_access", "env_access", (), ("env",)),
        # Go runtime kinds
        ("init_function", "init", ("import_time",), ("go", "init")),
        ("goroutine_spawn", "worker", ("concurrency",), ("go", "goroutine")),
        ("package_init", "init", ("import_time",), ("go", "package_var")),
        # Java runtime kinds
        ("static_block", "init", ("import_time",), ("java", "static_block")),
        ("spring_component", "init", ("di_registration",), ("java", "spring")),
        ("thread_spawn", "worker", ("concurrency",), ("java", "thread")),
        # JavaScript runtime kinds
        ("timer", "worker", ("scheduled",), ("js", "timer")),
        ("event_listener", "init", ("event_binding",), ("js", "event")),
        ("top_level_effect", "init", ("import_time",), ("js", "top_level")),
    )
}
36
+
37
+
38
def _signal_to_node(signal: object, freshness_fn: Callable[[], str]) -> RuntimeNode | None:
    """Convert one adapter runtime signal to a RuntimeNode.

    *signal* is read through ``getattr`` (``kind``, ``file``, ``line``,
    ``confidence``, ``payload``), so missing attributes fall back to defaults.
    *freshness_fn* is called once to stamp the node.

    Returns ``None`` (after a debug log) when the signal's kind has no entry
    in ``_KIND_MAP``.
    """
    kind = getattr(signal, "kind", "")
    mapping = _KIND_MAP.get(kind)
    if mapping is None:
        _log.debug("_signal_to_node: unknown kind %r -- skipping", kind)
        return None

    file_posix = getattr(signal, "file", "")
    line = getattr(signal, "line", 0)
    confidence = getattr(signal, "confidence", 0.7)
    # A signal may carry ``payload=None``; treat it like a missing payload.
    payload = getattr(signal, "payload", None) or {}

    depends_on_env: tuple[str, ...] = ()
    if kind == "framework_route":
        # No (or an empty) method list means "any method".
        methods = "|".join(payload.get("http_methods") or ["*"])
        route_path = payload.get("route_path")
        if route_path is None:
            route_path = file_posix
        node_id = route_path + ":" + methods
    elif kind == "middleware":
        node_id = file_posix + ":middleware"
    elif kind == "module_init":
        node_id = file_posix + ":server_init"
    elif kind == "background_job":
        node_id = file_posix + ":" + (payload.get("call") or "job")
    elif kind == "env_access":
        env_var = payload.get("env_var") or ""
        node_id = "env:" + env_var
        if env_var:
            depends_on_env = (env_var,)
    else:
        # Generic fallback for _KIND_MAP entries without a dedicated branch
        # above (e.g. Go kinds: init_function, goroutine_spawn, package_init).
        # node_id uses payload["call"] when present, falling back to kind.
        node_id = file_posix + ":" + (payload.get("call") or kind)

    return RuntimeNode(
        node=node_id,
        defined_in=file_posix,
        kind=mapping["node_kind"],
        calls=(),
        side_effects=mapping["side_effects"],
        depends_on_env=depends_on_env,
        order_constraints=(),
        hidden_runtime_dependencies=(),
        tags=mapping["tags"],
        source="ts_regex_adapter",
        evidence=(f"{kind}:line{line}",),
        confidence=confidence,
        freshness=freshness_fn(),
        status="inferred",
    )
92
+
93
+
94
def collect_adapter_runtime_nodes(
    project_dir: Path,
    freshness_fn: Callable[[], str],
) -> list[RuntimeNode]:
    """Collect RuntimeNode objects from non-Python adapter runtime signals.

    Selects the adapters in ``ADAPTERS`` that declare
    ``supports_runtime_signals`` and whose language is not ``"python"``,
    iterates the project's source files for those languages, calls
    ``extract_runtime()`` on each file and converts the resulting signals with
    ``_signal_to_node``.  A file whose read or extraction fails is logged and
    skipped; it does not abort the run.
    """
    from .map_common import iter_source_files  # noqa: PLC0415
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    runtime_adapters = {
        ext: adapter
        for ext, adapter in ADAPTERS.items()
        if getattr(adapter, "supports_runtime_signals", False)
        and getattr(adapter, "language", "") != "python"
    }
    if not runtime_adapters:
        return []

    # Sorted so the language list (and the debug log) is stable across runs.
    languages = sorted({adapter.language for adapter in runtime_adapters.values()})
    _log.debug("collect_adapter_runtime_nodes: languages=%r", languages)

    nodes: list[RuntimeNode] = []
    for src_file in iter_source_files(project_dir, languages=languages):
        # Look up in the filtered mapping so the Python exclusion holds here
        # too, whatever files the iterator yields.
        adapter = runtime_adapters.get(src_file.suffix.lower())
        if adapter is None:
            continue
        try:
            content = src_file.read_text(encoding="utf-8", errors="replace")
            signals = adapter.extract_runtime(content, src_file)
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: one failing file must not abort the scan.
            _log.error("collect_adapter_runtime_nodes: failed for %s: %s", src_file, exc)
            continue

        for sig in signals or ():
            node = _signal_to_node(sig, freshness_fn)
            if node is not None:
                nodes.append(node)

    _log.debug("collect_adapter_runtime_nodes: collected %d nodes", len(nodes))
    return nodes
@@ -0,0 +1,82 @@
1
+ """Shared helpers for seed bootstrap and adoption modules.
2
+
3
+ Extracted to avoid circular imports between seed_bootstrapper <-> seed_adoption.
4
+ Not a public API -- consumers are seed_bootstrapper.py and seed_adoption.py only.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from .map_storage import _atomic_write_json
13
+ import logging
14
+ _log = logging.getLogger(__name__)
15
+
16
# For each seed type, the top-level key that must be present
# (in addition to schema_version).
SEED_REQUIRED_KEY: dict[str, str] = dict(
    authority_domains="domains",
    sanctioned_assets="patterns",
    data_contract_priorities="priority_entities",
)
22
+
23
+
24
def validate_seed_schema(seed_name: str, data: Any) -> bool:
    """Minimal seed validation.

    *data* passes when it is a dict, contains ``schema_version`` and, if
    *seed_name* has an entry in ``SEED_REQUIRED_KEY``, also contains that
    required top-level key.
    """
    if not isinstance(data, dict) or "schema_version" not in data:
        return False
    required = SEED_REQUIRED_KEY.get(seed_name)
    return not required or required in data
34
+
35
+
36
def gather_minimal_context(project_dir: Path) -> dict[str, str]:
    """Build a lightweight 2-level directory tree plus a pyproject.toml snippet.

    Returns ``{"tree": ..., "pyproject": ...}``:

    tree: newline-joined names of the entries in *project_dir* (directories
        get a trailing ``/``), each directory followed by its own entries,
        indented.  Dot-entries and common tooling directories are skipped.
    pyproject: the first 2000 characters of ``pyproject.toml``, or ``""``
        when the file is missing or cannot be read.

    The function is best-effort throughout: filesystem errors shorten the
    result instead of raising.
    """
    skip_names = {"__pycache__", "node_modules", "venv", ".venv", ".git"}

    def _skipped(path: Path) -> bool:
        return path.name.startswith(".") or path.name in skip_names

    tree_lines: list[str] = []
    try:
        for entry in sorted(project_dir.iterdir()):
            if _skipped(entry):
                continue
            entry_is_dir = entry.is_dir()
            tree_lines.append(entry.name + ("/" if entry_is_dir else ""))
            if not entry_is_dir:
                continue
            try:
                # The cap of 20 applies before skipped names are filtered out.
                for sub in sorted(entry.iterdir())[:20]:
                    if _skipped(sub):
                        continue
                    tree_lines.append(" " + sub.name + ("/" if sub.is_dir() else ""))
            except OSError:
                # Unreadable subdirectory: keep what was listed and move on.
                continue
    except OSError:
        # Unreadable project root: return whatever was gathered so far.
        pass

    # Read directly instead of exists()-then-read: a missing file, a directory
    # of that name and a permission problem are all OSError.  Undecodable
    # bytes are replaced so a stray byte does not discard the whole snippet.
    try:
        pyproj = (project_dir / "pyproject.toml").read_text(
            encoding="utf-8", errors="replace"
        )[:2000]
    except OSError:
        pyproj = ""
    return {"tree": "\n".join(tree_lines), "pyproject": pyproj}
65
+
66
+
67
def load_seed_state(state_path: Path) -> dict[str, Any]:
    """Load .bootstrap_state.json; return empty scaffold if missing or corrupt.

    "Corrupt" covers an unreadable file, bytes that are not valid UTF-8,
    invalid JSON, and valid JSON whose top level is not an object.  A fresh
    scaffold dict is returned in each of those cases.
    """
    try:
        data = json.loads(state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file.  ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError (each subclasses it).
        data = None
    if isinstance(data, dict):
        return data
    return {"schema_version": "1.0.0", "seeds": {}}
78
+
79
+
80
def save_seed_state(state_path: Path, state: dict[str, Any]) -> None:
    """Persist bootstrap/adoption *state* to *state_path*.

    Thin wrapper: the write is delegated to ``_atomic_write_json``.
    """
    _atomic_write_json(state_path, state)