vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,626 @@
1
+ """JavaScript source adapter -- tree-sitter AST-based structural extractor.
2
+
3
+ Parses .js / .jsx / .mjs / .cjs files via tree-sitter for true AST accuracy,
4
+ replacing the former regex+lexer approach. All extracted IR items carry
5
+ ``confidence=1.0``.
6
+
7
+ Capabilities (L6a scope):
8
+ - supports_structural = True
9
+ - supports_contracts = False (L6b)
10
+ - supports_runtime_signals = True (L6a: timer/event_listener/top_level_effect)
11
+ - supports_authority_writes = True
12
+
13
+ Import forms handled (ES-module):
14
+ ``import X from 'Y'`` -- default import
15
+ ``import { A, B } from 'Y'`` -- named imports
16
+ ``import * as X from 'Y'`` -- namespace import
17
+ ``import 'Y'`` -- side-effect import
18
+ ``export { A, B } from 'Y'`` -- re-export named
19
+ ``export * from 'Y'`` -- re-export star
20
+ ``import('Y')`` / ``await import('Y')`` -- dynamic import
21
+
22
+ Import forms handled (CommonJS):
23
+ ``const X = require('Y')`` -- lexical_declaration
24
+ ``let X = require('Y')`` -- lexical_declaration
25
+ ``const { X } = require('Y')`` -- destructuring
26
+ ``var X = require('Y')`` -- variable_declaration
27
+ ``require('Y');`` -- bare side-effect
28
+
29
+ Symbol kinds extracted (top-level only):
30
+ function -- ``function_declaration`` (exported or not)
31
+ class -- ``class_declaration`` (exported or not)
32
+ const -- ``lexical_declaration`` / ``variable_declaration``
33
+
34
+ Visibility rule (JS):
35
+ - ``"public"`` -- declaration is wrapped in an ``export_statement``
36
+ - ``"module"`` -- declaration is not exported
37
+
38
+ Known limitations (explicit L2 tech-debt, do NOT fix here):
39
+ - ``module.exports = { ... }`` / ``exports.foo = ...`` are NOT emitted as
40
+ symbols. CJS exports are tracked as import edges for their consumers;
41
+ the producer side is L6 work.
42
+ - JSX attribute expressions are not inspected (treated as JS).
43
+ - ``require.resolve(...)`` and ``require.cache`` are ignored.
44
+ - Dynamic ``import(variable)`` with non-literal argument is skipped
45
+ (consistent with prior adapter behaviour).
46
+ - ``enum`` is not valid JavaScript; tree-sitter parses it as ERROR and it
47
+ is silently ignored (no SymbolDef emitted).
48
+ """
49
+ from __future__ import annotations
50
+
51
+ import logging
52
+ from pathlib import Path
53
+
54
+ from ._base import RegexAdapterBase
55
+ from ._ir import AuthorityWriteCandidate, ImportEdge, SymbolDef, TSRuntimeSignal
56
+ from ._patterns import classify_import
57
+ from ._treesitter import (
58
+ iter_named_children,
59
+ node_line,
60
+ node_text,
61
+ parse_bytes,
62
+ walk_named,
63
+ )
64
+
65
+ __all__ = ["JavascriptAdapter"]
66
+
67
+ _log = logging.getLogger(__name__)
68
+
69
+ _LANGUAGE = "javascript"
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Internal helpers
74
+ # ---------------------------------------------------------------------------
75
+
76
def _string_module(string_node, src: bytes) -> str:
    """Return the module specifier carried by a tree-sitter ``string`` node.

    The text of the first ``string_fragment`` child is returned when one
    exists. Otherwise the node's full text is returned with any leading or
    trailing ``'`` / ``"`` characters removed.
    """
    fragment = next(
        (kid for kid in string_node.children if kid.type == "string_fragment"),
        None,
    )
    if fragment is not None:
        return node_text(fragment, src)

    # No fragment child: use the raw node text minus its surrounding quotes.
    return node_text(string_node, src).strip("'\"")
88
+
89
+
90
def _find_require_module(call_expr_node, src: bytes) -> str | None:
    """Return the string literal handed to ``require(...)``, else ``None``.

    The call qualifies only when it has a direct ``identifier`` child whose
    text is ``require`` and a direct ``arguments`` child. The result is the
    specifier of the first string-literal argument found in that argument
    list; ``None`` is returned when there is no such argument.
    """
    named_kids = [kid for kid in call_expr_node.children if kid.is_named]
    identifiers = [kid for kid in named_kids if kid.type == "identifier"]
    argument_lists = [kid for kid in named_kids if kid.type == "arguments"]

    # When several children match, the last one wins -- the same outcome as
    # a single left-to-right scan that keeps overwriting its result.
    if not identifiers or node_text(identifiers[-1], src) != "require":
        return None
    if not argument_lists:
        return None

    for argument in argument_lists[-1].children:
        if argument.is_named and argument.type == "string":
            return _string_module(argument, src)
    return None
119
+
120
+
121
def _find_dynamic_import_module(node, src: bytes) -> str | None:
    """Search *node* depth-first for a dynamic ``import('literal')`` call.

    A ``call_expression`` with a direct child of type ``import`` is treated
    as a dynamic import. Its string-literal argument's specifier is returned,
    or ``None`` when the argument is not a string literal (the search does
    not descend further into that call). Any other node is searched through
    all of its children, and the first non-``None`` result is returned.
    """
    is_dynamic_import = node.type == "call_expression" and any(
        kid.type == "import" for kid in node.children
    )
    if is_dynamic_import:
        for part in node.children:
            if not (part.is_named and part.type == "arguments"):
                continue
            for argument in part.children:
                if argument.is_named and argument.type == "string":
                    return _string_module(argument, src)
        # Dynamic import whose argument is not a string literal -- skip it.
        return None

    for kid in node.children:
        found = _find_dynamic_import_module(kid, src)
        if found is not None:
            return found
    return None
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # Adapter
150
+ # ---------------------------------------------------------------------------
151
+
152
class JavascriptAdapter(RegexAdapterBase):
    """JavaScript adapter -- AST-based extractor built on tree-sitter.

    Operates on ``.js``, ``.jsx``, ``.mjs`` and ``.cjs`` files. Structural
    extraction (imports and symbols), runtime-signal extraction and
    authority-write extraction are enabled; contract extraction is not
    (``supports_contracts = False``).

    Public interface (class name, method signatures, attributes, flags)
    is preserved exactly from the prior regex-based JavascriptAdapter.
    """

    language = "javascript"
    file_extensions = (".js", ".jsx", ".mjs", ".cjs")
    supports_structural = True
    supports_contracts = False
    supports_runtime_signals = True
    supports_authority_writes = True

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _is_js_test_file(path: Path) -> bool:
        """Return True when *path* names a test file.

        A path counts as a test file when its file name ends with
        ``.test.js`` or ``.spec.js``, or when one of its path components is
        exactly ``__tests__``.
        """
        # NOTE(review): ``.test.jsx`` / ``.spec.mjs`` etc. are not matched
        # even though the adapter handles those extensions -- confirm whether
        # that is intended before widening the check.
        p = Path(path)
        if p.name.endswith((".test.js", ".spec.js")):
            return True
        return "__tests__" in p.as_posix().split("/")

    # ------------------------------------------------------------------
    # Structural: imports
    # ------------------------------------------------------------------

    def extract_imports(self, content: str, path: Path) -> list[ImportEdge]:
        """Return one ImportEdge per ES-module / CJS / dynamic import.

        Only direct children of the parse-tree root are inspected.

        Handled forms (every edge carries ``confidence=1.0``):
            ES-module:
                ``import X from 'Y'``
                ``import { A, B } from 'Y'``
                ``import * as X from 'Y'``
                ``import 'Y'``
                ``export { A, B } from 'Y'``
                ``export * from 'Y'`` / ``export * as NS from 'Y'``
            CommonJS:
                ``const X = require('Y')`` (also ``let`` / ``var``)
                bare ``require('Y')``
            Dynamic (string-literal specifier only):
                ``const m = import('Y')``
                ``const m = await import('Y')``
                bare ``import('Y')``

        Edges are de-duplicated on ``(line, module)`` and returned sorted by
        ``(line, to_module, kind)``.
        """
        _log.debug("extract_imports (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)
        from_path = Path(path).as_posix()

        edges: list[ImportEdge] = []
        seen: set[tuple[int, str]] = set()

        def _emit(module: str, line: int) -> None:
            """Record an edge unless *module* is empty or already seen on *line*."""
            if not module:
                return
            key = (line, module)
            if key in seen:
                return
            seen.add(key)
            edges.append(ImportEdge(
                from_file=from_path,
                to_module=module,
                kind=classify_import(module),
                line=line,
                confidence=1.0,
            ))

        def _module_of_call(call_node) -> str | None:
            """Resolve a call node as ``require('Y')`` first, then ``import('Y')``."""
            module = _find_require_module(call_node, src)
            if module is None:
                module = _find_dynamic_import_module(call_node, src)
            return module

        for node in root.children:
            if not node.is_named:
                continue

            # ES-module imports and re-exports: the specifier is the first
            # direct ``string`` child of the statement. An export_statement
            # without such a child is a plain exported declaration and
            # produces no edge (it is handled by extract_symbols).
            if node.type in ("import_statement", "export_statement"):
                for child in node.children:
                    if child.is_named and child.type == "string":
                        _emit(_string_module(child, src), node_line(node))
                        break

            # const / let / var declarations whose initialiser is a
            # require(...) call or a dynamic import.
            elif node.type in ("lexical_declaration", "variable_declaration"):
                for decl in iter_named_children(node, "variable_declarator"):
                    for child in decl.children:
                        if not child.is_named:
                            continue
                        if child.type == "call_expression":
                            # Covers both ``require('Y')`` and the non-await
                            # ``import('Y')`` form. The latter was previously
                            # unreachable behind a duplicated condition.
                            module = _module_of_call(child)
                        elif child.type == "await_expression":
                            # ``await import('Y')``
                            module = _find_dynamic_import_module(child, src)
                        else:
                            continue
                        if module is not None:
                            _emit(module, node_line(node))

            # Bare ``require('Y')`` / ``import('Y')`` expression statements.
            elif node.type == "expression_statement":
                for child in iter_named_children(node, "call_expression"):
                    module = _module_of_call(child)
                    if module is not None:
                        _emit(module, node_line(node))

        edges.sort(key=lambda e: (e.line, e.to_module, e.kind))
        return edges

    # ------------------------------------------------------------------
    # Structural: symbols
    # ------------------------------------------------------------------

    def extract_symbols(self, content: str, path: Path) -> list[SymbolDef]:
        """Return one SymbolDef per top-level declaration in *content*.

        Detected kinds:
            function -- ``function_declaration``
            class    -- ``class_declaration``
            const    -- each ``variable_declarator`` of a
                        ``lexical_declaration`` / ``variable_declaration``
                        whose first named child is a plain identifier

        Declarators that start with a destructuring pattern emit nothing;
        individual destructured names are not promoted to symbols.

        Visibility:
            - ``"public"`` if the declaration is a direct child of an
              ``export_statement``.
            - ``"module"`` otherwise (CJS ``module.exports`` is not tracked
              at the symbol level).

        All symbols carry ``confidence=1.0`` and the line of the enclosing
        declaration. Results are sorted by ``(line, name)``.
        """
        _log.debug("extract_symbols (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)

        syms: list[SymbolDef] = []

        def _emit(name: str, kind: str, line: int, exported: bool) -> None:
            syms.append(SymbolDef(
                name=name,
                kind=kind,
                line=line,
                visibility="public" if exported else "module",
                confidence=1.0,
            ))

        def _process_declaration(decl_node, exported: bool) -> None:
            """Extract symbol(s) from a function/class/lexical/var declaration."""
            decl_type = decl_node.type
            decl_line = node_line(decl_node)

            if decl_type in ("function_declaration", "class_declaration"):
                kind = "function" if decl_type == "function_declaration" else "class"
                # Only the first identifier child (the declared name) counts.
                for ident in iter_named_children(decl_node, "identifier"):
                    _emit(node_text(ident, src), kind, decl_line, exported)
                    break

            elif decl_type in ("lexical_declaration", "variable_declaration"):
                for var_decl in iter_named_children(decl_node, "variable_declarator"):
                    # Only the first named child of the declarator is
                    # considered; anything other than a plain identifier
                    # (e.g. object_pattern / array_pattern) is skipped.
                    first_named = next(
                        (c for c in var_decl.children if c.is_named), None
                    )
                    if first_named is not None and first_named.type == "identifier":
                        _emit(node_text(first_named, src), "const", decl_line, exported)

        for node in root.children:
            if not node.is_named:
                continue

            if node.type == "export_statement":
                # Direct named children of the export are candidate
                # declarations; non-declaration children are ignored by type.
                for child in node.children:
                    if child.is_named:
                        _process_declaration(child, exported=True)
            else:
                _process_declaration(node, exported=False)

        syms.sort(key=lambda s: (s.line, s.name))
        return syms

    # ------------------------------------------------------------------
    # Runtime signals: timers, event listeners, top-level effects
    # ------------------------------------------------------------------

    #: Identifier names that indicate a timer call.
    _TIMER_FNS: frozenset[str] = frozenset({"setInterval", "setTimeout", "setImmediate"})

    #: Member-expression property names that indicate an event-listener call.
    _EVENT_METHODS: frozenset[str] = frozenset({"addEventListener", "on"})

    #: Identifier names that must NOT produce a top_level_effect signal.
    #: Kept as part of the class's attributes; not referenced by the methods
    #: visible in this class.
    _EXCLUDED_CALL_IDS: frozenset[str] = frozenset({"require"}) | _TIMER_FNS

    def extract_runtime(self, content: str, path: Path) -> list[TSRuntimeSignal]:
        """Detect JavaScript runtime side-effects via tree-sitter AST.

        Emits TSRuntimeSignal (confidence=1.0) for TOP-LEVEL
        ``expression_statement`` nodes (direct children of the root) whose
        first direct ``call_expression`` child matches one of:

            setInterval(...) / setTimeout(...) / setImmediate(...)
                -> kind="timer", payload={"call": <fn name>}
            *.addEventListener(...) / *.on(...)
                -> kind="event_listener", payload={"call": "<receiver>.<method>"}
            any other identifier or member-expression call except require()
                -> kind="top_level_effect", payload={"call": <callee text, <=30 chars>}

        Calls whose callee is neither an identifier nor a member expression
        produce no signal. Calls nested inside function bodies are not
        examined because they are not direct children of the root.

        Test files (``*.test.js``, ``*.spec.js``, paths containing a
        ``__tests__`` component) return ``[]``.
        Results are sorted by ``(line, kind)``.
        """
        if self._is_js_test_file(path):
            return []

        _log.debug("extract_runtime (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)
        file_posix = Path(path).as_posix()

        signals: list[TSRuntimeSignal] = []

        def _signal(kind: str, line: int, call: str) -> None:
            signals.append(TSRuntimeSignal(
                kind=kind,
                file=file_posix,
                line=line,
                confidence=1.0,
                payload={"call": call},
            ))

        for node in root.children:
            if not node.is_named or node.type != "expression_statement":
                continue

            # First direct call_expression child of the statement, if any.
            call_expr = next(
                (c for c in node.children
                 if c.is_named and c.type == "call_expression"),
                None,
            )
            if call_expr is None:
                continue

            fn_node = call_expr.child_by_field_name("function")
            if fn_node is None:
                continue

            line = node_line(call_expr)

            if fn_node.type == "identifier":
                fn_name = node_text(fn_node, src)
                if fn_name in self._TIMER_FNS:
                    _signal("timer", line, fn_name)
                elif fn_name == "require":
                    # require() is an import, not a runtime side-effect.
                    continue
                else:
                    _signal("top_level_effect", line, fn_name[:30])

            elif fn_node.type == "member_expression":
                obj_node = fn_node.child_by_field_name("object")
                prop_node = fn_node.child_by_field_name("property")
                if obj_node is None or prop_node is None:
                    continue

                method = node_text(prop_node, src)
                receiver = node_text(obj_node, src)

                if method in self._EVENT_METHODS:
                    _signal("event_listener", line, f"{receiver}.{method}")
                else:
                    _signal("top_level_effect", line, node_text(fn_node, src)[:30])

        signals.sort(key=lambda s: (s.line, s.kind))
        return signals

    # ------------------------------------------------------------------
    # Authority writes
    # ------------------------------------------------------------------

    def extract_writer_calls(
        self, content: str, path: Path
    ) -> list[AuthorityWriteCandidate]:
        """Detect write operations in JavaScript source via tree-sitter AST.

        Walks all ``call_expression`` nodes and matches by function shape:

        ``member_expression`` (object.property), checked in this order:
            - ``fs.writeFile`` / ``fs.writeFileSync``
                -> ``write_kind="fs_write"``, target_hint = first arg
            - ``fs.appendFile`` / ``fs.appendFileSync``
                -> ``write_kind="fs_append"``, target_hint = first arg
            - ``localStorage.setItem`` / ``sessionStorage.setItem``
                -> ``write_kind="storage_write"``, target_hint = first arg
            - ``*.save`` / ``*.create`` (any receiver)
                -> ``write_kind="orm_save"``, target_hint = receiver
            - ``*.update`` (any receiver)
                -> ``write_kind="orm_write"``, target_hint = receiver

        ``identifier`` (standalone call):
            - ``writeFile(...)``
                -> ``write_kind="fs_write"``, target_hint = first arg

        Target hints have surrounding whitespace and quote characters
        stripped and are capped at 30 characters.

        Test files (``*.test.js``, ``*.spec.js``, paths containing a
        ``__tests__`` component) return ``[]``.
        All results carry ``confidence=1.0``.
        Results are sorted by ``(line, write_kind)``.
        """
        if self._is_js_test_file(path):
            return []

        _log.debug("extract_writer_calls (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)

        candidates: list[AuthorityWriteCandidate] = []

        def _hint(text: str) -> str:
            """Strip surrounding quotes and cap at 30 chars."""
            t = text.strip().strip("'\"`").strip()
            return t[:30]

        def _first_arg_text(args_node) -> str:
            """Return the source text of the first named argument, or ``""``."""
            if args_node is None:
                return ""
            named = [c for c in args_node.children if c.is_named]
            return node_text(named[0], src) if named else ""

        def _record(write_kind: str, raw_hint: str, line: int) -> None:
            candidates.append(AuthorityWriteCandidate(
                write_kind=write_kind,
                target_hint=_hint(raw_hint),
                line=line,
                confidence=1.0,
            ))

        for call in walk_named(root, "call_expression"):
            fn = call.child_by_field_name("function")
            args = call.child_by_field_name("arguments")
            if fn is None:
                continue

            line = node_line(call)

            if fn.type == "member_expression":
                obj = fn.child_by_field_name("object")
                prop = fn.child_by_field_name("property")
                if obj is None or prop is None:
                    continue
                receiver = node_text(obj, src)
                method = node_text(prop, src)

                if method in ("writeFile", "writeFileSync") and receiver == "fs":
                    _record("fs_write", _first_arg_text(args), line)
                elif method in ("appendFile", "appendFileSync") and receiver == "fs":
                    _record("fs_append", _first_arg_text(args), line)
                elif method == "setItem" and receiver in ("localStorage", "sessionStorage"):
                    _record("storage_write", _first_arg_text(args), line)
                elif method in ("save", "create"):
                    # ORM-style call on any receiver; the hint is the receiver.
                    _record("orm_save", receiver, line)
                elif method == "update":
                    _record("orm_write", receiver, line)

            elif fn.type == "identifier":
                # Standalone ``writeFile(...)``.
                if node_text(fn, src) == "writeFile":
                    _record("fs_write", _first_arg_text(args), line)

        candidates.sort(key=lambda c: (c.line, c.write_kind))
        return candidates