vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
"""JavaScript source adapter -- tree-sitter AST-based structural extractor.
|
|
2
|
+
|
|
3
|
+
Parses .js / .jsx / .mjs / .cjs files via tree-sitter for true AST accuracy,
|
|
4
|
+
replacing the former regex+lexer approach. All extracted IR items carry
|
|
5
|
+
``confidence=1.0``.
|
|
6
|
+
|
|
7
|
+
Capabilities (L6a scope):
|
|
8
|
+
- supports_structural = True
|
|
9
|
+
- supports_contracts = False (L6b)
|
|
10
|
+
- supports_runtime_signals = True (L6a: timer/event_listener/top_level_effect)
|
|
11
|
+
- supports_authority_writes = True
|
|
12
|
+
|
|
13
|
+
Import forms handled (ES-module):
|
|
14
|
+
``import X from 'Y'`` -- default import
|
|
15
|
+
``import { A, B } from 'Y'`` -- named imports
|
|
16
|
+
``import * as X from 'Y'`` -- namespace import
|
|
17
|
+
``import 'Y'`` -- side-effect import
|
|
18
|
+
``export { A, B } from 'Y'`` -- re-export named
|
|
19
|
+
``export * from 'Y'`` -- re-export star
|
|
20
|
+
``import('Y')`` / ``await import('Y')`` -- dynamic import
|
|
21
|
+
|
|
22
|
+
Import forms handled (CommonJS):
|
|
23
|
+
``const X = require('Y')`` -- lexical_declaration
|
|
24
|
+
``let X = require('Y')`` -- lexical_declaration
|
|
25
|
+
``const { X } = require('Y')`` -- destructuring
|
|
26
|
+
``var X = require('Y')`` -- variable_declaration
|
|
27
|
+
``require('Y');`` -- bare side-effect
|
|
28
|
+
|
|
29
|
+
Symbol kinds extracted (top-level only):
|
|
30
|
+
function -- ``function_declaration`` (exported or not)
|
|
31
|
+
class -- ``class_declaration`` (exported or not)
|
|
32
|
+
const -- ``lexical_declaration`` / ``variable_declaration``
|
|
33
|
+
|
|
34
|
+
Visibility rule (JS):
|
|
35
|
+
- ``"public"`` -- declaration is wrapped in an ``export_statement``
|
|
36
|
+
- ``"module"`` -- declaration is not exported
|
|
37
|
+
|
|
38
|
+
Known limitations (explicit L2 tech-debt, do NOT fix here):
|
|
39
|
+
- ``module.exports = { ... }`` / ``exports.foo = ...`` are NOT emitted as
|
|
40
|
+
symbols. CJS exports are tracked as import edges for their consumers;
|
|
41
|
+
the producer side is L6 work.
|
|
42
|
+
- JSX attribute expressions are not inspected (treated as JS).
|
|
43
|
+
- ``require.resolve(...)`` and ``require.cache`` are ignored.
|
|
44
|
+
- Dynamic ``import(variable)`` with non-literal argument is skipped
|
|
45
|
+
(consistent with prior adapter behaviour).
|
|
46
|
+
- ``enum`` is not valid JavaScript; tree-sitter parses it as ERROR and it
|
|
47
|
+
is silently ignored (no SymbolDef emitted).
|
|
48
|
+
"""
|
|
49
|
+
from __future__ import annotations
|
|
50
|
+
|
|
51
|
+
import logging
|
|
52
|
+
from pathlib import Path
|
|
53
|
+
|
|
54
|
+
from ._base import RegexAdapterBase
|
|
55
|
+
from ._ir import AuthorityWriteCandidate, ImportEdge, SymbolDef, TSRuntimeSignal
|
|
56
|
+
from ._patterns import classify_import
|
|
57
|
+
from ._treesitter import (
|
|
58
|
+
iter_named_children,
|
|
59
|
+
node_line,
|
|
60
|
+
node_text,
|
|
61
|
+
parse_bytes,
|
|
62
|
+
walk_named,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
__all__ = ["JavascriptAdapter"]
|
|
66
|
+
|
|
67
|
+
_log = logging.getLogger(__name__)
|
|
68
|
+
|
|
69
|
+
_LANGUAGE = "javascript"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Internal helpers
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
def _string_module(string_node, src: bytes) -> str:
    """Extract the bare module specifier from a tree-sitter ``string`` node.

    A quoted string is parsed as an opening quote, a run of
    ``string_fragment`` / ``escape_sequence`` children, and a closing quote.
    All fragment and escape children are concatenated in source order so a
    specifier that contains an escape (for example ``'..\\\\lib\\\\x'``) is
    returned in full instead of being cut at the first escape.  Escape
    sequences are kept exactly as written in the source; they are not decoded.

    Args:
        string_node: tree-sitter node of type ``string``.
        src: The UTF-8 encoded source buffer the node was parsed from.

    Returns:
        The text between the quotes.  When the node exposes no fragment or
        escape children, the node's full text with surrounding ``'`` / ``"``
        characters stripped.
    """
    pieces = [
        node_text(child, src)
        for child in string_node.children
        if child.type in ("string_fragment", "escape_sequence")
    ]
    if pieces:
        return "".join(pieces)
    # Fallback: strip surrounding quotes from raw node text.
    raw = node_text(string_node, src)
    return raw.strip("'\"")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _find_require_module(call_expr_node, src: bytes) -> str | None:
    """Return the module specifier of a ``require('...')`` call, else ``None``.

    The call matches only when one of its direct named children is an
    ``identifier`` whose text is exactly ``require`` and another is an
    ``arguments`` node.  The result is taken from the first named ``string``
    child found among the arguments.

    Args:
        call_expr_node: tree-sitter ``call_expression`` node to inspect.
        src: The UTF-8 encoded source buffer the node was parsed from.

    Returns:
        The specifier text, or ``None`` when the callee is not a plain
        ``require`` identifier, there is no argument list, or no argument is
        a string literal.
    """
    callee = None
    arguments = None
    for part in (c for c in call_expr_node.children if c.is_named):
        kind = part.type
        if kind == "identifier":
            callee = part
        elif kind == "arguments":
            arguments = part

    if callee is None or node_text(callee, src) != "require":
        return None
    if arguments is None:
        return None

    # The first string literal in the argument list is the specifier.
    literals = (
        arg for arg in arguments.children
        if arg.is_named and arg.type == "string"
    )
    literal = next(literals, None)
    return None if literal is None else _string_module(literal, src)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _find_dynamic_import_module(node, src: bytes) -> str | None:
    """Search *node* depth-first for a dynamic ``import('literal')`` call.

    A ``call_expression`` counts as a dynamic import when one of its direct
    children has type ``import``.  For such a call the first named ``string``
    inside its ``arguments`` child is returned; if there is none (non-literal
    argument) the search stops there with ``None`` and the call's children
    are not explored further.  Any other node is searched by recursing into
    every child, named or not, in order.

    Args:
        node: tree-sitter node at which to start the search.
        src: The UTF-8 encoded source buffer the node was parsed from.

    Returns:
        The module specifier of the first matching call, or ``None``.
    """
    is_import_call = node.type == "call_expression" and any(
        part.type == "import" for part in node.children
    )
    if is_import_call:
        for part in node.children:
            if not (part.is_named and part.type == "arguments"):
                continue
            for arg in part.children:
                if arg.is_named and arg.type == "string":
                    return _string_module(arg, src)
        # Dynamic import with a non-literal argument: nothing to report.
        return None

    for sub in node.children:
        found = _find_dynamic_import_module(sub, src)
        if found is not None:
            return found
    return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
# Adapter
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
class JavascriptAdapter(RegexAdapterBase):
    """JavaScript adapter -- AST-based structural extractor via tree-sitter.

    Operates on ``.js``, ``.jsx``, ``.mjs``, ``.cjs``.  Capability flags:

    - ``supports_structural = True``        -- imports and top-level symbols
    - ``supports_contracts = False``
    - ``supports_runtime_signals = True``   -- timers, event listeners,
      other top-level calls
    - ``supports_authority_writes = True``  -- fs / web-storage / ORM-style
      write calls

    Public interface (class name, method signatures, attributes, flags)
    is preserved exactly from the prior regex-based JavascriptAdapter.
    """

    language = "javascript"
    file_extensions = (".js", ".jsx", ".mjs", ".cjs")
    supports_structural = True
    supports_contracts = False
    supports_runtime_signals = True
    supports_authority_writes = True

    #: File-name infixes that mark a test module (``foo.test.js``).
    _TEST_NAME_MARKERS: tuple[str, ...] = (".test", ".spec")

    @classmethod
    def _is_js_test_path(cls, path: Path) -> bool:
        """Return True when *path* names a test file.

        A path is a test file when its name ends in ``.test<ext>`` or
        ``.spec<ext>`` for any ``<ext>`` in ``file_extensions``, or when one
        of its ``/``-separated segments is exactly ``__tests__``.
        """
        p = Path(path)
        suffixes = tuple(
            f"{marker}{ext}"
            for marker in cls._TEST_NAME_MARKERS
            for ext in cls.file_extensions
        )
        if p.name.endswith(suffixes):
            return True
        return "__tests__" in p.as_posix().split("/")

    # ------------------------------------------------------------------
    # Structural: imports
    # ------------------------------------------------------------------

    def extract_imports(self, content: str, path: Path) -> list[ImportEdge]:
        """Return one ImportEdge per ES-module / CJS / dynamic import.

        Only direct children of the program root are inspected.  Handled
        forms (all emitted with confidence 1.0):

        ES-module:
            ``import X from 'Y'``, ``import { A, B } from 'Y'``,
            ``import * as X from 'Y'``, ``import 'Y'``,
            ``export { A, B } from 'Y'``,
            ``export * from 'Y'`` / ``export * as NS from 'Y'``
        CommonJS:
            ``const|let|var X = require('Y')`` and bare ``require('Y')``
        Dynamic (string-literal specifier only):
            ``import('Y')`` and ``await import('Y')``, either as a variable
            initialiser or as a bare expression statement

        Args:
            content: JavaScript source text.
            path: Path of the file; stored on each edge in POSIX form.

        Returns:
            Edges de-duplicated on ``(line, module)`` and sorted by
            ``(line, to_module, kind)``.  ``line`` is the line of the
            enclosing top-level statement.
        """
        _log.debug("extract_imports (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)
        from_path = Path(path).as_posix()

        edges: list[ImportEdge] = []
        seen: set[tuple[int, str]] = set()

        def _emit(module: str, line: int) -> None:
            # Empty specifiers and repeated (line, module) pairs are dropped.
            if not module:
                return
            key = (line, module)
            if key in seen:
                return
            seen.add(key)
            edges.append(ImportEdge(
                from_file=from_path,
                to_module=module,
                kind=classify_import(module),
                line=line,
                confidence=1.0,
            ))

        def _module_from_expr(expr) -> str | None:
            """Resolve a require()/import() specifier from one expression."""
            if expr.type == "call_expression":
                module = _find_require_module(expr, src)
                if module is not None:
                    return module
                # Not a require() call: it may be (or contain) import('...').
                return _find_dynamic_import_module(expr, src)
            if expr.type == "await_expression":
                return _find_dynamic_import_module(expr, src)
            return None

        for node in root.children:
            if not node.is_named:
                continue
            kind = node.type

            if kind in ("import_statement", "export_statement"):
                # The specifier is the first direct ``string`` child.  An
                # export_statement only has one when it is a re-export
                # (``export ... from '...'``); exported declarations are
                # handled by extract_symbols and produce no ImportEdge.
                for child in node.children:
                    if child.is_named and child.type == "string":
                        _emit(_string_module(child, src), node_line(node))
                        break

            elif kind in ("lexical_declaration", "variable_declaration"):
                # const/let/var X = require('...') | import('...') | await import('...')
                for decl in iter_named_children(node, "variable_declarator"):
                    for child in decl.children:
                        if not child.is_named:
                            continue
                        module = _module_from_expr(child)
                        if module is not None:
                            _emit(module, node_line(node))

            elif kind == "expression_statement":
                # Bare require('...'), import('...') or await import('...').
                for child in node.children:
                    if not child.is_named:
                        continue
                    module = _module_from_expr(child)
                    if module is not None:
                        _emit(module, node_line(node))

        edges.sort(key=lambda e: (e.line, e.to_module, e.kind))
        return edges

    # ------------------------------------------------------------------
    # Structural: symbols
    # ------------------------------------------------------------------

    def extract_symbols(self, content: str, path: Path) -> list[SymbolDef]:
        """Return one SymbolDef per top-level declaration in *content*.

        Detected kinds:
            function  -- ``function_declaration``
            class     -- ``class_declaration``
            const     -- each ``variable_declarator`` of a
                         ``lexical_declaration`` / ``variable_declaration``
                         whose binding target is a plain identifier

        Declarators whose target is a destructuring pattern emit nothing;
        individual destructured names are not promoted to symbols (parity
        with prior adapter behaviour).

        No ``interface`` / ``type`` -- those are TS-only.
        ``enum`` is not valid JS in the tree-sitter grammar; it parses as
        ERROR and is silently skipped.

        Visibility:
            - ``"public"`` if the declaration is a direct child of an
              ``export_statement``.
            - ``"module"`` otherwise (CJS ``module.exports`` not tracked at
              the symbol level in L2; see module-level docstring).

        Args:
            content: JavaScript source text.
            path: Path of the file (used for logging only).

        Returns:
            Symbols sorted by ``(line, name)``; ``line`` is the line of the
            declaration statement.
        """
        _log.debug("extract_symbols (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)

        syms: list[SymbolDef] = []

        def _emit(name: str, kind: str, line: int, exported: bool) -> None:
            syms.append(SymbolDef(
                name=name,
                kind=kind,
                line=line,
                visibility="public" if exported else "module",
                confidence=1.0,
            ))

        #: Declaration node type -> symbol kind, for single-name declarations.
        named_kinds = {
            "function_declaration": "function",
            "class_declaration": "class",
        }

        def _process_declaration(decl_node, exported: bool) -> None:
            """Extract symbol(s) from a function/class/lexical/var declaration."""
            t = decl_node.type

            if t in named_kinds:
                # Only the first direct identifier child: the declared name.
                for ident in iter_named_children(decl_node, "identifier"):
                    _emit(node_text(ident, src), named_kinds[t], node_line(decl_node), exported)
                    break

            elif t in ("lexical_declaration", "variable_declaration"):
                for var_decl in iter_named_children(decl_node, "variable_declarator"):
                    # Only the first named child (the binding target) is
                    # considered; anything but a plain identifier is skipped.
                    target = next((c for c in var_decl.children if c.is_named), None)
                    if target is not None and target.type == "identifier":
                        _emit(node_text(target, src), "const", node_line(decl_node), exported)

        for node in root.children:
            if not node.is_named:
                continue

            if node.type == "export_statement":
                # Declarations wrapped by ``export`` are its direct children.
                for child in node.children:
                    if child.is_named:
                        _process_declaration(child, exported=True)
            else:
                _process_declaration(node, exported=False)

        syms.sort(key=lambda s: (s.line, s.name))
        return syms

    # ------------------------------------------------------------------
    # Runtime signals: timers, event listeners, top-level effects
    # ------------------------------------------------------------------

    #: Identifier names that indicate a timer call.
    _TIMER_FNS: frozenset[str] = frozenset({"setInterval", "setTimeout", "setImmediate"})

    #: Member-expression property names that indicate an event-listener call.
    _EVENT_METHODS: frozenset[str] = frozenset({"addEventListener", "on"})

    #: Identifier names that must NOT produce a top_level_effect signal.
    _EXCLUDED_CALL_IDS: frozenset[str] = frozenset({"require"}) | _TIMER_FNS

    def extract_runtime(self, content: str, path: Path) -> list[TSRuntimeSignal]:
        """Detect JavaScript runtime side-effects via tree-sitter AST.

        Emits TSRuntimeSignal (confidence=1.0) for TOP-LEVEL expression_statement
        nodes (direct children of ``program``) whose first direct named
        ``call_expression`` child has one of these callee shapes:

            setInterval(...) / setTimeout(...) / setImmediate(...)
                -> kind="timer", payload={"call": <fn name>}
            *.addEventListener(...) / *.on(...)
                -> kind="event_listener", payload={"call": "<receiver>.<method>"}
            any other identifier callee except ``require``, and any other
            member-expression callee
                -> kind="top_level_effect", payload={"call": <callee text, <=30 chars>}

        Calls whose callee is neither an identifier nor a member expression
        emit nothing.  Calls nested inside function bodies are NOT flagged
        because they are not direct children of ``program``.

        Test files (names ending ``.test<ext>`` / ``.spec<ext>`` for any
        extension in ``file_extensions``, or paths containing a
        ``__tests__`` segment) return ``[]``.

        Args:
            content: JavaScript source text.
            path: Path of the file; stored on each signal in POSIX form.

        Returns:
            Signals sorted by ``(line, kind)``.
        """
        if self._is_js_test_path(path):
            return []

        _log.debug("extract_runtime (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)
        file_posix = Path(path).as_posix()

        signals: list[TSRuntimeSignal] = []

        def _signal(kind: str, line: int, call: str) -> None:
            signals.append(TSRuntimeSignal(
                kind=kind,
                file=file_posix,
                line=line,
                confidence=1.0,
                payload={"call": call},
            ))

        for node in root.children:
            if not node.is_named or node.type != "expression_statement":
                continue

            # First direct call_expression child of the statement, if any.
            call_expr = next(
                (c for c in node.children if c.is_named and c.type == "call_expression"),
                None,
            )
            if call_expr is None:
                continue

            fn_node = call_expr.child_by_field_name("function")
            if fn_node is None:
                continue

            line = node_line(call_expr)

            if fn_node.type == "identifier":
                fn_name = node_text(fn_node, src)
                if fn_name in self._TIMER_FNS:
                    _signal("timer", line, fn_name)
                elif fn_name not in self._EXCLUDED_CALL_IDS:
                    # require() is excluded: not a runtime side-effect signal.
                    _signal("top_level_effect", line, fn_name[:30])

            elif fn_node.type == "member_expression":
                obj_node = fn_node.child_by_field_name("object")
                prop_node = fn_node.child_by_field_name("property")
                if obj_node is None or prop_node is None:
                    continue

                method = node_text(prop_node, src)
                if method in self._EVENT_METHODS:
                    receiver = node_text(obj_node, src)
                    _signal("event_listener", line, f"{receiver}.{method}")
                else:
                    _signal("top_level_effect", line, node_text(fn_node, src)[:30])

        signals.sort(key=lambda s: (s.line, s.kind))
        return signals

    # ------------------------------------------------------------------
    # Authority writes
    # ------------------------------------------------------------------

    #: ``fs.<method>`` names -> write_kind.
    _FS_WRITE_KINDS = {
        "writeFile": "fs_write",
        "writeFileSync": "fs_write",
        "appendFile": "fs_append",
        "appendFileSync": "fs_append",
    }

    #: Receivers whose ``setItem`` call counts as a storage write.
    _STORAGE_RECEIVERS: frozenset[str] = frozenset({"localStorage", "sessionStorage"})

    #: Method names treated as ORM writes on any receiver -> write_kind.
    _ORM_WRITE_KINDS = {
        "save": "orm_save",
        "create": "orm_save",
        "update": "orm_write",
    }

    def extract_writer_calls(
        self, content: str, path: Path
    ) -> list[AuthorityWriteCandidate]:
        """Detect write operations in JavaScript source via tree-sitter AST.

        Walks all ``call_expression`` nodes and matches by function shape:

        ``member_expression`` (object.property):
          - ``fs.writeFile`` / ``fs.writeFileSync``
              -> ``write_kind="fs_write"``, target_hint = first arg
          - ``fs.appendFile`` / ``fs.appendFileSync``
              -> ``write_kind="fs_append"``, target_hint = first arg
          - ``localStorage.setItem`` / ``sessionStorage.setItem``
              -> ``write_kind="storage_write"``, target_hint = first arg
          - ``*.save`` / ``*.create`` (ORM -- any receiver)
              -> ``write_kind="orm_save"``, target_hint = receiver
          - ``*.update`` (ORM -- any receiver)
              -> ``write_kind="orm_write"``, target_hint = receiver

        ``identifier`` (standalone call):
          - ``writeFile(...)``
              -> ``write_kind="fs_write"``, target_hint = first arg

        The receiver is compared as source text, so only the literal
        receivers ``fs``, ``localStorage`` and ``sessionStorage`` match the
        first three rules.  Hints have surrounding whitespace and quote
        characters removed and are capped at 30 characters.

        Test files (names ending ``.test<ext>`` / ``.spec<ext>`` for any
        extension in ``file_extensions``, or paths containing a
        ``__tests__`` segment) return ``[]``.

        Args:
            content: JavaScript source text.
            path: Path of the file (used for test-file detection and logging).

        Returns:
            Candidates with ``confidence=1.0`` sorted by ``(line, write_kind)``.
        """
        if self._is_js_test_path(path):
            return []

        _log.debug("extract_writer_calls (tree-sitter): %s (%d chars)", path, len(content))
        src: bytes = content.encode("utf-8", errors="replace")
        root = parse_bytes(_LANGUAGE, src)

        candidates: list[AuthorityWriteCandidate] = []

        def _hint(text: str) -> str:
            """Strip surrounding quotes and cap at 30 chars."""
            t = text.strip().strip("'\"`").strip()
            return t[:30]

        def _first_arg_text(args_node) -> str:
            """Source text of the first named argument, or '' if none."""
            if args_node is None:
                return ""
            named = [c for c in args_node.children if c.is_named]
            return node_text(named[0], src) if named else ""

        def _record(write_kind: str, hint_source: str, line: int) -> None:
            candidates.append(AuthorityWriteCandidate(
                write_kind=write_kind,
                target_hint=_hint(hint_source),
                line=line,
                confidence=1.0,
            ))

        for call in walk_named(root, "call_expression"):
            fn = call.child_by_field_name("function")
            args = call.child_by_field_name("arguments")
            if fn is None:
                continue

            line = node_line(call)

            if fn.type == "member_expression":
                obj = fn.child_by_field_name("object")
                prop = fn.child_by_field_name("property")
                if obj is None or prop is None:
                    continue
                receiver = node_text(obj, src)
                method = node_text(prop, src)

                # Method names are disjoint across the three rule groups, so
                # the order of these checks does not affect classification.
                if receiver == "fs" and method in self._FS_WRITE_KINDS:
                    _record(self._FS_WRITE_KINDS[method], _first_arg_text(args), line)
                elif method == "setItem" and receiver in self._STORAGE_RECEIVERS:
                    _record("storage_write", _first_arg_text(args), line)
                elif method in self._ORM_WRITE_KINDS:
                    _record(self._ORM_WRITE_KINDS[method], receiver, line)

            elif fn.type == "identifier":
                # standalone writeFile(...)
                if node_text(fn, src) == "writeFile":
                    _record("fs_write", _first_arg_text(args), line)

        candidates.sort(key=lambda c: (c.line, c.write_kind))
        return candidates
|