vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
"""Go source adapter -- tree-sitter AST-based structural extractor.
|
|
2
|
+
|
|
3
|
+
Parses ``.go`` files via tree-sitter for true AST accuracy, replacing the
|
|
4
|
+
former regex+lexer approach. All extracted IR items carry ``confidence=1.0``.
|
|
5
|
+
|
|
6
|
+
Capabilities (L5 + authority scope + Go runtime):
|
|
7
|
+
- supports_structural = True (extract_imports + extract_symbols)
|
|
8
|
+
- supports_contracts = True (extract_contracts)
|
|
9
|
+
- supports_runtime_signals = True (extract_runtime)
|
|
10
|
+
- supports_authority_writes = True (extract_writer_calls)
|
|
11
|
+
|
|
12
|
+
Import forms handled:
|
|
13
|
+
``import "pkg"`` -- single unaliased
|
|
14
|
+
``import alias "pkg"`` -- single aliased
|
|
15
|
+
``import _ "pkg"`` -- blank import
|
|
16
|
+
``import ( "pkg" ... )`` -- grouped block (all entry forms above)
|
|
17
|
+
|
|
18
|
+
Symbol kinds extracted (top-level only):
|
|
19
|
+
function -- ``func Name(`` (NOT method declarations with receiver)
|
|
20
|
+
struct -- ``type X struct``
|
|
21
|
+
interface -- ``type X interface``
|
|
22
|
+
type -- ``type X = ...`` (type alias, via type_alias node)
|
|
23
|
+
``type X SomeType`` (type definition, via type_spec node)
|
|
24
|
+
const -- ``const X`` / ``const ( X ... )``
|
|
25
|
+
``var X`` / ``var ( X ... )`` (emitted as kind="const" for
|
|
26
|
+
parity with the prior adapter)
|
|
27
|
+
|
|
28
|
+
Visibility rule (Go):
|
|
29
|
+
- ``"public"`` -- name starts with an uppercase letter (exported)
|
|
30
|
+
- ``"module"`` -- name starts with a lowercase letter (unexported)
|
|
31
|
+
|
|
32
|
+
Uses shared ``_treesitter`` helpers; Java/JS/TS adapters will reuse the same
|
|
33
|
+
module. The public interface (class name, method signatures, flags,
|
|
34
|
+
file_extensions) is identical to the former regex adapter.
|
|
35
|
+
"""
|
|
36
|
+
from __future__ import annotations
|
|
37
|
+
|
|
38
|
+
import logging
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
|
|
41
|
+
from ._base import RegexAdapterBase
|
|
42
|
+
from ._ir import AuthorityWriteCandidate, ContractCandidate, ImportEdge, SymbolDef, TSRuntimeSignal
|
|
43
|
+
from ._treesitter import (
|
|
44
|
+
iter_named_children,
|
|
45
|
+
node_line,
|
|
46
|
+
node_text,
|
|
47
|
+
parse_bytes,
|
|
48
|
+
walk_named,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Public API of this module: only the adapter class is exported.
__all__ = ["GoAdapter"]
|
|
52
|
+
|
|
53
|
+
# Module-level logger; the adapter methods emit their debug messages through it.
_log = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
# Language key passed as the first argument to parse_bytes() by every extractor.
_LANGUAGE = "go"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Internal helpers
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
def _visibility(name: str) -> str:
|
|
63
|
+
"""Return Go visibility based on first character of *name*.
|
|
64
|
+
|
|
65
|
+
Exported (uppercase first character) → ``"public"``.
|
|
66
|
+
Unexported → ``"module"``.
|
|
67
|
+
"""
|
|
68
|
+
return "public" if name and name[0].isupper() else "module"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _extract_import_spec(
    spec_node,
    src: bytes,
    from_path: str,
) -> ImportEdge | None:
    """Build an ImportEdge from a single ``import_spec`` node.

    The import path is a Go string literal, which may be written either as
    an interpreted literal (``"pkg"``) or as a raw literal (`` `pkg` ``);
    both spellings are accepted.  The bare path is taken from the literal's
    ``*_content`` child when the grammar provides one, otherwise from the
    literal text with the surrounding quote characters stripped.

    Args:
        spec_node: tree-sitter ``import_spec`` node.
        src: UTF-8 source bytes the node was parsed from.
        from_path: POSIX path of the importing file, stored on the edge.

    Returns:
        The ImportEdge for this spec, or ``None`` when the spec has no path
        literal or the path is empty.
    """
    # First child that is a string literal of either flavour.
    path_literal = next(
        (
            child
            for child in spec_node.children
            if child.type in ("interpreted_string_literal", "raw_string_literal")
        ),
        None,
    )
    if path_literal is None:
        return None

    # The content child is named after the literal type:
    # interpreted_string_literal -> interpreted_string_literal_content, etc.
    content_type = path_literal.type + "_content"
    content_node = next(
        (child for child in path_literal.children if child.type == content_type),
        None,
    )

    if content_node is not None:
        pkg = node_text(content_node, src)
    else:
        # No content child: fall back to the literal text minus its
        # delimiters (double quotes or backticks).
        pkg = node_text(path_literal, src).strip('"`')
    if not pkg:
        return None

    return ImportEdge(
        from_file=from_path,
        to_module=pkg,
        kind="absolute",  # Go has no relative imports
        line=node_line(spec_node),
        confidence=1.0,
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Adapter
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
class GoAdapter(RegexAdapterBase):
    """Go adapter -- AST-based extractor built on tree-sitter.

    Operates on ``.go`` files.  All four capability flags are enabled:

    - ``supports_structural``       -- ``extract_imports`` / ``extract_symbols``
    - ``supports_contracts``        -- ``extract_contracts``
    - ``supports_runtime_signals``  -- ``extract_runtime``
    - ``supports_authority_writes`` -- ``extract_writer_calls``

    Every extractor encodes *content* as UTF-8 (unencodable characters are
    replaced), parses it with ``parse_bytes`` and emits IR items with
    ``confidence=1.0``.

    Public interface (class name, method signatures, attributes, flags)
    is preserved exactly from the prior regex-based GoAdapter.
    """

    language = "go"
    file_extensions = (".go",)
    supports_structural = True
    supports_contracts = True
    supports_runtime_signals = True
    supports_authority_writes = True

    # Named nodes that can appear between a type_spec's name and its
    # underlying type without being the underlying type themselves
    # (generic type parameters, stray comments).
    _GO_TYPE_SPEC_SKIP = ("type_parameter_list", "comment")

    # Underlying-type node -> symbol/contract kind.
    _GO_CONTRACT_KINDS = {"struct_type": "struct", "interface_type": "interface"}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _go_parse(content: str):
        """Encode *content* and parse it; return ``(src, root)``.

        ``src`` is the UTF-8 byte string handed to the parser (needed later
        by ``node_text``); ``root`` is whatever ``parse_bytes`` returns.
        """
        src = content.encode("utf-8", errors="replace")
        return src, parse_bytes(_LANGUAGE, src)

    @staticmethod
    def _go_is_test_file(path: Path) -> bool:
        """Return True when the file name of *path* ends with ``_test.go``."""
        return Path(path).name.endswith("_test.go")

    @staticmethod
    def _go_import_specs(decl):
        """Yield every ``import_spec`` under one ``import_declaration``.

        Covers both the single form (``import "pkg"``) and the grouped form
        (``import ( ... )``), in source order.
        """
        for child in decl.children:
            if child.type == "import_spec":
                yield child
            elif child.type == "import_spec_list":
                yield from iter_named_children(child, "import_spec")

    @staticmethod
    def _go_var_specs(decl):
        """Yield every ``var_spec`` under one ``var_declaration``.

        A grouped declaration wraps its specs in ``var_spec_list``; a single
        declaration holds the ``var_spec`` directly.
        """
        for child in decl.children:
            if not child.is_named:
                continue
            if child.type == "var_spec_list":
                yield from iter_named_children(child, "var_spec")
            elif child.type == "var_spec":
                yield child

    @classmethod
    def _go_type_spec_parts(cls, spec):
        """Return ``(name_node, body_type)`` for a ``type_spec`` node.

        ``name_node`` is the first named ``type_identifier`` child (or
        ``None``).  ``body_type`` is the node type of the first named child
        after the name that is not a type-parameter list or a comment, so
        ``type Box[T any] struct{}`` reports ``"struct_type"`` rather than
        ``"type_parameter_list"``.  ``body_type`` is ``None`` when no such
        child exists.
        """
        name_node = None
        for child in spec.children:
            if not child.is_named or child.type in cls._GO_TYPE_SPEC_SKIP:
                continue
            if name_node is None:
                if child.type == "type_identifier":
                    name_node = child
                continue
            return name_node, child.type
        return name_node, None

    @staticmethod
    def _go_has_eager_call(node) -> bool:
        """Return True if *node* contains a call evaluated with the expression.

        Walks the subtree looking for a ``call_expression`` but does not
        descend into ``func_literal`` nodes: calls inside a function
        literal's body only run when that function is invoked later.  An
        immediately-invoked literal (``func() {...}()``) still counts,
        because the enclosing ``call_expression`` is reached first.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "call_expression":
                return True
            if current.type == "func_literal":
                continue
            pending.extend(current.children)
        return False

    # ------------------------------------------------------------------
    # Structural: imports
    # ------------------------------------------------------------------

    def extract_imports(self, content: str, path: Path) -> list[ImportEdge]:
        """Return one ImportEdge per import path found in *content*.

        Handled forms:
          ``import "pkg"``         -- confidence 1.0
          ``import alias "pkg"``   -- confidence 1.0
          ``import _ "pkg"``       -- confidence 1.0
          ``import ( "pkg" ... )`` -- confidence 1.0 per entry

        All Go imports are absolute (Go has no relative import syntax).
        Edges are de-duplicated on ``(line, to_module)`` and returned sorted
        by ``(line, to_module, kind)``.
        """
        _log.debug("extract_imports (tree-sitter): %s (%d chars)", path, len(content))
        src, root = self._go_parse(content)
        from_path = Path(path).as_posix()

        edges: list[ImportEdge] = []
        seen: set[tuple[int, str]] = set()

        for decl in iter_named_children(root, "import_declaration"):
            for spec in self._go_import_specs(decl):
                edge = _extract_import_spec(spec, src, from_path)
                if not edge:
                    continue
                key = (edge.line, edge.to_module)
                if key in seen:
                    continue
                seen.add(key)
                edges.append(edge)

        edges.sort(key=lambda e: (e.line, e.to_module, e.kind))
        return edges

    # ------------------------------------------------------------------
    # Structural: symbols
    # ------------------------------------------------------------------

    def extract_symbols(self, content: str, path: Path) -> list[SymbolDef]:
        """Return SymbolDefs for the top-level declarations in *content*.

        Detected kinds:
          function  -- top-level ``func Name(`` (``function_declaration``
                       nodes only; method declarations are a different node
                       type and are not visited)
          struct    -- ``type X struct`` (including generic ``type X[T] struct``)
          interface -- ``type X interface`` (including generic forms)
          type      -- ``type X = ...`` or ``type X SomeType`` (alias/def)
          const     -- ``const X`` / ``const ( X ... )`` and
                       ``var X`` / ``var ( X ... )`` at package level

        For const and var specs only the first identifier of each spec is
        emitted.

        Visibility:
          - ``"public"``  if name starts with an uppercase letter (exported).
          - ``"module"``  otherwise (unexported).

        Results are sorted by ``(line, name)``.
        """
        _log.debug("extract_symbols (tree-sitter): %s (%d chars)", path, len(content))
        src, root = self._go_parse(content)

        syms: list[SymbolDef] = []

        def _emit(name: str, kind: str, line: int) -> None:
            syms.append(SymbolDef(
                name=name,
                kind=kind,
                line=line,
                visibility=_visibility(name),
                confidence=1.0,
            ))

        def _emit_first(owner, child_type: str, kind: str, line_node) -> None:
            """Emit a symbol for the first *child_type* child of *owner*, if any."""
            for ident in iter_named_children(owner, child_type):
                _emit(node_text(ident, src), kind, node_line(line_node))
                break

        for node in root.children:
            if not node.is_named:
                continue

            # --- function_declaration: top-level func (NOT method) ---
            if node.type == "function_declaration":
                # The first identifier child is the function name.
                _emit_first(node, "identifier", "function", node)

            # --- type_declaration: struct / interface / alias / typedef ---
            elif node.type == "type_declaration":
                for spec in node.children:
                    if not spec.is_named:
                        continue

                    if spec.type == "type_spec":
                        name_node, body_type = self._go_type_spec_parts(spec)
                        if name_node is None:
                            continue
                        kind = self._GO_CONTRACT_KINDS.get(body_type, "type")
                        _emit(node_text(name_node, src), kind, node_line(spec))

                    elif spec.type == "type_alias":
                        # type X = OtherType
                        _emit_first(spec, "type_identifier", "type", spec)

            # --- const_declaration: const X / const ( ... ) ---
            elif node.type == "const_declaration":
                for spec in iter_named_children(node, "const_spec"):
                    _emit_first(spec, "identifier", "const", spec)

            # --- var_declaration: var X / var ( ... ) ---
            elif node.type == "var_declaration":
                # Package-level vars are reported with kind="const" as well.
                for spec in self._go_var_specs(node):
                    _emit_first(spec, "identifier", "const", spec)

        syms.sort(key=lambda s: (s.line, s.name))
        return syms

    # ------------------------------------------------------------------
    # Contracts: struct and interface type definitions
    # ------------------------------------------------------------------

    def extract_contracts(self, content: str, path: Path) -> list[ContractCandidate]:
        """Return one ContractCandidate per top-level struct or interface type.

        Handled forms:
          ``type X struct { ... }``         → contract_kind="struct"
          ``type X interface { ... }``      → contract_kind="interface"
          ``type X[T any] struct { ... }``  → contract_kind="struct" (generic)
          ``type ( A struct{} ... )``       → grouped block, one entry per type

        Plain type aliases / type definitions (``type X = Foo`` or
        ``type X SomeType``) are excluded — they are not data contracts.

        Test files (path name ending with ``_test.go``) return ``[]``.

        All results carry ``confidence=1.0`` (AST-based extraction).
        Results are sorted by ``(line, name)``.
        """
        if self._go_is_test_file(path):
            return []

        _log.debug("extract_contracts (tree-sitter): %s (%d chars)", path, len(content))
        src, root = self._go_parse(content)

        candidates: list[ContractCandidate] = []

        for node in root.children:
            if not node.is_named or node.type != "type_declaration":
                continue

            for spec in node.children:
                if not spec.is_named or spec.type != "type_spec":
                    continue

                name_node, body_type = self._go_type_spec_parts(spec)
                contract_kind = self._GO_CONTRACT_KINDS.get(body_type)
                if name_node is None or contract_kind is None:
                    continue

                candidates.append(ContractCandidate(
                    name=node_text(name_node, src),
                    contract_kind=contract_kind,
                    line=node_line(spec),
                    confidence=1.0,
                ))

        candidates.sort(key=lambda c: (c.line, c.name))
        return candidates

    # ------------------------------------------------------------------
    # Runtime signals: init functions, goroutine spawns, package-level
    # var initialized by a call (import-time side effects).
    # ------------------------------------------------------------------

    def extract_runtime(self, content: str, path: Path) -> list[TSRuntimeSignal]:
        """Detect Go import-time and concurrency side effects via tree-sitter AST.

        Emits TSRuntimeSignal (confidence=1.0) for:
          ``func init() { ... }``
            → kind="init_function", payload={"call": "init"}
          ``go someCall(...)``  (go_statement anywhere in the file)
            → kind="goroutine_spawn", payload={"call": <call expression text>}
          Top-level ``var X = <expr containing a call>`` (package-level var
            whose initializer evaluates a function call — import-time side
            effect).  Calls that appear only inside the body of a function
            literal are not counted, since they do not run at initialization.
            → kind="package_init", payload={"call": <var name>}

        Test files (path ending with ``_test.go``) return ``[]``.
        Results are sorted by ``(line, kind)``.
        """
        if self._go_is_test_file(path):
            return []

        _log.debug("extract_runtime (tree-sitter): %s (%d chars)", path, len(content))
        src, root = self._go_parse(content)
        file_posix = Path(path).as_posix()

        signals: list[TSRuntimeSignal] = []

        # ------------------------------------------------------------------
        # Pass 1: top-level declarations — init functions and package-level
        # vars initialized by a call expression.
        # ------------------------------------------------------------------
        for node in root.children:
            if not node.is_named:
                continue

            # func init() { ... }
            if node.type == "function_declaration":
                for ident in iter_named_children(node, "identifier"):
                    if node_text(ident, src) == "init":
                        signals.append(TSRuntimeSignal(
                            kind="init_function",
                            file=file_posix,
                            line=node_line(node),
                            confidence=1.0,
                            payload={"call": "init"},
                        ))
                    break  # only inspect the first identifier (function name)

            # var X = <call_expr> at package level
            elif node.type == "var_declaration":
                for spec in self._go_var_specs(node):
                    # The first identifier is the var name; the initializer
                    # lives in the expression_list child.
                    var_name = ""
                    has_call = False
                    for part in spec.children:
                        if not part.is_named:
                            continue
                        if part.type == "identifier" and not var_name:
                            var_name = node_text(part, src)
                        elif part.type == "expression_list":
                            has_call = has_call or self._go_has_eager_call(part)
                    if var_name and has_call:
                        signals.append(TSRuntimeSignal(
                            kind="package_init",
                            file=file_posix,
                            line=node_line(spec),
                            confidence=1.0,
                            payload={"call": var_name},
                        ))

        # ------------------------------------------------------------------
        # Pass 2: goroutine spawns — walk entire tree for go_statement nodes.
        # ------------------------------------------------------------------
        for go_node in walk_named(root, "go_statement"):
            # The call expression is a direct child of go_statement; the
            # payload stays "" if none is found.
            call_text = ""
            for child in go_node.children:
                if child.is_named and child.type == "call_expression":
                    call_text = node_text(child, src)
                    break
            signals.append(TSRuntimeSignal(
                kind="goroutine_spawn",
                file=file_posix,
                line=node_line(go_node),
                confidence=1.0,
                payload={"call": call_text},
            ))

        signals.sort(key=lambda s: (s.line, s.kind))
        return signals

    # ------------------------------------------------------------------
    # Authority writes
    # ------------------------------------------------------------------

    def extract_writer_calls(
        self, content: str, path: Path
    ) -> list[AuthorityWriteCandidate]:
        """Detect write operations in Go source via tree-sitter AST.

        Walks all ``call_expression`` nodes and matches writer patterns by the
        called function (a ``selector_expression`` with ``operand`` + ``field``):

        - ``os.WriteFile(name, ...)`` / ``ioutil.WriteFile(name, ...)``
            → ``write_kind="fs_write"``, target_hint = first arg text
        - ``os.Create(name)`` / ``os.OpenFile(name, ...)``
            → ``write_kind="fs_write"``, target_hint = first arg text
        - ``recv.Write(...)`` / ``recv.WriteString(...)``  (any receiver)
            → ``write_kind="fs_write"``, target_hint = receiver text
        - ``recv.Exec(...)``  (any receiver, db exec pattern)
            → ``write_kind="db_write"``, target_hint = first arg text (best-effort)

        Matching is purely textual on the operand and field names; import
        aliases are not resolved.  Target hints have surrounding quotes
        stripped and are capped at 30 characters.

        Test files (path ending with ``_test.go``) return ``[]``.
        All results carry ``confidence=1.0``.
        Results are sorted by ``(line, write_kind)``.
        """
        if self._go_is_test_file(path):
            return []

        _log.debug("extract_writer_calls (tree-sitter): %s (%d chars)", path, len(content))
        src, root = self._go_parse(content)

        candidates: list[AuthorityWriteCandidate] = []

        def _hint(text: str) -> str:
            """Strip surrounding quotes and cap at 30 chars."""
            t = text.strip().strip('"\'`').strip()
            return t[:30]

        def _first_arg_text(call_node) -> str:
            """Return the text of the first argument of a call_expression, or ''."""
            args = call_node.child_by_field_name("arguments")
            if args is None:
                return ""
            for arg in args.children:
                if arg.is_named:
                    return node_text(arg, src)
            return ""

        for call in walk_named(root, "call_expression"):
            fn = call.child_by_field_name("function")
            if fn is None or fn.type != "selector_expression":
                continue

            operand = fn.child_by_field_name("operand")
            field = fn.child_by_field_name("field")
            if operand is None or field is None:
                continue

            pkg = node_text(operand, src)
            method = node_text(field, src)

            # Decide the write kind and the raw hint text; the branch order
            # matters because the receiver-agnostic patterns come last.
            if method == "WriteFile" and pkg in ("os", "ioutil"):
                write_kind, raw_hint = "fs_write", _first_arg_text(call)
            elif method in ("Create", "OpenFile") and pkg == "os":
                write_kind, raw_hint = "fs_write", _first_arg_text(call)
            elif method in ("Write", "WriteString"):
                # Any receiver — IO writer pattern; hint is the receiver.
                write_kind, raw_hint = "fs_write", pkg
            elif method == "Exec":
                # Any receiver — DB exec pattern.
                write_kind, raw_hint = "db_write", _first_arg_text(call)
            else:
                continue

            candidates.append(AuthorityWriteCandidate(
                write_kind=write_kind,
                target_hint=_hint(raw_hint),
                line=node_line(call),
                confidence=1.0,
            ))

        candidates.sort(key=lambda c: (c.line, c.write_kind))
        return candidates
|