vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
"""Java source adapter -- tree-sitter AST-based structural extractor.
|
|
2
|
+
|
|
3
|
+
Parses ``.java`` files via tree-sitter for true AST accuracy, replacing the
|
|
4
|
+
former regex+lexer approach. All extracted IR items carry ``confidence=1.0``.
|
|
5
|
+
|
|
6
|
+
Capabilities (L5 scope + runtime):
|
|
7
|
+
- supports_structural = True (extract_imports + extract_symbols)
|
|
8
|
+
- supports_contracts = True (extract_contracts: class/record/interface/enum)
|
|
9
|
+
- supports_runtime_signals = True (extract_runtime: static_block/spring/thread)
|
|
10
|
+
- supports_authority_writes = True (extract_writer_calls)
|
|
11
|
+
|
|
12
|
+
Import forms handled:
|
|
13
|
+
``import com.example.Foo;`` -- regular
|
|
14
|
+
``import static com.example.Foo.m;`` -- static
|
|
15
|
+
``import com.example.*;`` -- wildcard
|
|
16
|
+
``import static com.example.Foo.*;`` -- static wildcard
|
|
17
|
+
|
|
18
|
+
Symbol kinds extracted (top-level type declarations only):
|
|
19
|
+
class -- ``class_declaration`` and ``record_declaration`` (Java 16+)
|
|
20
|
+
interface -- ``interface_declaration`` and ``annotation_type_declaration``
|
|
21
|
+
enum -- ``enum_declaration``
|
|
22
|
+
|
|
23
|
+
Visibility rule (Java):
|
|
24
|
+
- ``"public"`` -- declaration has an explicit ``public`` modifier
|
|
25
|
+
- ``"module"`` -- no ``public`` modifier (package-private default)
|
|
26
|
+
|
|
27
|
+
Uses shared ``_treesitter`` helpers; the public interface (class name,
|
|
28
|
+
method signatures, flags, file_extensions) is identical to the former
|
|
29
|
+
regex adapter.
|
|
30
|
+
"""
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import logging
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
from ._base import RegexAdapterBase
|
|
37
|
+
from ._ir import AuthorityWriteCandidate, ContractCandidate, ImportEdge, SymbolDef, TSRuntimeSignal
|
|
38
|
+
from ._treesitter import (
|
|
39
|
+
iter_named_children,
|
|
40
|
+
node_line,
|
|
41
|
+
node_text,
|
|
42
|
+
parse_bytes,
|
|
43
|
+
walk_named,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Only the adapter class is part of this module's public API.
__all__ = ["JavaAdapter"]

# Module-level logger; the extractor methods use it for debug tracing.
_log = logging.getLogger(__name__)

# Language key handed to ``parse_bytes`` for every parse in this module.
_LANGUAGE = "java"

# Top-level declaration node types that map to SymbolDef entries.
# ``extract_symbols`` keeps only direct children of the parse root whose
# node type is a member of this set.
_TYPE_DECL_NODES = frozenset({
    "class_declaration",
    "interface_declaration",
    "enum_declaration",
    "record_declaration",
    "annotation_type_declaration",
})
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
# Internal helpers
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
def _fqn_from_import(decl_node, src: bytes) -> str:
|
|
67
|
+
"""Reconstruct the fully-qualified import name from an import_declaration node.
|
|
68
|
+
|
|
69
|
+
Handles regular, static, and wildcard (``*``) forms.
|
|
70
|
+
|
|
71
|
+
Returns the fqn string, e.g. ``"com.example.Foo"`` or ``"com.example.*"``.
|
|
72
|
+
Returns empty string if the structure is unexpected.
|
|
73
|
+
"""
|
|
74
|
+
# Collect named children; filter unnamed punctuation.
|
|
75
|
+
# Children layout (unnamed tokens are `import`, `static`, `.`, `;`):
|
|
76
|
+
# regular: import <scoped_identifier> ;
|
|
77
|
+
# static: import static <scoped_identifier> ;
|
|
78
|
+
# wildcard: import <scoped_identifier> . <asterisk> ;
|
|
79
|
+
# static wildcard: import static <scoped_identifier> . <asterisk> ;
|
|
80
|
+
|
|
81
|
+
fqn_parts: list[str] = []
|
|
82
|
+
is_wildcard = False
|
|
83
|
+
|
|
84
|
+
for child in decl_node.children:
|
|
85
|
+
ctype = child.type
|
|
86
|
+
if ctype == "scoped_identifier" or ctype == "identifier":
|
|
87
|
+
fqn_parts.append(node_text(child, src))
|
|
88
|
+
elif ctype == "asterisk":
|
|
89
|
+
is_wildcard = True
|
|
90
|
+
|
|
91
|
+
if not fqn_parts:
|
|
92
|
+
return ""
|
|
93
|
+
|
|
94
|
+
fqn = fqn_parts[0] # scoped_identifier already contains dots
|
|
95
|
+
if is_wildcard:
|
|
96
|
+
fqn = fqn + ".*"
|
|
97
|
+
return fqn
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _visibility_from_modifiers(decl_node, src: bytes) -> str:
|
|
101
|
+
"""Return visibility string for a type declaration node.
|
|
102
|
+
|
|
103
|
+
Java convention used by this adapter (matches prior regex adapter):
|
|
104
|
+
- ``"public"`` if a ``modifiers`` child contains a ``public`` token
|
|
105
|
+
- ``"module"`` otherwise (package-private default)
|
|
106
|
+
"""
|
|
107
|
+
for child in decl_node.children:
|
|
108
|
+
if child.type == "modifiers":
|
|
109
|
+
mods_text = node_text(child, src)
|
|
110
|
+
if "public" in mods_text.split():
|
|
111
|
+
return "public"
|
|
112
|
+
return "module"
|
|
113
|
+
return "module"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _kind_from_node_type(node_type: str) -> str:
|
|
117
|
+
"""Map a tree-sitter declaration node type to an IR kind string."""
|
|
118
|
+
if node_type == "class_declaration":
|
|
119
|
+
return "class"
|
|
120
|
+
if node_type == "record_declaration":
|
|
121
|
+
return "class" # records map to "class" for parity with prior adapter
|
|
122
|
+
if node_type == "interface_declaration":
|
|
123
|
+
return "interface"
|
|
124
|
+
if node_type == "annotation_type_declaration":
|
|
125
|
+
return "interface" # @interface maps to "interface" for parity
|
|
126
|
+
if node_type == "enum_declaration":
|
|
127
|
+
return "enum"
|
|
128
|
+
return "class" # unreachable given _TYPE_DECL_NODES guard
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _name_from_decl(decl_node, src: bytes) -> str:
|
|
132
|
+
"""Extract the simple name identifier from a type declaration node."""
|
|
133
|
+
for child in decl_node.children:
|
|
134
|
+
if child.type == "identifier" and child.is_named:
|
|
135
|
+
return node_text(child, src)
|
|
136
|
+
return ""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
# Adapter
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
class JavaAdapter(RegexAdapterBase):
    """Java adapter -- AST-based extractor built on tree-sitter.

    Operates on ``.java`` files. All four capability flags declared below
    are enabled: structural (imports + symbols), contracts, runtime signals
    and authority writes.

    Contract, runtime and writer extraction skip test sources, i.e. files
    whose name ends with ``Test.java``; import and symbol extraction do not.

    Public interface (class name, method signatures, attributes, flags)
    is preserved exactly from the prior regex-based JavaAdapter.
    """

    language = "java"
    file_extensions = (".java",)
    supports_structural = True
    supports_contracts = True
    supports_runtime_signals = True
    supports_authority_writes = True

    #: Spring stereotype annotation names that indicate DI registration.
    _SPRING_STEREOTYPES: frozenset[str] = frozenset({
        "Component", "Service", "Repository", "Configuration",
        "Controller", "RestController",
    })

    #: File-name suffix that marks a source file as a test.
    _JAVA_TEST_SUFFIX = "Test.java"

    #: tree-sitter node type -> ``contract_kind`` reported by extract_contracts.
    _JAVA_CONTRACT_KINDS = {
        "class_declaration": "class",
        "record_declaration": "record",
        "interface_declaration": "interface",
        "enum_declaration": "enum",
    }

    #: Constructor type names whose instantiation counts as a file write.
    _JAVA_WRITER_TYPES = frozenset({"FileWriter", "FileOutputStream"})

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _java_is_test_path(path: Path) -> bool:
        """Return True when the file name of *path* ends with ``Test.java``."""
        return Path(path).name.endswith(JavaAdapter._JAVA_TEST_SUFFIX)

    @staticmethod
    def _java_parse_source(content: str):
        """Encode *content* as UTF-8 and parse it; return ``(root, src_bytes)``."""
        src = content.encode("utf-8", errors="replace")
        return parse_bytes(_LANGUAGE, src), src

    @staticmethod
    def _java_top_level(root, node_types) -> list:
        """Return the named direct children of *root* whose type is in *node_types*.

        Inner types never show up here: they are nested inside a body node
        rather than being direct children of the root.
        """
        return [
            node for node in root.children
            if node.is_named and node.type in node_types
        ]

    @staticmethod
    def _java_hint(text: str) -> str:
        """Strip surrounding whitespace and quotes from *text*, cap at 30 chars."""
        return text.strip().strip('"\'').strip()[:30]

    @staticmethod
    def _java_first_arg(args_node, src: bytes) -> str:
        """Return the text of the first named child of an argument list node.

        Returns an empty string when *args_node* is ``None`` or has no named
        children.
        """
        if args_node is None:
            return ""
        for arg in args_node.children:
            if arg.is_named:
                return node_text(arg, src)
        return ""

    # ------------------------------------------------------------------
    # Structural: imports
    # ------------------------------------------------------------------

    def extract_imports(self, content: str, path: Path) -> list[ImportEdge]:
        """Return one ImportEdge per import statement found in *content*.

        Handled forms:
          ``import com.example.Foo;``           -- confidence 1.0
          ``import static com.example.Foo.m;``  -- confidence 1.0
          ``import com.example.*;``             -- confidence 1.0
          ``import static com.example.Foo.*;``  -- confidence 1.0

        All Java imports are absolute (no relative import syntax), so every
        edge has ``kind="absolute"``. Edges are de-duplicated on
        ``(line, module)`` and sorted by ``(line, to_module, kind)``.
        """
        _log.debug("extract_imports (tree-sitter): %s (%d chars)", path, len(content))
        root, src = self._java_parse_source(content)
        from_path = Path(path).as_posix()

        edges: list[ImportEdge] = []
        seen: set[tuple[int, str]] = set()

        for decl in iter_named_children(root, "import_declaration"):
            fqn = _fqn_from_import(decl, src)
            if not fqn:
                continue
            line = node_line(decl)
            key = (line, fqn)
            if key in seen:
                continue
            seen.add(key)
            edges.append(ImportEdge(
                from_file=from_path,
                to_module=fqn,
                kind="absolute",
                line=line,
                confidence=1.0,
            ))

        edges.sort(key=lambda e: (e.line, e.to_module, e.kind))
        return edges

    # ------------------------------------------------------------------
    # Structural: symbols
    # ------------------------------------------------------------------

    def extract_symbols(self, content: str, path: Path) -> list[SymbolDef]:
        """Return one SymbolDef per top-level type declaration in *content*.

        Detected kinds:
          class      -- ``class`` declarations and ``record`` declarations
          interface  -- ``interface`` declarations and ``@interface`` (annotation)
          enum       -- ``enum`` declarations

        Visibility:
          - ``"public"``  if the declaration has a ``public`` modifier.
          - ``"module"``  otherwise (package-private default).

        Inner types are NOT emitted: tree-sitter nests them inside a
        ``class_body`` so they do not appear as direct children of ``program``.
        Declarations without a name are skipped. Results are sorted by
        ``(line, name)``.
        """
        _log.debug("extract_symbols (tree-sitter): %s (%d chars)", path, len(content))
        root, src = self._java_parse_source(content)

        syms: list[SymbolDef] = []
        for node in self._java_top_level(root, _TYPE_DECL_NODES):
            name = _name_from_decl(node, src)
            if not name:
                continue
            syms.append(SymbolDef(
                name=name,
                kind=_kind_from_node_type(node.type),
                line=node_line(node),
                visibility=_visibility_from_modifiers(node, src),
                confidence=1.0,
            ))

        syms.sort(key=lambda s: (s.line, s.name))
        return syms

    # ------------------------------------------------------------------
    # Contracts: class, record, interface, enum type declarations
    # ------------------------------------------------------------------

    def extract_contracts(self, content: str, path: Path) -> list[ContractCandidate]:
        """Return one ContractCandidate per top-level declared type.

        Handled forms:
          ``public class X { ... }``      -> contract_kind="class"
          ``public record X(...) { }``    -> contract_kind="record" (Java 16+)
          ``public interface X { ... }``  -> contract_kind="interface"
          ``public enum X { ... }``       -> contract_kind="enum"

        Annotation types (``@interface``) are not reported as contracts.

        Top-level types only: inner types are nested inside a ``class_body``
        so they do not appear as direct children of ``program``.

        Test files (path name ending with ``Test.java``) return ``[]``.

        All results carry ``confidence=1.0`` (AST-based extraction).
        Results are sorted by ``(line, name)``.
        """
        if self._java_is_test_path(path):
            return []

        _log.debug("extract_contracts (tree-sitter): %s (%d chars)", path, len(content))
        root, src = self._java_parse_source(content)

        candidates: list[ContractCandidate] = []
        for node in self._java_top_level(root, self._JAVA_CONTRACT_KINDS):
            name = _name_from_decl(node, src)
            if not name:
                continue
            candidates.append(ContractCandidate(
                name=name,
                contract_kind=self._JAVA_CONTRACT_KINDS[node.type],
                line=node_line(node),
                confidence=1.0,
            ))

        candidates.sort(key=lambda c: (c.line, c.name))
        return candidates

    # ------------------------------------------------------------------
    # Runtime signals: static initializer blocks, Spring stereotypes,
    # thread / executor spawns.
    # ------------------------------------------------------------------

    def _java_spring_signals(self, class_node, src: bytes, file_posix: str) -> list[TSRuntimeSignal]:
        """Return one ``spring_component`` signal per Spring stereotype on *class_node*.

        Annotations live in the ``modifiers`` child; each is either a
        ``marker_annotation`` (no args) or an ``annotation`` (with args).
        Only the first named ``identifier`` child of an annotation is taken
        as its name. The signal line is the class declaration's line.
        """
        class_name = _name_from_decl(class_node, src)
        found: list[TSRuntimeSignal] = []
        for child in class_node.children:
            if child.type != "modifiers":
                continue
            for mod in child.children:
                if mod.type not in ("marker_annotation", "annotation"):
                    continue
                ident = next(
                    (c for c in mod.children if c.is_named and c.type == "identifier"),
                    None,
                )
                if ident is None:
                    continue
                if node_text(ident, src) in self._SPRING_STEREOTYPES:
                    found.append(TSRuntimeSignal(
                        kind="spring_component",
                        file=file_posix,
                        line=node_line(class_node),
                        confidence=1.0,
                        payload={"call": class_name},
                    ))
        return found

    @staticmethod
    def _java_static_init_signals(class_node, file_posix: str) -> list[TSRuntimeSignal]:
        """Return one ``static_block`` signal per ``static { ... }`` in the class body.

        Only ``static_initializer`` nodes that are direct children of the
        class's ``class_body`` are reported.
        """
        found: list[TSRuntimeSignal] = []
        for child in class_node.children:
            if child.type != "class_body":
                continue
            for member in child.children:
                if member.is_named and member.type == "static_initializer":
                    found.append(TSRuntimeSignal(
                        kind="static_block",
                        file=file_posix,
                        line=node_line(member),
                        confidence=1.0,
                        payload={"call": "static_init"},
                    ))
        return found

    @staticmethod
    def _java_thread_signals(root, src: bytes, file_posix: str) -> list[TSRuntimeSignal]:
        """Return ``thread_spawn`` signals found anywhere in the tree.

        ``new Thread(...)`` expressions come first, followed by
        ``<receiver>.submit(...)`` / ``<receiver>.execute(...)`` calls.
        The method-call match is by method name only; the receiver's type
        is not checked.
        """
        found: list[TSRuntimeSignal] = []

        for creation in walk_named(root, "object_creation_expression"):
            type_node = creation.child_by_field_name("type")
            if type_node is None:
                continue
            if node_text(type_node, src) == "Thread":
                found.append(TSRuntimeSignal(
                    kind="thread_spawn",
                    file=file_posix,
                    line=node_line(creation),
                    confidence=1.0,
                    payload={"call": "new Thread"},
                ))

        for call in walk_named(root, "method_invocation"):
            name_node = call.child_by_field_name("name")
            obj_node = call.child_by_field_name("object")
            if name_node is None or obj_node is None:
                continue
            method = node_text(name_node, src)
            if method not in ("submit", "execute"):
                continue
            receiver = node_text(obj_node, src)
            found.append(TSRuntimeSignal(
                kind="thread_spawn",
                file=file_posix,
                line=node_line(call),
                confidence=1.0,
                payload={"call": f"{receiver}.{method}"},
            ))

        return found

    def extract_runtime(self, content: str, path: Path) -> list[TSRuntimeSignal]:
        """Detect Java import-time and concurrency side effects via tree-sitter AST.

        Emits TSRuntimeSignal (confidence=1.0) for:
          ``static { ... }`` initializer block in a top-level class
            -> kind="static_block",  payload={"call": "static_init"}
          top-level class annotated with a Spring stereotype
          (@Component/@Service/@Repository/@Configuration/@Controller/@RestController)
            -> kind="spring_component", payload={"call": <class name>}
          ``new Thread(...)`` (object_creation_expression of type Thread)
            -> kind="thread_spawn",  payload={"call": "new Thread"}
          ``*.submit(...)`` / ``*.execute(...)`` on any receiver
            -> kind="thread_spawn",  payload={"call": "<receiver>.submit"} etc.

        Test files (path name ending with ``Test.java``) return ``[]``.
        Results are sorted by ``(line, kind)``.
        """
        if self._java_is_test_path(path):
            return []

        _log.debug("extract_runtime (tree-sitter): %s (%d chars)", path, len(content))
        root, src = self._java_parse_source(content)
        file_posix = Path(path).as_posix()

        signals: list[TSRuntimeSignal] = []

        # Pass 1: top-level class declarations -- Spring stereotypes come from
        # the modifiers, static initializer blocks from the class body.
        for class_node in self._java_top_level(root, ("class_declaration",)):
            signals.extend(self._java_spring_signals(class_node, src, file_posix))
            signals.extend(self._java_static_init_signals(class_node, file_posix))

        # Pass 2: whole-tree walk for thread / executor spawn patterns.
        signals.extend(self._java_thread_signals(root, src, file_posix))

        signals.sort(key=lambda s: (s.line, s.kind))
        return signals

    # ------------------------------------------------------------------
    # Authority writes
    # ------------------------------------------------------------------

    def extract_writer_calls(
        self, content: str, path: Path
    ) -> list[AuthorityWriteCandidate]:
        """Detect write operations in Java source via tree-sitter AST.

        Walks ``method_invocation`` nodes and ``object_creation_expression``
        nodes to match writer patterns:

        ``method_invocation`` (object.name(args)):
          - ``Files.write(...)`` / ``Files.writeString(...)``   (java.nio)
            -> ``write_kind="fs_write"``, target_hint = first arg text
          - ``*.write(...)`` / ``*.append(...)``  (any receiver other than ``Files``)
            -> ``write_kind="fs_write"``, target_hint = receiver (object) text
          - ``*.save(...)`` / ``*.persist(...)``  (JPA/Spring repo)
            -> ``write_kind="orm_save"``, target_hint = receiver text

        ``object_creation_expression`` (new Type(args)):
          - ``new FileWriter(...)`` / ``new FileOutputStream(...)``
            -> ``write_kind="fs_write"``, target_hint = first arg text

        Matching is by receiver text and method/type name only. Target hints
        have surrounding quotes stripped and are capped at 30 characters.

        Test files (path name ending with ``Test.java``) return ``[]``.
        All results carry ``confidence=1.0``.
        Results are sorted by ``(line, write_kind)``.
        """
        if self._java_is_test_path(path):
            return []

        _log.debug("extract_writer_calls (tree-sitter): %s (%d chars)", path, len(content))
        root, src = self._java_parse_source(content)

        candidates: list[AuthorityWriteCandidate] = []

        # --- method_invocation: object.method(args) ---
        for call in walk_named(root, "method_invocation"):
            obj = call.child_by_field_name("object")
            name_node = call.child_by_field_name("name")
            args = call.child_by_field_name("arguments")
            if obj is None or name_node is None:
                continue

            receiver = node_text(obj, src)
            method = node_text(name_node, src)

            if receiver == "Files" and method in ("write", "writeString"):
                # java.nio: the target path is the first argument.
                write_kind = "fs_write"
                hint = self._java_hint(self._java_first_arg(args, src))
            elif method in ("write", "append") and receiver != "Files":
                # Stream / writer style call: the receiver names the target.
                write_kind = "fs_write"
                hint = self._java_hint(receiver)
            elif method in ("save", "persist"):
                write_kind = "orm_save"
                hint = self._java_hint(receiver)
            else:
                continue

            candidates.append(AuthorityWriteCandidate(
                write_kind=write_kind,
                target_hint=hint,
                line=node_line(call),
                confidence=1.0,
            ))

        # --- object_creation_expression: new Type(args) ---
        for creation in walk_named(root, "object_creation_expression"):
            type_node = creation.child_by_field_name("type")
            args = creation.child_by_field_name("arguments")
            if type_node is None:
                continue
            if node_text(type_node, src) not in self._JAVA_WRITER_TYPES:
                continue
            candidates.append(AuthorityWriteCandidate(
                write_kind="fs_write",
                target_hint=self._java_hint(self._java_first_arg(args, src)),
                line=node_line(creation),
                confidence=1.0,
            ))

        candidates.sort(key=lambda c: (c.line, c.write_kind))
        return candidates
|