watchllm-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- watchllm_kernel/__init__.py +23 -0
- watchllm_kernel/__main__.py +4 -0
- watchllm_kernel/cli.py +214 -0
- watchllm_kernel/config_loader.py +43 -0
- watchllm_kernel/engine.py +115 -0
- watchllm_kernel/models.py +100 -0
- watchllm_kernel/parser.py +128 -0
- watchllm_kernel/reporting.py +186 -0
- watchllm_kernel/rules/__init__.py +17 -0
- watchllm_kernel/rules/_ast_utils.py +96 -0
- watchllm_kernel/rules/auth_flow.py +300 -0
- watchllm_kernel/rules/boundary.py +231 -0
- watchllm_kernel/rules/forbidden_imports.py +202 -0
- watchllm_kernel/rules/secrets.py +190 -0
- watchllm_kernel-0.1.0.dist-info/METADATA +138 -0
- watchllm_kernel-0.1.0.dist-info/RECORD +19 -0
- watchllm_kernel-0.1.0.dist-info/WHEEL +5 -0
- watchllm_kernel-0.1.0.dist-info/entry_points.txt +2 -0
- watchllm_kernel-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
2
|
+
|
|
3
|
+
from watchllm_kernel.models import (
|
|
4
|
+
Decision,
|
|
5
|
+
KernelResult,
|
|
6
|
+
Rule,
|
|
7
|
+
RuleDecision,
|
|
8
|
+
RuleResult,
|
|
9
|
+
Severity,
|
|
10
|
+
SourceLocation,
|
|
11
|
+
Violation,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Names exported by ``from watchllm_kernel import *``. The list mirrors the
# names imported from ``watchllm_kernel.models`` in this module, in the same
# alphabetical order.
__all__ = [
    "Decision",
    "KernelResult",
    "Rule",
    "RuleDecision",
    "RuleResult",
    "Severity",
    "SourceLocation",
    "Violation",
]
|
watchllm_kernel/cli.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import dataclasses
|
|
3
|
+
import enum
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from watchllm_kernel.engine import ENFORCE_MODE, SHADOW_MODE, evaluate_source
|
|
10
|
+
from watchllm_kernel.models import Decision
|
|
11
|
+
from watchllm_kernel.reporting import format_human_report, write_block_log
|
|
12
|
+
from watchllm_kernel.rules.auth_flow import AuthFlowRule
|
|
13
|
+
from watchllm_kernel.rules.boundary import BoundaryRule
|
|
14
|
+
from watchllm_kernel.rules.forbidden_imports import ForbiddenImportRule
|
|
15
|
+
from watchllm_kernel.config_loader import load_config
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Helpers
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _to_jsonable(obj: Any) -> Any:
    """Recursively convert *obj* into JSON-serialisable primitives.

    Conversion rules:
        * dataclass instances become ``{field name: converted value}`` dicts;
        * ``enum.Enum`` members become their ``.value``;
        * lists and tuples become lists of converted items;
        * dicts keep their keys and get converted values;
        * anything else is returned unchanged.

    Parameters:
        obj: The value to convert.

    Returns:
        The converted structure. Values of types not listed above are passed
        through as-is, so ``json.dump`` may still reject them.
    """
    # ``is_dataclass`` is also true for dataclass *classes*; only instances
    # have per-field values to read, so classes fall through untouched.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return {
            field.name: _to_jsonable(getattr(obj, field.name))
            for field in dataclasses.fields(obj)
        }
    if isinstance(obj, enum.Enum):
        return obj.value
    if isinstance(obj, (list, tuple)):
        return [_to_jsonable(item) for item in obj]
    if isinstance(obj, dict):
        return {key: _to_jsonable(value) for key, value in obj.items()}
    return obj
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_default_rules(config: dict | None = None):
|
|
35
|
+
"""Return the default rule set for the kernel CLI, applying overrides from config.
|
|
36
|
+
"""
|
|
37
|
+
config = config or {}
|
|
38
|
+
rules = []
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
from watchllm_kernel.rules.secrets import SecretLiteralRule
|
|
42
|
+
except ModuleNotFoundError:
|
|
43
|
+
SecretLiteralRule = None
|
|
44
|
+
|
|
45
|
+
if SecretLiteralRule is not None:
|
|
46
|
+
secrets_cfg = config.get("rules", {}).get("secrets", {})
|
|
47
|
+
if secrets_cfg.get("enabled", True):
|
|
48
|
+
rules.append(SecretLiteralRule())
|
|
49
|
+
|
|
50
|
+
# Build Forbidden Import Rule
|
|
51
|
+
fi_cfg = config.get("rules", {}).get("forbidden_imports", {})
|
|
52
|
+
if fi_cfg.get("enabled", True):
|
|
53
|
+
rules.append(ForbiddenImportRule(
|
|
54
|
+
forbidden_modules=fi_cfg.get("modules"),
|
|
55
|
+
forbidden_prefixes=fi_cfg.get("forbidden_prefixes"),
|
|
56
|
+
allowed_relative_prefixes=fi_cfg.get("allowed_relative_prefixes")
|
|
57
|
+
))
|
|
58
|
+
|
|
59
|
+
# Build Boundary Rule
|
|
60
|
+
boundary_cfg = config.get("rules", {}).get("boundary", {})
|
|
61
|
+
if boundary_cfg.get("enabled", True):
|
|
62
|
+
rules.append(BoundaryRule(
|
|
63
|
+
boundary_map=boundary_cfg.get("map")
|
|
64
|
+
))
|
|
65
|
+
|
|
66
|
+
# Build Auth Flow Rule
|
|
67
|
+
auth_cfg = config.get("rules", {}).get("auth_flow", {})
|
|
68
|
+
if auth_cfg.get("enabled", True):
|
|
69
|
+
rules.append(AuthFlowRule())
|
|
70
|
+
|
|
71
|
+
return rules
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Argument parser
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the ``watchllm-kernel`` argument parser.

    The parser has a top-level ``--version`` flag and one sub-command,
    ``check``, whose name is stored in ``args.command``.

    Returns:
        The configured top-level ``argparse.ArgumentParser``.
    """
    root = argparse.ArgumentParser(
        prog="watchllm-kernel",
        description="Deterministic local write-path governance kernel for autonomous coding agents.",
    )
    root.add_argument("--version", action="version", version="%(prog)s 0.1.0")

    commands = root.add_subparsers(dest="command", help="sub-command")
    check = commands.add_parser("check", help="Check source against rules")

    # (flag, keyword arguments) pairs; registration order is the order the
    # options appear in ``check --help``.
    check_options = (
        (
            "--stdin",
            {
                "action": "store_true",
                "help": "Read source from stdin instead of a file path",
            },
        ),
        (
            "--filepath",
            {
                "default": None,
                "help": "Path to source file (ignored when --stdin is used)",
            },
        ),
        (
            "--language",
            {
                "choices": ["js", "ts"],
                "default": None,
                "help": "Language identifier (js or ts). Inferred from file extension when omitted.",
            },
        ),
        (
            "--mode",
            {
                "choices": [ENFORCE_MODE, SHADOW_MODE],
                "default": ENFORCE_MODE,
                "help": "Evaluation mode (default: enforce)",
            },
        ),
        (
            "--json",
            {
                "action": "store_true",
                "help": "Output result as JSON",
            },
        ),
    )
    for flag, options in check_options:
        check.add_argument(flag, **options)

    return root
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# Language resolution
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
_LANGUAGE_SHORT_MAP = {
|
|
129
|
+
"js": "javascript",
|
|
130
|
+
"ts": "typescript",
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _resolve_language(language: str | None, file_path: str | None) -> str | None:
|
|
135
|
+
if language and language in _LANGUAGE_SHORT_MAP:
|
|
136
|
+
return _LANGUAGE_SHORT_MAP[language]
|
|
137
|
+
if language:
|
|
138
|
+
return language
|
|
139
|
+
if file_path is None:
|
|
140
|
+
return None
|
|
141
|
+
suffix = Path(file_path).suffix.lower()
|
|
142
|
+
if suffix in (".ts", ".tsx"):
|
|
143
|
+
return "typescript"
|
|
144
|
+
if suffix in (".js", ".jsx", ".mjs", ".cjs"):
|
|
145
|
+
return "javascript"
|
|
146
|
+
return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Main
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``watchllm-kernel`` command line and return its exit status.

    Parameters:
        argv: Argument list without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        ``0`` when help was printed or the decision is not
        ``Decision.BLOCK``; ``1`` when the decision is ``Decision.BLOCK``;
        ``2`` when the input could not be read or evaluated.

    Raises:
        SystemExit: Raised by ``argparse`` for ``--version``, ``--help`` or
            invalid arguments.
    """
    parser = build_parser()
    if argv is None:
        argv = sys.argv[1:]

    if not argv:
        parser.print_help()
        return 0

    args = parser.parse_args(argv)

    if args.command != "check":
        parser.print_help()
        return 0

    # --- read source ---
    if args.stdin:
        source = sys.stdin.read()
        file_path = None
    else:
        if args.filepath is None:
            print("Error: either --stdin or --filepath is required", file=sys.stderr)
            return 2
        file_path = args.filepath
        try:
            source = Path(file_path).read_text(encoding="utf-8")
        except (OSError, ValueError) as exc:
            # OSError: missing/unreadable file. ValueError: undecodable
            # bytes (UnicodeDecodeError) or an invalid path string.
            print(f"Error reading file {file_path}: {exc}", file=sys.stderr)
            return 2

    language = _resolve_language(args.language, file_path)

    # --- load config ---
    start_path = str(Path(file_path).parent) if file_path else "."
    config = load_config(start_path=start_path)

    # --- evaluate ---
    rules = build_default_rules(config=config)
    try:
        result = evaluate_source(
            source,
            file_path=file_path,
            language=language,
            rules=rules,
            mode=args.mode,
        )
    except ValueError as exc:
        # An uncaught exception would end the process with status 1, which
        # callers could not tell apart from a BLOCK decision. Report the
        # problem and use the same status as the other input errors.
        print(f"Error: {exc}", file=sys.stderr)
        return 2

    # --- local blocked-event logging ---
    write_block_log(result)

    # --- output ---
    if args.json:
        payload = _to_jsonable(result)
        json.dump(payload, sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        print(format_human_report(result))

    # exit code
    if result.decision == Decision.BLOCK:
        return 1
    return 0
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
# File name searched for in each directory during config discovery.
CONFIG_FILENAME = ".watchllm.yaml"


def find_config(start_path: str = ".") -> Optional[str]:
    """Locate the nearest ``.watchllm.yaml`` at or above *start_path*.

    Parameters:
        start_path: Path where the search begins; it is made absolute first.

    Returns:
        The path of the first config file found while walking towards the
        filesystem root, or ``None`` when no directory on the way contains
        one.
    """
    directory = os.path.abspath(start_path)
    visited = None
    # ``os.path.dirname`` of the root is the root itself, which ends the walk.
    while directory != visited:
        candidate = os.path.join(directory, CONFIG_FILENAME)
        if os.path.isfile(candidate):
            return candidate
        visited, directory = directory, os.path.dirname(directory)
    return None
|
|
22
|
+
|
|
23
|
+
def load_config(config_path: Optional[str] = None, start_path: str = ".") -> Dict[str, Any]:
    """Load the WatchLLM configuration from a path or by auto-discovery.

    Parameters:
        config_path: Explicit config file path. When falsy, the file is
            looked up with ``find_config(start_path)``.
        start_path: Directory where auto-discovery starts.

    Returns:
        The parsed top-level mapping. An empty dict is returned when no
        file is found, the file is empty, it cannot be read or parsed, or
        its top-level value is not a mapping. Loading is best-effort and
        never raises for a bad config file.
    """
    # Local import: warnings go to stderr so they cannot corrupt
    # machine-readable output written to stdout.
    import sys

    if not config_path:
        config_path = find_config(start_path)

    if not config_path or not os.path.isfile(config_path):
        return {}

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except Exception as e:
        print(f"Warning: Failed to load config from {config_path}: {e}", file=sys.stderr)
        return {}

    if not config:
        # Empty document (None) or an empty container.
        return {}
    if not isinstance(config, dict):
        # A list or scalar at the top level cannot be queried with ``.get``.
        print(
            f"Warning: Ignoring config {config_path}: expected a mapping at "
            f"the top level, got {type(config).__name__}",
            file=sys.stderr,
        )
        return {}
    return config
|
|
38
|
+
|
|
39
|
+
def get_rule_config(config: Dict[str, Any], rule_name: str) -> Dict[str, Any]:
    """Extract the configuration mapping for a specific rule.

    Parameters:
        config: Parsed configuration mapping (may be empty or ``None``).
        rule_name: Key of the rule under ``config["rules"]``.

    Returns:
        The rule's mapping, or ``{}`` when the config, its ``rules``
        section or the rule entry is missing, null or not a mapping.
    """
    rules = config.get("rules") if isinstance(config, dict) else None
    if not isinstance(rules, dict):
        return {}
    rule_cfg = rules.get(rule_name)
    # A key with no value (``secrets:``) parses to None; honour the declared
    # return type by falling back to an empty mapping.
    return rule_cfg if isinstance(rule_cfg, dict) else {}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Deterministic decision engine for the WatchLLM kernel.
|
|
2
|
+
|
|
3
|
+
Combines rule results into a single kernel-level decision.
|
|
4
|
+
|
|
5
|
+
The engine now parses source **once** via ``parser.parse_source`` and
|
|
6
|
+
passes the resulting ``ParseResult`` to every rule, eliminating the
|
|
7
|
+
redundant per-rule parsing that existed previously.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from watchllm_kernel.models import Decision, KernelResult, Rule, RuleDecision, RuleResult, Violation
|
|
15
|
+
from watchllm_kernel.parser import parse_source
|
|
16
|
+
from watchllm_kernel.rules._ast_utils import infer_language_from_path
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Mode constants
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
# Evaluation modes accepted by the engine. ``reduce_decision`` always
# returns ALLOW in shadow mode and may return BLOCK only in enforce mode.
ENFORCE_MODE = "enforce"
SHADOW_MODE = "shadow"

# Immutable set used for mode validation.
VALID_MODES = frozenset((ENFORCE_MODE, SHADOW_MODE))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Helpers
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def has_blocking_failure(rule_results: list[RuleResult]) -> bool:
    """Tell whether at least one rule result has status ``RuleDecision.FAIL``.

    Only FAIL counts as blocking; PASS and INCONCLUSIVE results do not
    (scope of Task 10).

    Parameters:
        rule_results: Rule results to inspect.

    Returns:
        ``True`` as soon as a FAIL status is seen, otherwise ``False``.
    """
    for outcome in rule_results:
        if outcome.status == RuleDecision.FAIL:
            return True
    return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def collect_violations(rule_results: list[RuleResult]) -> list[Violation]:
    """Flatten the violations of all *rule_results* into one new list.

    The order follows the order of *rule_results* and, within each result,
    the order of its ``violations``.

    Parameters:
        rule_results: Rule results whose ``violations`` are gathered.

    Returns:
        A new list containing every violation.
    """
    return [
        violation
        for outcome in rule_results
        for violation in outcome.violations
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def reduce_decision(
    rule_results: list[RuleResult], mode: str = ENFORCE_MODE
) -> Decision:
    """Collapse per-rule results into the kernel-level decision.

    Parameters:
        rule_results: Results produced by the evaluated rules.
        mode: ``ENFORCE_MODE`` or ``SHADOW_MODE``.

    Returns:
        ``Decision.BLOCK`` in enforce mode when any result is a blocking
        failure; ``Decision.ALLOW`` otherwise, including always in shadow
        mode.

    Raises:
        ValueError: If *mode* is not one of ``VALID_MODES``.
    """
    if mode not in VALID_MODES:
        raise ValueError(
            f"Unknown mode {mode!r}. Valid modes: {sorted(VALID_MODES)}"
        )

    # Shadow mode never blocks, so the results are not inspected there.
    blocked = mode != SHADOW_MODE and has_blocking_failure(rule_results)
    return Decision.BLOCK if blocked else Decision.ALLOW
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Evaluator
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def evaluate_source(
    source: str,
    *,
    file_path: str | None = None,
    language: str | None = None,
    rules: list[Rule] | tuple[Rule, ...],
    mode: str = ENFORCE_MODE,
) -> KernelResult:
    """Evaluate *source* against *rules* and build the ``KernelResult``.

    The source is parsed a single time and the resulting ``ParseResult``
    is handed to every rule, so no rule has to repeat the tree-sitter
    work.

    Parameters:
        source: Source text to check.
        file_path: Optional path of the file; also used to infer the
            language when *language* is not given.
        language: Explicit language identifier; takes precedence over
            inference from *file_path*.
        rules: Rules to run, in evaluation order.
        mode: ``ENFORCE_MODE`` or ``SHADOW_MODE``.

    Returns:
        A ``KernelResult`` holding the decision, the per-rule results in
        evaluation order, and the file path, resolved language and mode.

    Raises:
        ValueError: If *mode* is not one of ``VALID_MODES``.

    Exceptions raised by the parser or by a rule are not caught here
    (Task 10 accepts failing fast).
    """
    if mode not in VALID_MODES:
        raise ValueError(
            f"Unknown mode {mode!r}. Valid modes: {sorted(VALID_MODES)}"
        )

    # An explicit language wins; otherwise fall back to the file path.
    resolved_language = language or infer_language_from_path(file_path)
    shared_parse = parse_source(
        source, language=resolved_language, file_path=file_path
    )

    rule_results: list[RuleResult] = [
        rule.evaluate(source, file_path=file_path, parse_result=shared_parse)
        for rule in rules
    ]

    return KernelResult(
        decision=reduce_decision(rule_results, mode=mode),
        rule_results=rule_results,
        file_path=file_path,
        language=resolved_language,
        mode=mode,
    )
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import enum
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from watchllm_kernel.parser import ParseResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@enum.unique
class Decision(enum.Enum):
    """Kernel-level outcome of an evaluation."""

    ALLOW = "ALLOW"
    BLOCK = "BLOCK"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@enum.unique
class RuleDecision(enum.Enum):
    """Outcome reported by a single rule."""

    PASS = "PASS"
    FAIL = "FAIL"
    INCONCLUSIVE = "INCONCLUSIVE"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@enum.unique
class Severity(enum.Enum):
    """How serious a rule violation is, listed from most to least severe."""

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclasses.dataclass
class SourceLocation:
    """A position, or a span when the end fields are set, in source text."""

    # Start of the span.
    line: int
    column: int
    # End of the span; left as None when only a start position is known.
    end_line: Optional[int] = None
    end_column: Optional[int] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclasses.dataclass
class Violation:
    """One finding reported by a rule."""

    # Identifier of the rule that produced the finding.
    rule_id: str
    # Description of the finding.
    message: str
    # Where the finding is in the source; None when no location is given.
    location: Optional[SourceLocation] = None
    # Findings are HIGH severity unless the rule says otherwise.
    severity: Severity = Severity.HIGH
    # Optional supporting text for the finding.
    evidence: Optional[str] = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
class RuleResult:
    """Outcome of running one rule."""

    # Identifier of the evaluated rule.
    rule_id: str
    # PASS, FAIL or INCONCLUSIVE.
    status: RuleDecision
    # Findings reported by the rule; each instance gets its own empty list.
    violations: list[Violation] = dataclasses.field(default_factory=list)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclasses.dataclass
class KernelResult:
    """Combined outcome of one kernel evaluation."""

    # Final ALLOW/BLOCK decision.
    decision: Decision
    # Per-rule results; each instance gets its own empty list by default.
    rule_results: list[RuleResult] = dataclasses.field(default_factory=list)
    # Path of the evaluated file, when one was given.
    file_path: Optional[str] = None
    # Language identifier recorded for the evaluation, when known.
    language: Optional[str] = None
    # Evaluation mode recorded for the evaluation.
    mode: str = "enforce"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Rule:
    """Base class for deterministic rules.

    Subclasses override ``evaluate`` and return a ``RuleResult``; the
    implementation here only raises ``NotImplementedError``.

    Arguments received by ``evaluate``:
        source:       Raw source text.
        file_path:    Path of the file under evaluation, if any.
        parse_result: ``ParseResult`` already produced by the engine, if
                      any. A rule should reuse it rather than parse the
                      source again, but must also work when it is ``None``.
    """

    def __init__(self, rule_id: str, name: str, description: str = ""):
        # Plain attributes describing the rule.
        self.rule_id = rule_id
        self.name = name
        self.description = description

    def evaluate(
        self,
        source: str,
        file_path: Optional[str] = None,
        parse_result: Optional[ParseResult] = None,
    ) -> RuleResult:
        """Check *source* against this rule.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement evaluate()")
|
|
100
|
+
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Parser abstraction for the WatchLLM kernel.
|
|
2
|
+
|
|
3
|
+
Provides a minimal, deterministic interface to Tree-sitter for JavaScript
|
|
4
|
+
and TypeScript source files. The module exposes a single `parse_source`
|
|
5
|
+
function that returns a structured parse result containing the raw
|
|
6
|
+
Tree-sitter tree and a convenience traversal helper.
|
|
7
|
+
|
|
8
|
+
This module is intentionally narrow: it does not perform rule evaluation,
|
|
9
|
+
enforcement, or any decision-making. It exists solely to give rule
|
|
10
|
+
implementations a stable AST surface.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import dataclasses
|
|
16
|
+
from typing import Any, Optional
|
|
17
|
+
|
|
18
|
+
import tree_sitter_javascript
|
|
19
|
+
import tree_sitter_typescript
|
|
20
|
+
from tree_sitter import Language, Parser, Node, Tree
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Language registry
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# Language identifier -> Tree-sitter ``Language``, built once at import time
# from the grammar factory functions of the two grammar packages.
_LANGUAGE_MAP: dict[str, Language] = {
    identifier: Language(grammar_factory())
    for identifier, grammar_factory in (
        ("javascript", tree_sitter_javascript.language),
        ("typescript", tree_sitter_typescript.language_typescript),
        ("tsx", tree_sitter_typescript.language_tsx),
    )
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _resolve_language(language: str) -> Language:
    """Look up the Tree-sitter ``Language`` registered for *language*.

    Parameters:
        language: A key of ``_LANGUAGE_MAP``.

    Returns:
        The matching ``Language`` object.

    Raises:
        ValueError: If *language* is not a key of ``_LANGUAGE_MAP``; the
            message lists the supported identifiers.
    """
    grammar = _LANGUAGE_MAP.get(language)
    if grammar is not None:
        return grammar

    supported = ', '.join(sorted(_LANGUAGE_MAP))
    raise ValueError(
        f"Unsupported language '{language}'. "
        f"Supported: {supported}"
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Parse result
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclasses.dataclass
class ParseResult:
    """Outcome of parsing one source file.

    Attributes:
        tree:      Raw Tree-sitter parse tree; traverse it directly or via
                   the helpers below.
        source:    The source text as bytes.
        language:  Language identifier the source was parsed with.
        file_path: Path of the parsed file, if one was supplied.
    """

    tree: Tree
    source: bytes
    language: str
    file_path: Optional[str] = None

    @property
    def root_node(self) -> Node:
        """The root syntax node of ``tree``."""
        return self.tree.root_node

    def walk(self):
        """Return the cursor produced by ``tree.walk()``."""
        return self.tree.walk()

    def query(self, query_string: str) -> dict[str, list[Node]]:
        """Run a Tree-sitter query over the whole tree.

        Parameters:
            query_string: Query source text.

        Returns:
            The result of ``QueryCursor.captures`` on the root node: a
            mapping from capture name to the captured nodes.

        Raises:
            tree_sitter.QueryError: If the query is malformed.
        """
        from tree_sitter import Query as _Query, QueryCursor as _QueryCursor

        # The query is compiled for the grammar this result was parsed with.
        compiled = _Query(_resolve_language(self.language), query_string)
        cursor = _QueryCursor(compiled)
        return cursor.captures(self.root_node)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Public API
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_source(
    source: str,
    language: str,
    file_path: Optional[str] = None,
) -> ParseResult:
    """Parse *source* with the Tree-sitter grammar registered for *language*.

    Parameters:
        source:    Source code as a Python string; it is encoded to UTF-8
                   before parsing.
        language:  One of ``"javascript"``, ``"typescript"`` or ``"tsx"``.
        file_path: Optional path kept on the result for reporting; it does
                   not influence parsing.

    Returns:
        A ``ParseResult`` holding the Tree-sitter tree, the UTF-8 encoded
        source, and the *language* and *file_path* that were passed in.

    Raises:
        ValueError: If *language* is not supported.
    """
    ts_parser = Parser(_resolve_language(language))
    encoded = source.encode("utf-8")

    return ParseResult(
        tree=ts_parser.parse(encoded),
        source=encoded,
        language=language,
        file_path=file_path,
    )
|