watchllm-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ __version__ = "0.1.0"
2
+
3
+ from watchllm_kernel.models import (
4
+ Decision,
5
+ KernelResult,
6
+ Rule,
7
+ RuleDecision,
8
+ RuleResult,
9
+ Severity,
10
+ SourceLocation,
11
+ Violation,
12
+ )
13
+
14
+ __all__ = [
15
+ "Decision",
16
+ "KernelResult",
17
+ "Rule",
18
+ "RuleDecision",
19
+ "RuleResult",
20
+ "Severity",
21
+ "SourceLocation",
22
+ "Violation",
23
+ ]
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
# Entry point for ``python -m watchllm_kernel``: run the CLI and use its
# integer return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
watchllm_kernel/cli.py ADDED
@@ -0,0 +1,214 @@
1
+ import argparse
2
+ import dataclasses
3
+ import enum
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from watchllm_kernel.engine import ENFORCE_MODE, SHADOW_MODE, evaluate_source
10
+ from watchllm_kernel.models import Decision
11
+ from watchllm_kernel.reporting import format_human_report, write_block_log
12
+ from watchllm_kernel.rules.auth_flow import AuthFlowRule
13
+ from watchllm_kernel.rules.boundary import BoundaryRule
14
+ from watchllm_kernel.rules.forbidden_imports import ForbiddenImportRule
15
+ from watchllm_kernel.config_loader import load_config
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Helpers
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
def _to_jsonable(obj: Any) -> Any:
    """Recursively convert *obj* into JSON-serialisable primitives.

    Conversions applied:
      * dataclass instances -> ``dict`` keyed by field name,
      * enum members -> their ``value``,
      * lists and tuples -> ``list`` with every item converted,
      * dicts -> ``dict`` with every value converted (enum keys are replaced
        by their ``value``).

    Any other object is returned unchanged.
    """
    # ``is_dataclass`` is also true for dataclass *classes*; only instances
    # carry field values, so classes fall through and are returned as-is.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return {
            f.name: _to_jsonable(getattr(obj, f.name))
            for f in dataclasses.fields(obj)
        }
    if isinstance(obj, enum.Enum):
        return obj.value
    if isinstance(obj, (list, tuple)):
        return [_to_jsonable(item) for item in obj]
    if isinstance(obj, dict):
        return {
            (key.value if isinstance(key, enum.Enum) else key): _to_jsonable(value)
            for key, value in obj.items()
        }
    return obj
32
+
33
+
34
+ def build_default_rules(config: dict | None = None):
35
+ """Return the default rule set for the kernel CLI, applying overrides from config.
36
+ """
37
+ config = config or {}
38
+ rules = []
39
+
40
+ try:
41
+ from watchllm_kernel.rules.secrets import SecretLiteralRule
42
+ except ModuleNotFoundError:
43
+ SecretLiteralRule = None
44
+
45
+ if SecretLiteralRule is not None:
46
+ secrets_cfg = config.get("rules", {}).get("secrets", {})
47
+ if secrets_cfg.get("enabled", True):
48
+ rules.append(SecretLiteralRule())
49
+
50
+ # Build Forbidden Import Rule
51
+ fi_cfg = config.get("rules", {}).get("forbidden_imports", {})
52
+ if fi_cfg.get("enabled", True):
53
+ rules.append(ForbiddenImportRule(
54
+ forbidden_modules=fi_cfg.get("modules"),
55
+ forbidden_prefixes=fi_cfg.get("forbidden_prefixes"),
56
+ allowed_relative_prefixes=fi_cfg.get("allowed_relative_prefixes")
57
+ ))
58
+
59
+ # Build Boundary Rule
60
+ boundary_cfg = config.get("rules", {}).get("boundary", {})
61
+ if boundary_cfg.get("enabled", True):
62
+ rules.append(BoundaryRule(
63
+ boundary_map=boundary_cfg.get("map")
64
+ ))
65
+
66
+ # Build Auth Flow Rule
67
+ auth_cfg = config.get("rules", {}).get("auth_flow", {})
68
+ if auth_cfg.get("enabled", True):
69
+ rules.append(AuthFlowRule())
70
+
71
+ return rules
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Argument parser
76
+ # ---------------------------------------------------------------------------
77
+
78
+
79
def build_parser() -> argparse.ArgumentParser:
    """Create the ``watchllm-kernel`` command-line parser.

    The parser has a top-level ``--version`` flag and a single ``check``
    sub-command; the chosen sub-command name is stored in ``args.command``.
    """
    root = argparse.ArgumentParser(
        prog="watchllm-kernel",
        description="Deterministic local write-path governance kernel for autonomous coding agents.",
    )
    root.add_argument("--version", action="version", version="%(prog)s 0.1.0")

    commands = root.add_subparsers(dest="command", help="sub-command")

    # check
    check = commands.add_parser("check", help="Check source against rules")

    # (flag, argparse keyword arguments), registered in this order.
    check_options = (
        (
            "--stdin",
            {
                "action": "store_true",
                "help": "Read source from stdin instead of a file path",
            },
        ),
        (
            "--filepath",
            {
                "default": None,
                "help": "Path to source file (ignored when --stdin is used)",
            },
        ),
        (
            "--language",
            {
                "choices": ["js", "ts"],
                "default": None,
                "help": "Language identifier (js or ts). Inferred from file extension when omitted.",
            },
        ),
        (
            "--mode",
            {
                "choices": [ENFORCE_MODE, SHADOW_MODE],
                "default": ENFORCE_MODE,
                "help": "Evaluation mode (default: enforce)",
            },
        ),
        (
            "--json",
            {
                "action": "store_true",
                "help": "Output result as JSON",
            },
        ),
    )
    for flag, options in check_options:
        check.add_argument(flag, **options)

    return root
122
+
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # Language resolution
126
+ # ---------------------------------------------------------------------------
127
+
128
+ _LANGUAGE_SHORT_MAP = {
129
+ "js": "javascript",
130
+ "ts": "typescript",
131
+ }
132
+
133
+
134
+ def _resolve_language(language: str | None, file_path: str | None) -> str | None:
135
+ if language and language in _LANGUAGE_SHORT_MAP:
136
+ return _LANGUAGE_SHORT_MAP[language]
137
+ if language:
138
+ return language
139
+ if file_path is None:
140
+ return None
141
+ suffix = Path(file_path).suffix.lower()
142
+ if suffix in (".ts", ".tsx"):
143
+ return "typescript"
144
+ if suffix in (".js", ".jsx", ".mjs", ".cjs"):
145
+ return "javascript"
146
+ return None
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # Main
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
def main(argv: list[str] | None = None) -> int:
    """Run the ``watchllm-kernel`` CLI and return the process exit code.

    Parameters:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``.

    Returns:
        ``0`` when the decision is not BLOCK (or when only help was printed),
        ``1`` when the decision is BLOCK, and ``2`` for input errors: no
        source given, unreadable file, or a source that cannot be evaluated
        (for example because no supported language could be determined).
    """
    parser = build_parser()
    if argv is None:
        argv = sys.argv[1:]

    if not argv:
        parser.print_help()
        return 0

    args = parser.parse_args(argv)

    if args.command != "check":
        parser.print_help()
        return 0

    # --- read source ---
    if args.stdin:
        source = sys.stdin.read()
        file_path = None
    else:
        if args.filepath is None:
            print("Error: either --stdin or --filepath is required", file=sys.stderr)
            return 2
        file_path = args.filepath
        try:
            source = Path(file_path).read_text(encoding="utf-8")
        except (OSError, ValueError) as exc:
            # OSError: missing/unreadable file. ValueError: undecodable
            # content (UnicodeDecodeError) or an invalid path string.
            print(f"Error reading file {file_path}: {exc}", file=sys.stderr)
            return 2

    language = _resolve_language(args.language, file_path)

    # --- load config ---
    start_path = str(Path(file_path).parent) if file_path else "."
    config = load_config(start_path=start_path)

    # --- evaluate ---
    rules = build_default_rules(config=config)
    try:
        result = evaluate_source(
            source,
            file_path=file_path,
            language=language,
            rules=rules,
            mode=args.mode,
        )
    except ValueError as exc:
        # The engine raises ValueError when the language is unsupported or
        # could not be determined; report it as an input error rather than
        # letting a traceback escape.
        print(f"Error: {exc}", file=sys.stderr)
        if language is None:
            print(
                "Hint: pass --language (js or ts) or use a file path with a "
                "recognised extension",
                file=sys.stderr,
            )
        return 2

    # --- local blocked-event logging ---
    write_block_log(result)

    # --- output ---
    if args.json:
        payload = _to_jsonable(result)
        json.dump(payload, sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        print(format_human_report(result))

    # exit code
    if result.decision == Decision.BLOCK:
        return 1
    return 0
@@ -0,0 +1,43 @@
1
+ import os
2
+ import yaml
3
+ from typing import Any, Dict, Optional
4
+
5
# Name of the configuration file that is searched for.
CONFIG_FILENAME = ".watchllm.yaml"


def find_config(start_path: str = ".") -> Optional[str]:
    """Locate the nearest ``.watchllm.yaml`` at or above *start_path*.

    The search starts in the absolute form of *start_path* and moves to the
    parent directory until a config file is found or the top of the
    directory tree is reached.

    Returns:
        The path of the first config file found, or ``None`` if there is none.
    """
    directory = os.path.abspath(start_path)
    previous = None

    # ``dirname`` returns its argument unchanged at the top of the tree,
    # which is what ends the walk.
    while directory != previous:
        candidate = os.path.join(directory, CONFIG_FILENAME)
        if os.path.isfile(candidate):
            return candidate
        previous, directory = directory, os.path.dirname(directory)

    return None
22
+
23
def load_config(config_path: Optional[str] = None, start_path: str = ".") -> Dict[str, Any]:
    """Load the WatchLLM configuration from the given path or auto-discover it.

    Parameters:
        config_path: Explicit path to a config file. When empty, the file is
            discovered with ``find_config(start_path)``.
        start_path: Directory where auto-discovery starts.

    Returns:
        The parsed configuration mapping. An empty dict is returned when no
        file is found, the file is empty, it cannot be read or parsed, or its
        top-level YAML value is not a mapping.

    Loading is best-effort: failures are reported as a warning on stderr and
    never raised. stderr is used so that machine-readable output written to
    stdout is not mixed with warnings.
    """
    import sys  # local import: only needed for the warning stream

    if not config_path:
        config_path = find_config(start_path)

    if not config_path or not os.path.isfile(config_path):
        return {}

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: a broken config must not abort the caller.
        print(f"Warning: Failed to load config from {config_path}: {e}", file=sys.stderr)
        return {}

    if not config:
        # Empty file (None) or an empty document.
        return {}

    if not isinstance(config, dict):
        # Callers use ``config.get(...)``; a list or scalar root would crash them.
        print(
            f"Warning: Ignoring config from {config_path}: "
            f"expected a mapping at the top level, got {type(config).__name__}",
            file=sys.stderr,
        )
        return {}

    return config
38
+
39
def get_rule_config(config: Dict[str, Any], rule_name: str) -> Dict[str, Any]:
    """Extract configuration for a specific rule.

    Parameters:
        config: Parsed configuration mapping; rule settings are expected
            under its ``"rules"`` key.
        rule_name: Key of the rule inside the ``"rules"`` mapping.

    Returns:
        The rule's settings mapping, or an empty dict when the config, the
        ``"rules"`` entry or the rule section is missing, null or not a
        mapping.
    """
    rules = config.get("rules") if isinstance(config, dict) else None
    if not isinstance(rules, dict):
        return {}

    # A section written as ``name:`` with no body parses to None.
    section = rules.get(rule_name)
    return section if isinstance(section, dict) else {}
@@ -0,0 +1,115 @@
1
+ """Deterministic decision engine for the WatchLLM kernel.
2
+
3
+ Combines rule results into a single kernel-level decision.
4
+
5
+ The engine now parses source **once** via ``parser.parse_source`` and
6
+ passes the resulting ``ParseResult`` to every rule, eliminating the
7
+ redundant per-rule parsing that existed previously.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Optional
13
+
14
+ from watchllm_kernel.models import Decision, KernelResult, Rule, RuleDecision, RuleResult, Violation
15
+ from watchllm_kernel.parser import parse_source
16
+ from watchllm_kernel.rules._ast_utils import infer_language_from_path
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Mode constants
20
+ # ---------------------------------------------------------------------------
21
+
22
# Evaluation mode identifiers accepted by the engine.
ENFORCE_MODE = "enforce"
SHADOW_MODE = "shadow"

# Every mode the engine recognises; any other value is rejected with ValueError.
VALID_MODES = frozenset((ENFORCE_MODE, SHADOW_MODE))
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Helpers
29
+ # ---------------------------------------------------------------------------
30
+
31
+
32
def has_blocking_failure(rule_results: list[RuleResult]) -> bool:
    """Tell whether at least one rule result has status FAIL.

    Only FAIL counts as blocking; PASS and INCONCLUSIVE do not.
    """
    for outcome in rule_results:
        if outcome.status == RuleDecision.FAIL:
            return True
    return False
38
+
39
+
40
def collect_violations(rule_results: list[RuleResult]) -> list[Violation]:
    """Flatten the violations of all *rule_results* into one new list.

    The order follows the order of *rule_results*, and within each result
    the order of its ``violations``.
    """
    return [
        violation
        for outcome in rule_results
        for violation in outcome.violations
    ]
49
+
50
+
51
def reduce_decision(
    rule_results: list[RuleResult], mode: str = ENFORCE_MODE
) -> Decision:
    """Collapse *rule_results* into one kernel-level decision.

    In shadow mode the decision is always ALLOW. In enforce mode it is BLOCK
    when any rule result is a blocking failure and ALLOW otherwise.

    Raises:
        ValueError: If *mode* is not one of ``VALID_MODES``.
    """
    if mode not in VALID_MODES:
        raise ValueError(
            f"Unknown mode {mode!r}. Valid modes: {sorted(VALID_MODES)}"
        )

    # Rule results are only inspected in enforce mode.
    must_block = mode == ENFORCE_MODE and has_blocking_failure(rule_results)
    return Decision.BLOCK if must_block else Decision.ALLOW
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Evaluator
74
+ # ---------------------------------------------------------------------------
75
+
76
+
77
def evaluate_source(
    source: str,
    *,
    file_path: str | None = None,
    language: str | None = None,
    rules: list[Rule] | tuple[Rule, ...],
    mode: str = ENFORCE_MODE,
) -> KernelResult:
    """Evaluate *source* with every rule in *rules* and build a KernelResult.

    The source is parsed a single time; the resulting ``ParseResult`` is
    handed to each rule. When *language* is not given it is inferred from
    *file_path*.

    Rules run in the order supplied. Exceptions raised while parsing or by a
    rule are not caught here and propagate to the caller.

    Raises:
        ValueError: If *mode* is not one of ``VALID_MODES``.
    """
    if mode not in VALID_MODES:
        raise ValueError(
            f"Unknown mode {mode!r}. Valid modes: {sorted(VALID_MODES)}"
        )

    # An explicit language wins; otherwise fall back to the file path.
    effective_language = language if language else infer_language_from_path(file_path)
    shared_parse = parse_source(
        source, language=effective_language, file_path=file_path
    )

    outcomes: list[RuleResult] = [
        rule.evaluate(source, file_path=file_path, parse_result=shared_parse)
        for rule in rules
    ]

    return KernelResult(
        decision=reduce_decision(outcomes, mode=mode),
        rule_results=outcomes,
        file_path=file_path,
        language=effective_language,
        mode=mode,
    )
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import enum
5
+ from typing import TYPE_CHECKING, Any, Optional
6
+
7
+ if TYPE_CHECKING:
8
+ from watchllm_kernel.parser import ParseResult
9
+
10
+
11
class Decision(enum.Enum):
    """Overall verdict produced by the kernel for one evaluation."""

    # Each member's value is its own name as a string.
    ALLOW = "ALLOW"
    BLOCK = "BLOCK"
15
+
16
+
17
class RuleDecision(enum.Enum):
    """Outcome reported by a single rule."""

    # Each member's value is its own name as a string.
    PASS = "PASS"
    FAIL = "FAIL"
    INCONCLUSIVE = "INCONCLUSIVE"
22
+
23
+
24
class Severity(enum.Enum):
    """Severity level attached to a rule violation."""

    # Each member's value is its own name as a string.
    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
31
+
32
+
33
@dataclasses.dataclass
class SourceLocation:
    """A position, or span, inside a source file.

    ``line`` and ``column`` are required; ``end_line`` and ``end_column``
    are optional and default to ``None``.
    """

    # Start of the location.
    line: int
    column: int
    # End of the span, when known.
    end_line: Optional[int] = None
    end_column: Optional[int] = None
40
+
41
+
42
@dataclasses.dataclass
class Violation:
    """One finding reported by a rule.

    Only ``rule_id`` and ``message`` are required. ``severity`` defaults to
    ``Severity.HIGH``; ``location`` and ``evidence`` default to ``None``.
    """

    # Identifier of the rule that reported the violation.
    rule_id: str
    # Description of the violation.
    message: str
    # Where the violation was found, when available.
    location: Optional[SourceLocation] = None
    severity: Severity = Severity.HIGH
    # Optional supporting text for the finding.
    evidence: Optional[str] = None
50
+
51
+
52
@dataclasses.dataclass
class RuleResult:
    """Outcome of running one rule.

    ``violations`` defaults to a fresh empty list for every instance.
    """

    # Identifier of the rule that produced this result.
    rule_id: str
    # PASS / FAIL / INCONCLUSIVE outcome of the rule.
    status: RuleDecision
    violations: list[Violation] = dataclasses.field(default_factory=list)
58
+
59
+
60
@dataclasses.dataclass
class KernelResult:
    """Combined result of one kernel evaluation.

    Only ``decision`` is required. ``rule_results`` defaults to a fresh
    empty list, ``file_path`` and ``language`` to ``None``, and ``mode`` to
    ``"enforce"``.
    """

    # Overall ALLOW / BLOCK verdict.
    decision: Decision
    # Per-rule results.
    rule_results: list[RuleResult] = dataclasses.field(default_factory=list)
    file_path: Optional[str] = None
    language: Optional[str] = None
    # Evaluation mode recorded with the result.
    mode: str = "enforce"
68
+
69
+
70
class Rule:
    """Base class for deterministic rules.

    Subclasses override ``evaluate`` and return a ``RuleResult``; the base
    implementation raises ``NotImplementedError``.

    Arguments received by ``evaluate``:
        source:       Raw source text to check.
        file_path:    Path of the file being evaluated, if any.
        parse_result: ``ParseResult`` already produced by the engine, if any.
                      Rules should use it rather than parsing the source
                      again, but must also behave correctly when it is
                      ``None``.
    """

    def __init__(self, rule_id: str, name: str, description: str = ""):
        # Store the identifying metadata unchanged.
        self.rule_id, self.name, self.description = rule_id, name, description

    def evaluate(
        self,
        source: str,
        file_path: Optional[str] = None,
        parse_result: Optional[ParseResult] = None,
    ) -> RuleResult:
        """Check *source* against this rule.

        The base class has no behaviour of its own: it always raises
        ``NotImplementedError`` and must be overridden.
        """
        raise NotImplementedError("Subclasses must implement evaluate()")
100
+
@@ -0,0 +1,128 @@
1
+ """Parser abstraction for the WatchLLM kernel.
2
+
3
+ Provides a minimal, deterministic interface to Tree-sitter for JavaScript
4
+ and TypeScript source files. The module exposes a single `parse_source`
5
+ function that returns a structured parse result containing the raw
6
+ Tree-sitter tree and a convenience traversal helper.
7
+
8
+ This module is intentionally narrow: it does not perform rule evaluation,
9
+ enforcement, or any decision-making. It exists solely to give rule
10
+ implementations a stable AST surface.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import dataclasses
16
+ from typing import Any, Optional
17
+
18
+ import tree_sitter_javascript
19
+ import tree_sitter_typescript
20
+ from tree_sitter import Language, Parser, Node, Tree
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Language registry
25
+ # ---------------------------------------------------------------------------
26
+
27
# Tree-sitter grammars keyed by the language identifiers this module accepts.
_LANGUAGE_MAP: dict[str, Language] = {
    "javascript": Language(tree_sitter_javascript.language()),
    "typescript": Language(tree_sitter_typescript.language_typescript()),
    "tsx": Language(tree_sitter_typescript.language_tsx()),
}


def _resolve_language(language: str) -> Language:
    """Look up the Tree-sitter ``Language`` registered for *language*.

    Raises:
        ValueError: If *language* has no entry in ``_LANGUAGE_MAP``; the
            message lists the supported identifiers.
    """
    if language not in _LANGUAGE_MAP:
        raise ValueError(
            f"Unsupported language '{language}'. "
            f"Supported: {', '.join(sorted(_LANGUAGE_MAP))}"
        )
    return _LANGUAGE_MAP[language]
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Parse result
50
+ # ---------------------------------------------------------------------------
51
+
52
+
53
@dataclasses.dataclass
class ParseResult:
    """Outcome of parsing one source file.

    Attributes:
        tree:      Raw Tree-sitter parse tree; it can be traversed directly
                   or through the helpers below.
        source:    The source text as bytes.
        language:  Language identifier the source was parsed with.
        file_path: Path of the parsed file, if known.
    """

    tree: Tree
    source: bytes
    language: str
    file_path: Optional[str] = None

    @property
    def root_node(self) -> Node:
        """The root syntax node of ``tree``."""
        return self.tree.root_node

    def walk(self):
        """Return the cursor produced by ``tree.walk()`` for traversing the tree."""
        return self.tree.walk()

    def query(self, query_string: str) -> dict[str, list[Node]]:
        """Run a Tree-sitter query over the whole parse tree.

        The query is compiled for the grammar of ``self.language`` and
        executed from the root node. The captures are returned as a mapping
        from capture name to the list of captured nodes. A malformed query
        raises ``tree_sitter.QueryError``.
        """
        from tree_sitter import Query as _Query, QueryCursor as _QueryCursor

        grammar = _resolve_language(self.language)
        compiled = _Query(grammar, query_string)
        cursor = _QueryCursor(compiled)
        return cursor.captures(self.root_node)
89
+
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Public API
93
+ # ---------------------------------------------------------------------------
94
+
95
+
96
def parse_source(
    source: str,
    language: str,
    file_path: Optional[str] = None,
) -> ParseResult:
    """Parse *source* with the Tree-sitter grammar registered for *language*.

    Parameters:
        source:    Source code as a Python string; it is encoded to UTF-8
                   before parsing.
        language:  ``"javascript"``, ``"typescript"`` or ``"tsx"``.
        file_path: Optional path stored on the result; it does not affect
                   parsing.

    Returns:
        A ``ParseResult`` holding the Tree-sitter tree, the UTF-8 encoded
        source, the language identifier and *file_path*.

    Raises:
        ValueError: If *language* is not supported.
        tree_sitter.LanguageError: If the grammar cannot be loaded (should
            not happen with the bundled grammars).
    """
    grammar = _resolve_language(language)
    ts_parser = Parser(grammar)

    encoded = source.encode("utf-8")
    return ParseResult(
        tree=ts_parser.parse(encoded),
        source=encoded,
        language=language,
        file_path=file_path,
    )