vallm 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vallm/__init__.py +16 -0
- vallm/__main__.py +5 -0
- vallm/config.py +59 -0
- vallm/core/__init__.py +1 -0
- vallm/core/ast_compare.py +135 -0
- vallm/core/graph_builder.py +117 -0
- vallm/core/graph_diff.py +86 -0
- vallm/core/proposal.py +37 -0
- vallm/hookspecs.py +33 -0
- vallm/scoring.py +191 -0
- vallm/validators/__init__.py +1 -0
- vallm/validators/base.py +21 -0
- vallm/validators/complexity.py +170 -0
- vallm/validators/imports.py +116 -0
- vallm/validators/security.py +158 -0
- vallm/validators/semantic.py +249 -0
- vallm/validators/syntax.py +96 -0
- vallm-0.1.1.dist-info/METADATA +61 -0
- vallm-0.1.1.dist-info/RECORD +22 -0
- vallm-0.1.1.dist-info/WHEEL +4 -0
- vallm-0.1.1.dist-info/entry_points.txt +9 -0
- vallm-0.1.1.dist-info/licenses/LICENSE +201 -0
vallm/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""vallm - A complete toolkit for validating LLM-generated code."""
|
|
2
|
+
|
|
3
|
+
from vallm.config import VallmSettings
|
|
4
|
+
from vallm.core.proposal import Proposal
|
|
5
|
+
from vallm.scoring import ValidationResult, Verdict, Issue, validate
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"validate",
|
|
9
|
+
"Proposal",
|
|
10
|
+
"ValidationResult",
|
|
11
|
+
"Verdict",
|
|
12
|
+
"Issue",
|
|
13
|
+
"VallmSettings",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
__version__ = "0.1.1"
|
vallm/__main__.py
ADDED
vallm/config.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Configuration management via pydantic-settings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class VallmSettings(BaseSettings):
    """vallm configuration with layered sources: defaults → TOML → env → CLI.

    Sources later in that chain override earlier ones: values passed to the
    constructor win over ``VALLM_*`` environment variables, which win over
    the TOML file, which wins over the field defaults below.
    """

    model_config = SettingsConfigDict(
        env_prefix="VALLM_",
        toml_file="vallm.toml",
    )

    # Scoring thresholds
    pass_threshold: float = Field(default=0.8, ge=0.0, le=1.0)
    review_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # Validator toggles
    enable_syntax: bool = True
    enable_imports: bool = True
    enable_complexity: bool = True
    enable_security: bool = False
    enable_semantic: bool = False

    # Complexity limits
    max_cyclomatic_complexity: int = 15
    max_cognitive_complexity: int = 20
    max_function_length: int = 100

    # LLM settings (for semantic validator)
    llm_provider: str = "ollama"
    llm_model: str = "qwen2.5-coder:7b"
    llm_base_url: str = "http://localhost:11434"
    llm_temperature: float = 0.1

    # Sandbox settings
    sandbox_backend: str = "subprocess"
    sandbox_timeout: int = 30
    sandbox_memory_limit: str = "256m"

    # Output
    output_format: str = "rich"
    verbose: bool = False

    # Language detection
    default_language: str = "python"

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings,
        env_settings,
        dotenv_settings,
        file_secret_settings,
    ):
        """Register the TOML file as the lowest-priority settings source.

        pydantic-settings only honours ``model_config["toml_file"]`` when a
        ``TomlConfigSettingsSource`` is part of the returned tuple; without
        this hook the TOML file is silently ignored. Sources are listed from
        highest to lowest priority.
        """
        # Imported locally so the module's import block stays untouched.
        from pydantic_settings import TomlConfigSettingsSource

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
            TomlConfigSettingsSource(settings_cls),
        )

    @classmethod
    def from_toml(cls, path: Optional[Path] = None) -> VallmSettings:
        """Load settings, optionally from a specific TOML file.

        Args:
            path: TOML file to read instead of the default ``vallm.toml``.
                When ``None`` or when the path does not exist, the default
                layered sources are used.

        Returns:
            A settings instance; environment variables still override
            values read from the file.
        """
        if path is None or not path.exists():
            return cls()

        # ``_toml_file`` is not a BaseSettings init option (it would be
        # rejected as an extra field), so point a throwaway subclass at the
        # requested file and let the regular source chain do the loading.
        class _FileSettings(cls):
            model_config = SettingsConfigDict(toml_file=str(path))

        # Re-validate into ``cls`` so callers get the exact class they asked for.
        return cls.model_validate(_FileSettings().model_dump())
|
vallm/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core modules for vallm."""
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""AST comparison utilities using tree-sitter and edit distance."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from tree_sitter_language_pack import get_parser
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_code(code: str, language: str = "python"):
    """Parse ``code`` with the tree-sitter parser for ``language``.

    The source is encoded as UTF-8 before being handed to the parser; the
    parser's resulting tree is returned unchanged.
    """
    ts_parser = get_parser(language)
    source_bytes = code.encode("utf-8")
    return ts_parser.parse(source_bytes)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_python_ast(code: str) -> Optional[ast.AST]:
    """Parse Python code using the built-in ast module.

    Args:
        code: Python source text.

    Returns:
        The parsed module, or ``None`` when the source cannot be parsed.
    """
    try:
        return ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError covers sources containing null bytes on Python versions
        # where ast.parse does not report them as a SyntaxError.
        return None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def normalize_python_ast(tree: ast.AST) -> str:
    """Normalize a Python AST by replacing identifiers with canonical names.

    Variable names, argument names and (sync or async) function names are
    rewritten to ``var_0``, ``var_1``, ... in order of first appearance, so
    the resulting fingerprint ignores naming choices.

    Args:
        tree: A parsed Python AST. It is not modified.

    Returns:
        The ``ast.dump`` string of the normalized tree.
    """

    class Normalizer(ast.NodeTransformer):
        def __init__(self):
            self._name_map: dict[str, str] = {}
            self._counter = 0

        def _canonical(self, name: str) -> str:
            # The same original name always maps to the same canonical name.
            if name not in self._name_map:
                self._name_map[name] = f"var_{self._counter}"
                self._counter += 1
            return self._name_map[name]

        def visit_Name(self, node):
            node.id = self._canonical(node.id)
            return self.generic_visit(node)

        def visit_FunctionDef(self, node):
            node.name = self._canonical(node.name)
            return self.generic_visit(node)

        def visit_AsyncFunctionDef(self, node):
            # Coroutine names are identifiers too; treat them like def names.
            node.name = self._canonical(node.name)
            return self.generic_visit(node)

        def visit_arg(self, node):
            node.arg = self._canonical(node.arg)
            return self.generic_visit(node)

    # Round-trip through source so the caller's tree is never mutated.
    working_copy = ast.parse(ast.unparse(tree))
    normalized = Normalizer().visit(working_copy)
    return ast.dump(normalized)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def python_ast_similarity(code1: str, code2: str) -> float:
    """Score how structurally alike two Python snippets are.

    Both snippets are parsed and normalized; identical fingerprints score
    1.0, an unparsable snippet scores 0.0, and anything else is the
    SequenceMatcher ratio of the two fingerprints (a float in [0.0, 1.0]).
    """
    trees = [parse_python_ast(snippet) for snippet in (code1, code2)]
    if any(parsed is None for parsed in trees):
        return 0.0

    fingerprint_a, fingerprint_b = (normalize_python_ast(parsed) for parsed in trees)
    if fingerprint_a == fingerprint_b:
        return 1.0

    # Fall back to a textual similarity ratio over the two AST dumps.
    from difflib import SequenceMatcher

    matcher = SequenceMatcher(None, fingerprint_a, fingerprint_b)
    return matcher.ratio()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def tree_sitter_node_count(code: str, language: str = "python") -> int:
    """Count the number of nodes in a tree-sitter parse tree.

    Args:
        code: Source text to parse.
        language: tree-sitter language name passed to ``parse_code``.

    Returns:
        Total number of nodes, including the root.
    """
    tree = parse_code(code, language)

    # Walk with an explicit stack: deeply nested input would otherwise
    # exhaust Python's recursion limit.
    count = 0
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        count += 1
        pending.extend(node.children)
    return count
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def tree_sitter_error_count(code: str, language: str = "python") -> int:
    """Count syntax errors reported by tree-sitter.

    A node counts as an error when its type is ``"ERROR"`` or when it is
    flagged as missing.

    Args:
        code: Source text to parse.
        language: tree-sitter language name passed to ``parse_code``.

    Returns:
        Number of error or missing nodes in the parse tree.
    """
    tree = parse_code(code, language)

    # Walk with an explicit stack: deeply nested input would otherwise
    # exhaust Python's recursion limit.
    errors = 0
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        if node.type == "ERROR" or node.is_missing:
            errors += 1
        pending.extend(node.children)
    return errors
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def structural_diff_summary(code1: str, code2: str, language: str = "python") -> dict:
    """Return a summary of structural differences between two code snippets.

    Args:
        code1: The "before" source text.
        code2: The "after" source text.
        language: tree-sitter language name passed to ``parse_code``.

    Returns:
        A dict with the node counts of both trees (``nodes_before``,
        ``nodes_after``) and the sets of node types that were added,
        removed, or are common to both.
    """
    tree1 = parse_code(code1, language)
    tree2 = parse_code(code2, language)

    def _collect_types(root):
        # Explicit stack instead of recursion so deeply nested code cannot
        # hit the recursion limit. Only the count and the set of types are
        # used below, so traversal order does not matter.
        types = []
        pending = [root]
        while pending:
            node = pending.pop()
            types.append(node.type)
            pending.extend(node.children)
        return types

    types1 = _collect_types(tree1.root_node)
    types2 = _collect_types(tree2.root_node)

    set1, set2 = set(types1), set(types2)
    return {
        "nodes_before": len(types1),
        "nodes_after": len(types2),
        "added_types": set2 - set1,
        "removed_types": set1 - set2,
        "common_types": set1 & set2,
    }
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Code graph analysis: import and call graph construction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class ImportEdge:
|
|
11
|
+
"""Represents an import dependency."""
|
|
12
|
+
|
|
13
|
+
source_module: str
|
|
14
|
+
imported_name: str
|
|
15
|
+
alias: str | None = None
|
|
16
|
+
line: int = 0
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class CallEdge:
    """One call site: which scope calls which callable."""

    # Name of the scope containing the call.
    caller: str
    # Dotted name of the callable being invoked.
    callee: str
    # Line number of the call (0 when unknown).
    line: int = 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class CodeGraph:
    """Import edges, call edges, and the function/class names found in code."""

    imports: "list[ImportEdge]" = field(default_factory=list)
    calls: "list[CallEdge]" = field(default_factory=list)
    functions: list[str] = field(default_factory=list)
    classes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict view of the graph suitable for comparison.

        Edges are reduced to tuples (aliases and line numbers are dropped);
        function and class names are returned sorted.
        """
        import_pairs = [(edge.source_module, edge.imported_name) for edge in self.imports]
        call_pairs = [(edge.caller, edge.callee) for edge in self.calls]
        function_names = sorted(self.functions)
        class_names = sorted(self.classes)
        return {
            "imports": import_pairs,
            "calls": call_pairs,
            "functions": function_names,
            "classes": class_names,
        }
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def build_python_graph(code: str, module_name: str = "<module>") -> CodeGraph:
    """Build an import/call graph from Python source code.

    Args:
        code: Python source text.
        module_name: Scope name used as the caller for calls made outside
            any function.

    Returns:
        The populated graph, or an empty ``CodeGraph`` when the source
        cannot be parsed.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError covers sources containing null bytes on Python versions
        # where ast.parse does not report them as a SyntaxError.
        return CodeGraph()

    graph = CodeGraph()

    def _get_call_name(node) -> str | None:
        # Resolve ``a`` / ``a.b.c`` style callees; other expressions
        # (subscripts, calls, lambdas, ...) yield None.
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            prefix = _get_call_name(node.value)
            if prefix:
                return f"{prefix}.{node.attr}"
            return node.attr
        return None

    class GraphVisitor(ast.NodeVisitor):
        def __init__(self):
            # Name of the innermost enclosing function (or the module).
            self.scope = module_name

        def visit_Import(self, node):
            for alias in node.names:
                graph.imports.append(
                    ImportEdge(
                        source_module=alias.name,
                        imported_name=alias.name,
                        alias=alias.asname,
                        line=node.lineno,
                    )
                )
            self.generic_visit(node)

        def visit_ImportFrom(self, node):
            # Relative imports such as ``from . import x`` have no module.
            module = node.module or ""
            for alias in node.names:
                graph.imports.append(
                    ImportEdge(
                        source_module=module,
                        imported_name=alias.name,
                        alias=alias.asname,
                        line=node.lineno,
                    )
                )
            self.generic_visit(node)

        def visit_FunctionDef(self, node):
            graph.functions.append(node.name)
            outer_scope = self.scope
            self.scope = node.name
            try:
                self.generic_visit(node)
            finally:
                # Always restore the scope, even if a child visit raises.
                self.scope = outer_scope

        def visit_AsyncFunctionDef(self, node):
            self.visit_FunctionDef(node)

        def visit_ClassDef(self, node):
            # Classes are recorded but do not open a new caller scope.
            graph.classes.append(node.name)
            self.generic_visit(node)

        def visit_Call(self, node):
            callee = _get_call_name(node.func)
            if callee:
                graph.calls.append(
                    CallEdge(caller=self.scope, callee=callee, line=node.lineno)
                )
            self.generic_visit(node)

    GraphVisitor().visit(tree)
    return graph
|
vallm/core/graph_diff.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Graph comparison for detecting structural regressions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
from vallm.core.graph_builder import CodeGraph, build_python_graph
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class GraphDiffResult:
    """Additions and removals found when comparing two code graphs."""

    added_imports: list[tuple[str, str]] = field(default_factory=list)
    removed_imports: list[tuple[str, str]] = field(default_factory=list)
    added_functions: list[str] = field(default_factory=list)
    removed_functions: list[str] = field(default_factory=list)
    added_classes: list[str] = field(default_factory=list)
    removed_classes: list[str] = field(default_factory=list)
    added_calls: list[tuple[str, str]] = field(default_factory=list)
    removed_calls: list[tuple[str, str]] = field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """True when at least one of the added/removed lists is non-empty."""
        buckets = (
            self.added_imports,
            self.removed_imports,
            self.added_functions,
            self.removed_functions,
            self.added_classes,
            self.removed_classes,
            self.added_calls,
            self.removed_calls,
        )
        return any(buckets)

    @property
    def breaking_changes(self) -> list[str]:
        """Describe removals (functions, then classes, then imports)."""
        messages = [f"Removed function: {fn}" for fn in self.removed_functions]
        messages += [f"Removed class: {cls}" for cls in self.removed_classes]
        messages += [
            f"Removed import: {name} from {mod}" for mod, name in self.removed_imports
        ]
        return messages
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def diff_graphs(before: CodeGraph, after: CodeGraph) -> GraphDiffResult:
    """Compute what was added to and removed from ``before`` to get ``after``."""
    old_view = before.to_dict()
    new_view = after.to_dict()

    changes = {}
    for kind in ("imports", "functions", "classes"):
        changes[f"added_{kind}"] = _diff_list(old_view[kind], new_view[kind], added=True)
        changes[f"removed_{kind}"] = _diff_list(old_view[kind], new_view[kind], added=False)

    # Call edges are compared as (caller, callee) pairs, ignoring line numbers.
    old_calls = [(edge.caller, edge.callee) for edge in before.calls]
    new_calls = [(edge.caller, edge.callee) for edge in after.calls]
    changes["added_calls"] = _diff_list(old_calls, new_calls, added=True)
    changes["removed_calls"] = _diff_list(old_calls, new_calls, added=False)

    return GraphDiffResult(**changes)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def diff_python_code(before_code: str, after_code: str) -> GraphDiffResult:
    """Build a graph for each code string and return the diff between them."""
    graphs = (
        build_python_graph(before_code, "before"),
        build_python_graph(after_code, "after"),
    )
    return diff_graphs(*graphs)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _diff_list(before, after, added: bool):
|
|
82
|
+
set_before = set(before) if not isinstance(before[0] if before else "", tuple) else set(before)
|
|
83
|
+
set_after = set(after) if not isinstance(after[0] if after else "", tuple) else set(after)
|
|
84
|
+
if added:
|
|
85
|
+
return sorted(set_after - set_before)
|
|
86
|
+
return sorted(set_before - set_after)
|
vallm/core/proposal.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Proposal model representing code to be validated."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Proposal:
    """Source code submitted for validation, plus optional context.

    Attributes:
        code: The proposed source code string.
        language: Programming language (e.g., 'python', 'javascript', 'c').
        reference_code: Optional reference/existing code for comparison.
        filename: Optional filename for context.
        metadata: Additional metadata (e.g., prompt, model name).
    """

    code: str
    language: str = "python"
    reference_code: Optional[str] = None
    filename: Optional[str] = None
    metadata: dict = field(default_factory=dict)

    @property
    def code_bytes(self) -> bytes:
        """UTF-8 encoded form of ``code``."""
        encoded = self.code.encode("utf-8")
        return encoded

    @property
    def reference_bytes(self) -> Optional[bytes]:
        """UTF-8 encoded form of ``reference_code``, or None when it is unset."""
        reference = self.reference_code
        return None if reference is None else reference.encode("utf-8")
|
vallm/hookspecs.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""pluggy hook specifications for vallm validators."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import pluggy
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from vallm.core.proposal import Proposal
|
|
11
|
+
from vallm.scoring import ValidationResult
|
|
12
|
+
|
|
13
|
+
hookspec = pluggy.HookspecMarker("vallm")
|
|
14
|
+
hookimpl = pluggy.HookimplMarker("vallm")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VallmSpec:
    """pluggy hook specifications for vallm validator plugins."""

    @hookspec
    def validate_proposal(self, proposal: Proposal, context: dict) -> ValidationResult:
        """Check ``proposal`` (with ``context``) and return a ValidationResult."""
        ...

    @hookspec
    def get_validator_name(self) -> str:
        """Return this validator's name."""
        ...

    @hookspec
    def get_validator_tier(self) -> int:
        """Return this validator's tier (1-4), used for pipeline ordering."""
        ...
|
vallm/scoring.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Weighted scoring and verdict engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from vallm.config import VallmSettings
|
|
10
|
+
from vallm.core.proposal import Proposal
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Verdict(Enum):
    """Overall outcome of a validation pipeline run."""

    PASS = "pass"
    REVIEW = "review"
    FAIL = "fail"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Severity(Enum):
    """How serious a reported issue is."""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class Issue:
    """One finding reported by a validator."""

    message: str
    severity: Severity = Severity.WARNING
    line: Optional[int] = None
    column: Optional[int] = None
    rule: Optional[str] = None

    def __str__(self) -> str:
        """Format as ``[severity]:line:column message``.

        The location is omitted when ``line`` is unset, and the column is
        only shown when a line is also present.
        """
        pieces = [f"[{self.severity.value}]"]
        if self.line is not None:
            pieces.append(f":{self.line}")
            if self.column is not None:
                pieces.append(f":{self.column}")
        pieces.append(f" {self.message}")
        return "".join(pieces)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class ValidationResult:
    """Score, weighting and issues produced by one validator."""

    validator: str
    score: float  # 0.0–1.0
    weight: float = 1.0  # configurable importance
    confidence: float = 1.0  # validator's self-assessed confidence
    issues: "list[Issue]" = field(default_factory=list)
    details: dict = field(default_factory=dict)

    @property
    def weighted_score(self) -> float:
        """The score scaled by both weight and confidence."""
        scaled = self.score * self.weight
        return scaled * self.confidence

    @property
    def has_errors(self) -> bool:
        """True when at least one issue has ERROR severity."""
        for issue in self.issues:
            if issue.severity == Severity.ERROR:
                return True
        return False
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class PipelineResult:
    """Combined outcome of every validator that ran."""

    results: list[ValidationResult] = field(default_factory=list)
    verdict: Verdict = Verdict.FAIL

    @property
    def weighted_score(self) -> float:
        """Weighted average of validator scores.

        Each result contributes with ``weight * confidence``. Returns 0.0
        when there are no results or the total weight is zero.
        """
        if not self.results:
            return 0.0
        denominator = sum(r.weight * r.confidence for r in self.results)
        if denominator == 0:
            return 0.0
        numerator = sum(r.weighted_score for r in self.results)
        return numerator / denominator

    @property
    def all_issues(self) -> list[Issue]:
        """Every issue from every result, in result order."""
        return [issue for result in self.results for issue in result.issues]

    @property
    def error_count(self) -> int:
        """Number of issues with ERROR severity."""
        return len([i for i in self.all_issues if i.severity == Severity.ERROR])

    @property
    def warning_count(self) -> int:
        """Number of issues with WARNING severity."""
        return len([i for i in self.all_issues if i.severity == Severity.WARNING])
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def compute_verdict(
    results: list[ValidationResult],
    settings: Optional[VallmSettings] = None,
) -> PipelineResult:
    """Aggregate validator results into a PipelineResult with a verdict.

    Any ERROR-severity issue forces FAIL. Otherwise the weighted score is
    compared with ``pass_threshold`` and then ``review_threshold``.
    """
    if settings is None:
        settings = VallmSettings()

    outcome = PipelineResult(results=results)

    # Hard gate: any error-severity issue → FAIL
    for result in results:
        if result.has_errors:
            outcome.verdict = Verdict.FAIL
            return outcome

    aggregate = outcome.weighted_score
    if aggregate >= settings.pass_threshold:
        outcome.verdict = Verdict.PASS
        return outcome
    if aggregate >= settings.review_threshold:
        outcome.verdict = Verdict.REVIEW
        return outcome

    outcome.verdict = Verdict.FAIL
    return outcome
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def validate(
    proposal: Proposal,
    settings: Optional[VallmSettings] = None,
    validators: Optional[list] = None,
    context: Optional[dict] = None,
) -> PipelineResult:
    """Run the full validation pipeline on a proposal.

    Validators run in ascending ``tier`` order. If a tier-1 validator
    reports an error-severity issue, the remaining validators are skipped.

    Args:
        proposal: The code proposal to validate.
        settings: Optional settings override.
        validators: Optional list of validator instances. If None, uses defaults.
            The list itself is not modified.
        context: Optional additional context dict passed to validators.

    Returns:
        PipelineResult with verdict and all validation results.
    """
    if settings is None:
        settings = VallmSettings()
    if context is None:
        context = {}

    if validators is None:
        validators = _get_default_validators(settings)

    # Sort a copy by tier for fail-fast behavior; sorting in place would
    # reorder the list object owned by the caller.
    ordered = sorted(validators, key=lambda v: v.tier)

    results = []
    for validator in ordered:
        result = validator.validate(proposal, context)
        results.append(result)

        # Fail fast on errors in tier 1
        if result.has_errors and validator.tier == 1:
            return compute_verdict(results, settings)

    return compute_verdict(results, settings)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _get_default_validators(settings: VallmSettings) -> list:
|
|
163
|
+
"""Build the default validator list based on settings."""
|
|
164
|
+
validators = []
|
|
165
|
+
|
|
166
|
+
if settings.enable_syntax:
|
|
167
|
+
from vallm.validators.syntax import SyntaxValidator
|
|
168
|
+
|
|
169
|
+
validators.append(SyntaxValidator())
|
|
170
|
+
|
|
171
|
+
if settings.enable_imports:
|
|
172
|
+
from vallm.validators.imports import ImportValidator
|
|
173
|
+
|
|
174
|
+
validators.append(ImportValidator())
|
|
175
|
+
|
|
176
|
+
if settings.enable_complexity:
|
|
177
|
+
from vallm.validators.complexity import ComplexityValidator
|
|
178
|
+
|
|
179
|
+
validators.append(ComplexityValidator(settings))
|
|
180
|
+
|
|
181
|
+
if settings.enable_security:
|
|
182
|
+
from vallm.validators.security import SecurityValidator
|
|
183
|
+
|
|
184
|
+
validators.append(SecurityValidator())
|
|
185
|
+
|
|
186
|
+
if settings.enable_semantic:
|
|
187
|
+
from vallm.validators.semantic import SemanticValidator
|
|
188
|
+
|
|
189
|
+
validators.append(SemanticValidator(settings))
|
|
190
|
+
|
|
191
|
+
return validators
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Built-in validators for vallm."""
|