vallm 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vallm/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """vallm - A complete toolkit for validating LLM-generated code."""
2
+
3
+ from vallm.config import VallmSettings
4
+ from vallm.core.proposal import Proposal
5
+ from vallm.scoring import ValidationResult, Verdict, Issue, validate
6
+
7
# Public API of the package: exactly the names imported above.
__all__ = [
    "validate",
    "Proposal",
    "ValidationResult",
    "Verdict",
    "Issue",
    "VallmSettings",
]

# Package version string.
__version__ = "0.1.1"
vallm/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running vallm as: python -m vallm"""
2
+
3
+ from vallm.cli import app
4
+
5
if __name__ == "__main__":
    # Guarded so that merely importing ``vallm.__main__`` (for example by a
    # documentation tool or a package walker) does not start the CLI.
    # ``python -m vallm`` still runs it because __name__ is "__main__" there.
    app()
vallm/config.py ADDED
@@ -0,0 +1,59 @@
1
+ """Configuration management via pydantic-settings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from pydantic import Field
9
+ from pydantic_settings import BaseSettings, SettingsConfigDict
10
+
11
+
12
class VallmSettings(BaseSettings):
    """vallm configuration with layered sources: defaults → TOML → env → CLI.

    Later layers win: explicit constructor arguments (e.g. values coming from
    the CLI) override ``VALLM_*`` environment variables, which override the
    TOML file, which overrides the field defaults declared below.
    """

    model_config = SettingsConfigDict(
        env_prefix="VALLM_",
        toml_file="vallm.toml",
    )

    # Scoring thresholds (both constrained to the closed range 0.0-1.0)
    pass_threshold: float = Field(default=0.8, ge=0.0, le=1.0)
    review_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # Validator toggles
    enable_syntax: bool = True
    enable_imports: bool = True
    enable_complexity: bool = True
    enable_security: bool = False
    enable_semantic: bool = False

    # Complexity limits
    max_cyclomatic_complexity: int = 15
    max_cognitive_complexity: int = 20
    max_function_length: int = 100

    # LLM settings (for semantic validator)
    llm_provider: str = "ollama"
    llm_model: str = "qwen2.5-coder:7b"
    llm_base_url: str = "http://localhost:11434"
    llm_temperature: float = 0.1

    # Sandbox settings
    sandbox_backend: str = "subprocess"
    sandbox_timeout: int = 30
    sandbox_memory_limit: str = "256m"

    # Output
    output_format: str = "rich"
    verbose: bool = False

    # Language detection
    default_language: str = "python"

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings,
        env_settings,
        dotenv_settings,
        file_secret_settings,
    ) -> tuple:
        """Register the TOML file as a settings source.

        pydantic-settings does not read ``model_config["toml_file"]`` unless a
        ``TomlConfigSettingsSource`` is part of the source tuple. Sources are
        listed from highest to lowest priority, so the TOML source goes last
        and only fills in values that no other source provided.
        """
        # Local import keeps the module-level import block untouched.
        # NOTE(review): on Python < 3.11 this source needs the ``tomli``
        # package to be installed - confirm it is a declared dependency.
        from pydantic_settings import TomlConfigSettingsSource

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
            TomlConfigSettingsSource(settings_cls),
        )

    @classmethod
    def from_toml(cls, path: Optional[Path] = None) -> VallmSettings:
        """Load settings, optionally from a specific TOML file.

        Args:
            path: TOML file to read instead of the default ``vallm.toml``.
                When it is None or does not exist, the default lookup is used.

        Returns:
            A settings instance. When ``path`` is used, the instance belongs
            to a private subclass of ``cls`` bound to that file.
        """
        if path is None or not path.exists():
            return cls()

        # BaseSettings has no ``_toml_file`` constructor override (unlike
        # ``_env_file``), so the file location must come from model_config.
        # A throwaway subclass overrides just that key; the remaining config
        # (e.g. env_prefix) is inherited from ``cls``.
        class _FileBackedSettings(cls):
            model_config = SettingsConfigDict(toml_file=str(path))

        return _FileBackedSettings()
vallm/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Core modules for vallm."""
@@ -0,0 +1,135 @@
1
+ """AST comparison utilities using tree-sitter and edit distance."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ from typing import Optional
7
+
8
+ from tree_sitter_language_pack import get_parser
9
+
10
+
11
def parse_code(code: str, language: str = "python"):
    """Parse ``code`` with the tree-sitter parser for ``language``.

    The source is UTF-8 encoded before being handed to the parser, and the
    parser's result (the parse tree) is returned unchanged.
    """
    return get_parser(language).parse(code.encode("utf-8"))
15
+
16
+
17
def parse_python_ast(code: str) -> Optional[ast.AST]:
    """Parse Python code using the built-in ast module.

    Args:
        code: Python source text.

    Returns:
        The parsed module node, or None when the source cannot be parsed.
    """
    try:
        return ast.parse(code)
    except (SyntaxError, ValueError, RecursionError):
        # SyntaxError: ordinary invalid code.
        # ValueError: source containing NUL bytes on Python < 3.12.
        # RecursionError: pathologically deep expressions.
        # All of them mean "not parseable", which this helper reports as None.
        return None
23
+
24
+
25
def normalize_python_ast(tree: ast.AST) -> str:
    """Normalize a Python AST by replacing identifiers with canonical names.

    This enables fingerprint-based comparison that ignores naming. Variable
    references, argument names, and the names of functions, async functions
    and classes are rewritten to ``var_0``, ``var_1``, ... in order of first
    appearance. The same original name always maps to the same canonical one.

    Args:
        tree: AST to fingerprint. It is not modified; the renaming happens on
            a copy obtained by unparsing and reparsing.

    Returns:
        The ``ast.dump`` string of the renamed copy.
    """

    class Normalizer(ast.NodeTransformer):
        def __init__(self):
            self._name_map: dict[str, str] = {}

        def _canonical(self, name: str) -> str:
            # The f-string is evaluated before insertion, so the index equals
            # the number of names already seen.
            return self._name_map.setdefault(name, f"var_{len(self._name_map)}")

        def visit_Name(self, node):
            node.id = self._canonical(node.id)
            return self.generic_visit(node)

        def _rename_definition(self, node):
            node.name = self._canonical(node.name)
            return self.generic_visit(node)

        # Every definition kind that carries a ``name`` gets the same
        # treatment, so `async def` and `class` names no longer leak through.
        visit_FunctionDef = _rename_definition
        visit_AsyncFunctionDef = _rename_definition
        visit_ClassDef = _rename_definition

        def visit_arg(self, node):
            node.arg = self._canonical(node.arg)
            return self.generic_visit(node)

    # Round-trip through source text to get an independent copy to mutate.
    working_copy = ast.parse(ast.unparse(tree))
    return ast.dump(Normalizer().visit(working_copy))
55
+
56
+
57
def python_ast_similarity(code1: str, code2: str) -> float:
    """Score how structurally alike two Python snippets are.

    Both snippets are parsed and reduced to their normalized AST dumps.
    If either snippet fails to parse the score is 0.0, identical dumps score
    1.0, and anything else gets the ``difflib.SequenceMatcher`` ratio of the
    two dump strings.

    Returns a float between 0.0 and 1.0.
    """
    first_tree = parse_python_ast(code1)
    second_tree = parse_python_ast(code2)
    if first_tree is None or second_tree is None:
        return 0.0

    first_dump = normalize_python_ast(first_tree)
    second_dump = normalize_python_ast(second_tree)
    if first_dump == second_dump:
        return 1.0

    # Imported lazily: only needed when the dumps actually differ.
    from difflib import SequenceMatcher

    matcher = SequenceMatcher(None, first_dump, second_dump)
    return matcher.ratio()
78
+
79
+
80
def tree_sitter_node_count(code: str, language: str = "python") -> int:
    """Count the number of nodes in a tree-sitter parse tree.

    Args:
        code: Source text to parse.
        language: Language name passed on to ``parse_code``.

    Returns:
        Total number of nodes, including the root.
    """
    tree = parse_code(code, language)

    # Explicit stack instead of recursion: deeply nested input would
    # otherwise hit Python's recursion limit.
    count = 0
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        count += 1
        pending.extend(node.children)
    return count
93
+
94
+
95
def tree_sitter_error_count(code: str, language: str = "python") -> int:
    """Count syntax errors reported by tree-sitter.

    A node counts as an error when its type is ``"ERROR"`` or when it is
    flagged as missing.

    Args:
        code: Source text to parse.
        language: Language name passed on to ``parse_code``.

    Returns:
        Number of error/missing nodes found anywhere in the tree.
    """
    tree = parse_code(code, language)

    # Explicit stack instead of recursion: broken or deeply nested input
    # would otherwise hit Python's recursion limit.
    errors = 0
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        if node.type == "ERROR" or node.is_missing:
            errors += 1
        pending.extend(node.children)
    return errors
109
+
110
+
111
def structural_diff_summary(code1: str, code2: str, language: str = "python") -> dict:
    """Return a summary of structural differences between two code snippets.

    Args:
        code1: The "before" source text.
        code2: The "after" source text.
        language: Language name passed on to ``parse_code``.

    Returns:
        A dict with the node counts of both trees (``nodes_before``,
        ``nodes_after``) and three sets of tree-sitter node type names:
        ``added_types``, ``removed_types`` and ``common_types``.
    """

    def _collect_types(root):
        # Iterative pre-order walk; an explicit stack avoids RecursionError
        # on deeply nested trees.
        types = []
        pending = [root]
        while pending:
            node = pending.pop()
            types.append(node.type)
            # Reversed so children are popped in their original order.
            pending.extend(reversed(node.children))
        return types

    tree1 = parse_code(code1, language)
    tree2 = parse_code(code2, language)

    types1 = _collect_types(tree1.root_node)
    types2 = _collect_types(tree2.root_node)

    set1, set2 = set(types1), set(types2)
    return {
        "nodes_before": len(types1),
        "nodes_after": len(types2),
        "added_types": set2 - set1,
        "removed_types": set1 - set2,
        "common_types": set1 & set2,
    }
@@ -0,0 +1,117 @@
1
+ """Code graph analysis: import and call graph construction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ from dataclasses import dataclass, field
7
+
8
+
9
@dataclass
class ImportEdge:
    """One imported name, as recorded from an import statement."""

    # Module the name comes from
    source_module: str
    # Name that was imported
    imported_name: str
    # Local alias from an ``as`` clause, if any
    alias: str | None = None
    # Line number of the import statement (0 when not set)
    line: int = 0
17
+
18
+
19
@dataclass
class CallEdge:
    """One call site: which scope calls which name."""

    # Scope in which the call appears
    caller: str
    # Name of the thing being called
    callee: str
    # Line number of the call (0 when not set)
    line: int = 0
26
+
27
+
28
@dataclass
class CodeGraph:
    """Container for the relationships extracted from one piece of code."""

    # Import edges, in the order they were recorded
    imports: list[ImportEdge] = field(default_factory=list)
    # Call edges, in the order they were recorded
    calls: list[CallEdge] = field(default_factory=list)
    # Names of function definitions
    functions: list[str] = field(default_factory=list)
    # Names of class definitions
    classes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Flatten the graph into plain tuples and sorted name lists.

        Line numbers and aliases are dropped. Imports become
        ``(source_module, imported_name)`` pairs and calls become
        ``(caller, callee)`` pairs, both in recorded order. Function and
        class names are returned sorted.
        """
        import_pairs = []
        for edge in self.imports:
            import_pairs.append((edge.source_module, edge.imported_name))

        call_pairs = []
        for edge in self.calls:
            call_pairs.append((edge.caller, edge.callee))

        function_names = list(self.functions)
        function_names.sort()
        class_names = list(self.classes)
        class_names.sort()

        return {
            "imports": import_pairs,
            "calls": call_pairs,
            "functions": function_names,
            "classes": class_names,
        }
45
+
46
+
47
def build_python_graph(code: str, module_name: str = "<module>") -> CodeGraph:
    """Build an import/call graph from Python source code.

    Args:
        code: Python source text.
        module_name: Label used as the caller for calls made outside any
            function.

    Returns:
        A CodeGraph holding every import, function/class definition and call
        found in the code. Source that cannot be parsed yields an empty graph.

    Notes:
        * The caller of a call is the name of the innermost enclosing
          function; classes do not change the caller scope.
        * For ``from`` imports only ``node.module`` is recorded, so the
          relative-import level (leading dots) is not kept.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError, RecursionError):
        # Besides SyntaxError, ast.parse raises ValueError for NUL bytes
        # (Python < 3.12) and may raise RecursionError on pathologically deep
        # input. All of these mean "unparseable" and get the empty graph.
        return CodeGraph()

    graph = CodeGraph()

    def _get_call_name(node) -> str | None:
        # Resolve plain names and dotted attribute chains. Any other callee
        # expression (e.g. the result of another call) has no name.
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            prefix = _get_call_name(node.value)
            if prefix:
                return f"{prefix}.{node.attr}"
            return node.attr
        return None

    class GraphVisitor(ast.NodeVisitor):
        def __init__(self):
            # Name of the scope that calls are currently attributed to.
            self.scope = module_name

        def visit_Import(self, node):
            for alias in node.names:
                graph.imports.append(
                    ImportEdge(
                        source_module=alias.name,
                        imported_name=alias.name,
                        alias=alias.asname,
                        line=node.lineno,
                    )
                )
            self.generic_visit(node)

        def visit_ImportFrom(self, node):
            module = node.module or ""
            for alias in node.names:
                graph.imports.append(
                    ImportEdge(
                        source_module=module,
                        imported_name=alias.name,
                        alias=alias.asname,
                        line=node.lineno,
                    )
                )
            self.generic_visit(node)

        def visit_FunctionDef(self, node):
            graph.functions.append(node.name)
            enclosing = self.scope
            self.scope = node.name
            self.generic_visit(node)
            # Restore so that calls following this definition are attributed
            # to the enclosing scope again.
            self.scope = enclosing

        def visit_AsyncFunctionDef(self, node):
            self.visit_FunctionDef(node)

        def visit_ClassDef(self, node):
            graph.classes.append(node.name)
            self.generic_visit(node)

        def visit_Call(self, node):
            callee = _get_call_name(node.func)
            if callee:
                graph.calls.append(
                    CallEdge(caller=self.scope, callee=callee, line=node.lineno)
                )
            self.generic_visit(node)

    GraphVisitor().visit(tree)
    return graph
@@ -0,0 +1,86 @@
1
+ """Graph comparison for detecting structural regressions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from vallm.core.graph_builder import CodeGraph, build_python_graph
8
+
9
+
10
@dataclass
class GraphDiffResult:
    """Outcome of diffing two code graphs, grouped by kind of change."""

    # Imports are (module, name) pairs
    added_imports: list[tuple[str, str]] = field(default_factory=list)
    removed_imports: list[tuple[str, str]] = field(default_factory=list)
    added_functions: list[str] = field(default_factory=list)
    removed_functions: list[str] = field(default_factory=list)
    added_classes: list[str] = field(default_factory=list)
    removed_classes: list[str] = field(default_factory=list)
    # Calls are (caller, callee) pairs
    added_calls: list[tuple[str, str]] = field(default_factory=list)
    removed_calls: list[tuple[str, str]] = field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """True when at least one of the eight change lists is non-empty."""
        buckets = (
            self.added_imports,
            self.removed_imports,
            self.added_functions,
            self.removed_functions,
            self.added_classes,
            self.removed_classes,
            self.added_calls,
            self.removed_calls,
        )
        return any(buckets)

    @property
    def breaking_changes(self) -> list[str]:
        """Describe removals, which are the potentially breaking changes.

        Messages are listed for removed functions first, then removed
        classes, then removed imports. Additions and call changes are not
        reported here.
        """
        report = [f"Removed function: {fn}" for fn in self.removed_functions]
        report.extend(f"Removed class: {cls}" for cls in self.removed_classes)
        report.extend(
            f"Removed import: {name} from {mod}" for mod, name in self.removed_imports
        )
        return report
47
+
48
+
49
def diff_graphs(before: CodeGraph, after: CodeGraph) -> GraphDiffResult:
    """Diff two CodeGraphs category by category.

    Imports, functions and classes are taken from each graph's ``to_dict()``
    form; calls are compared as ``(caller, callee)`` pairs. Each category
    yields an "added" and a "removed" list in the result.
    """
    old = before.to_dict()
    new = after.to_dict()

    old_calls = [(edge.caller, edge.callee) for edge in before.calls]
    new_calls = [(edge.caller, edge.callee) for edge in after.calls]

    categories = (
        ("imports", old["imports"], new["imports"]),
        ("functions", old["functions"], new["functions"]),
        ("classes", old["classes"], new["classes"]),
        ("calls", old_calls, new_calls),
    )

    changes = {}
    for label, previous, current in categories:
        changes[f"added_{label}"] = _diff_list(previous, current, added=True)
        changes[f"removed_{label}"] = _diff_list(previous, current, added=False)
    return GraphDiffResult(**changes)
72
+
73
+
74
def diff_python_code(before_code: str, after_code: str) -> GraphDiffResult:
    """Build a graph for each source string and return their diff.

    The two graphs are built with the module labels ``"before"`` and
    ``"after"`` respectively.
    """
    return diff_graphs(
        build_python_graph(before_code, "before"),
        build_python_graph(after_code, "after"),
    )
79
+
80
+
81
+ def _diff_list(before, after, added: bool):
82
+ set_before = set(before) if not isinstance(before[0] if before else "", tuple) else set(before)
83
+ set_after = set(after) if not isinstance(after[0] if after else "", tuple) else set(after)
84
+ if added:
85
+ return sorted(set_after - set_before)
86
+ return sorted(set_before - set_after)
vallm/core/proposal.py ADDED
@@ -0,0 +1,37 @@
1
+ """Proposal model representing code to be validated."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+
9
@dataclass
class Proposal:
    """A piece of code submitted for validation.

    Attributes:
        code: The proposed source code string.
        language: Programming language (e.g., 'python', 'javascript', 'c').
        reference_code: Optional reference/existing code for comparison.
        filename: Optional filename for context.
        metadata: Additional metadata (e.g., prompt, model name).
    """

    code: str
    language: str = "python"
    reference_code: Optional[str] = None
    filename: Optional[str] = None
    metadata: dict = field(default_factory=dict)

    @property
    def code_bytes(self) -> bytes:
        """The proposed code encoded as UTF-8 (the form tree-sitter parses)."""
        encoded = self.code.encode("utf-8")
        return encoded

    @property
    def reference_bytes(self) -> Optional[bytes]:
        """The reference code encoded as UTF-8, or None when there is none."""
        reference = self.reference_code
        return None if reference is None else reference.encode("utf-8")
vallm/hookspecs.py ADDED
@@ -0,0 +1,33 @@
1
+ """pluggy hook specifications for vallm validators."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import pluggy
8
+
9
+ if TYPE_CHECKING:
10
+ from vallm.core.proposal import Proposal
11
+ from vallm.scoring import ValidationResult
12
+
13
+ hookspec = pluggy.HookspecMarker("vallm")
14
+ hookimpl = pluggy.HookimplMarker("vallm")
15
+
16
+
17
class VallmSpec:
    """Hook specifications for validator plugins.

    Each method is marked with ``hookspec`` and has no implementation here.
    """

    @hookspec
    def validate_proposal(self, proposal: Proposal, context: dict) -> ValidationResult:
        """Validate a code proposal and return a ValidationResult."""

    @hookspec
    def get_validator_name(self) -> str:
        """Return the name of this validator."""

    @hookspec
    def get_validator_tier(self) -> int:
        """Return the tier (1-4) of this validator for pipeline ordering."""
vallm/scoring.py ADDED
@@ -0,0 +1,191 @@
1
+ """Weighted scoring and verdict engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum
7
+ from typing import Optional
8
+
9
+ from vallm.config import VallmSettings
10
+ from vallm.core.proposal import Proposal
11
+
12
+
13
class Verdict(Enum):
    """Overall outcome assigned to a validation pipeline run."""

    PASS = "pass"
    REVIEW = "review"
    FAIL = "fail"
17
+
18
+
19
class Severity(Enum):
    """How serious a reported issue is."""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
23
+
24
+
25
@dataclass
class Issue:
    """One finding reported by a validator."""

    # Human-readable description of the problem
    message: str
    severity: Severity = Severity.WARNING
    # Optional source position; the column is only rendered when a line is set
    line: Optional[int] = None
    column: Optional[int] = None
    # Optional identifier of the rule that produced the issue
    rule: Optional[str] = None

    def __str__(self) -> str:
        """Render as ``[severity]:line:column message``.

        The ``:line`` part is omitted when no line is set, and ``:column`` is
        only appended when both line and column are set.
        """
        parts = []
        if self.line is not None:
            parts.append(f":{self.line}")
            if self.column is not None:
                parts.append(f":{self.column}")
        location = "".join(parts)
        return f"[{self.severity.value}]{location} {self.message}"
42
+
43
+
44
@dataclass
class ValidationResult:
    """What a single validator reports for one proposal."""

    # Name of the validator that produced this result
    validator: str
    score: float  # 0.0–1.0
    weight: float = 1.0  # configurable importance
    confidence: float = 1.0  # validator's self-assessed confidence
    issues: list[Issue] = field(default_factory=list)
    details: dict = field(default_factory=dict)

    @property
    def weighted_score(self) -> float:
        """The score scaled by both weight and confidence."""
        scaled = self.score * self.weight
        return scaled * self.confidence

    @property
    def has_errors(self) -> bool:
        """True when at least one issue has ERROR severity."""
        for issue in self.issues:
            if issue.severity == Severity.ERROR:
                return True
        return False
62
+
63
+
64
@dataclass
class PipelineResult:
    """Combined outcome of every validator that ran."""

    results: list[ValidationResult] = field(default_factory=list)
    # Defaults to FAIL until a verdict is assigned
    verdict: Verdict = Verdict.FAIL

    @property
    def weighted_score(self) -> float:
        """Weighted mean of the validator scores.

        Each result counts with ``weight * confidence``. The value is 0.0
        when there are no results or when the total weight is zero.
        """
        if self.results:
            total_weight = sum(r.weight * r.confidence for r in self.results)
            if total_weight != 0:
                return sum(r.weighted_score for r in self.results) / total_weight
        return 0.0

    @property
    def all_issues(self) -> list[Issue]:
        """All issues from all results, in result order."""
        return [issue for result in self.results for issue in result.issues]

    @property
    def error_count(self) -> int:
        """Number of issues with ERROR severity."""
        return len([i for i in self.all_issues if i.severity == Severity.ERROR])

    @property
    def warning_count(self) -> int:
        """Number of issues with WARNING severity."""
        return len([i for i in self.all_issues if i.severity == Severity.WARNING])
94
+
95
+
96
def compute_verdict(
    results: list[ValidationResult],
    settings: Optional[VallmSettings] = None,
) -> PipelineResult:
    """Turn a list of validation results into a PipelineResult with a verdict.

    Any result carrying an error-severity issue forces FAIL regardless of
    score. Otherwise the weighted score is compared to the thresholds:
    at or above ``pass_threshold`` is PASS, at or above ``review_threshold``
    is REVIEW, anything lower is FAIL. Default settings are used when
    ``settings`` is None.
    """
    cfg = VallmSettings() if settings is None else settings
    outcome = PipelineResult(results=results)

    # Hard gate: errors override the score entirely.
    if any(r.has_errors for r in results):
        outcome.verdict = Verdict.FAIL
        return outcome

    aggregate = outcome.weighted_score
    if aggregate >= cfg.pass_threshold:
        decision = Verdict.PASS
    elif aggregate >= cfg.review_threshold:
        decision = Verdict.REVIEW
    else:
        decision = Verdict.FAIL

    outcome.verdict = decision
    return outcome
120
+
121
+
122
def validate(
    proposal: Proposal,
    settings: Optional[VallmSettings] = None,
    validators: Optional[list] = None,
    context: Optional[dict] = None,
) -> PipelineResult:
    """Run the full validation pipeline on a proposal.

    Validators run in ascending ``tier`` order. If a tier-1 validator reports
    an error-severity issue, the pipeline stops there and the verdict is
    computed from the results gathered so far.

    Args:
        proposal: The code proposal to validate.
        settings: Optional settings override.
        validators: Optional list of validator instances. If None, uses
            defaults. The list itself is not modified.
        context: Optional additional context dict passed to validators.

    Returns:
        PipelineResult with verdict and all validation results.
    """
    if settings is None:
        settings = VallmSettings()
    if context is None:
        context = {}

    if validators is None:
        validators = _get_default_validators(settings)

    # Sort a copy: an in-place sort would reorder the caller's own list.
    # sorted() is stable, so validators of equal tier keep their given order.
    ordered = sorted(validators, key=lambda v: v.tier)

    results = []
    for validator in ordered:
        result = validator.validate(proposal, context)
        results.append(result)

        # Fail fast on errors in tier 1
        if result.has_errors and validator.tier == 1:
            break

    return compute_verdict(results, settings)
160
+
161
+
162
+ def _get_default_validators(settings: VallmSettings) -> list:
163
+ """Build the default validator list based on settings."""
164
+ validators = []
165
+
166
+ if settings.enable_syntax:
167
+ from vallm.validators.syntax import SyntaxValidator
168
+
169
+ validators.append(SyntaxValidator())
170
+
171
+ if settings.enable_imports:
172
+ from vallm.validators.imports import ImportValidator
173
+
174
+ validators.append(ImportValidator())
175
+
176
+ if settings.enable_complexity:
177
+ from vallm.validators.complexity import ComplexityValidator
178
+
179
+ validators.append(ComplexityValidator(settings))
180
+
181
+ if settings.enable_security:
182
+ from vallm.validators.security import SecurityValidator
183
+
184
+ validators.append(SecurityValidator())
185
+
186
+ if settings.enable_semantic:
187
+ from vallm.validators.semantic import SemanticValidator
188
+
189
+ validators.append(SemanticValidator(settings))
190
+
191
+ return validators
@@ -0,0 +1 @@
1
+ """Built-in validators for vallm."""