specfact-cli 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- specfact_cli/__init__.py +14 -0
- specfact_cli/agents/__init__.py +24 -0
- specfact_cli/agents/analyze_agent.py +392 -0
- specfact_cli/agents/base.py +95 -0
- specfact_cli/agents/plan_agent.py +202 -0
- specfact_cli/agents/registry.py +176 -0
- specfact_cli/agents/sync_agent.py +133 -0
- specfact_cli/analyzers/__init__.py +11 -0
- specfact_cli/analyzers/code_analyzer.py +796 -0
- specfact_cli/cli.py +396 -0
- specfact_cli/commands/__init__.py +7 -0
- specfact_cli/commands/enforce.py +88 -0
- specfact_cli/commands/import_cmd.py +365 -0
- specfact_cli/commands/init.py +125 -0
- specfact_cli/commands/plan.py +1089 -0
- specfact_cli/commands/repro.py +192 -0
- specfact_cli/commands/sync.py +408 -0
- specfact_cli/common/__init__.py +25 -0
- specfact_cli/common/logger_setup.py +654 -0
- specfact_cli/common/logging_utils.py +41 -0
- specfact_cli/common/text_utils.py +52 -0
- specfact_cli/common/utils.py +48 -0
- specfact_cli/comparators/__init__.py +11 -0
- specfact_cli/comparators/plan_comparator.py +391 -0
- specfact_cli/generators/__init__.py +14 -0
- specfact_cli/generators/plan_generator.py +105 -0
- specfact_cli/generators/protocol_generator.py +115 -0
- specfact_cli/generators/report_generator.py +200 -0
- specfact_cli/generators/workflow_generator.py +120 -0
- specfact_cli/importers/__init__.py +7 -0
- specfact_cli/importers/speckit_converter.py +773 -0
- specfact_cli/importers/speckit_scanner.py +711 -0
- specfact_cli/models/__init__.py +33 -0
- specfact_cli/models/deviation.py +105 -0
- specfact_cli/models/enforcement.py +150 -0
- specfact_cli/models/plan.py +97 -0
- specfact_cli/models/protocol.py +28 -0
- specfact_cli/modes/__init__.py +19 -0
- specfact_cli/modes/detector.py +126 -0
- specfact_cli/modes/router.py +153 -0
- specfact_cli/resources/semgrep/async.yml +285 -0
- specfact_cli/sync/__init__.py +12 -0
- specfact_cli/sync/repository_sync.py +279 -0
- specfact_cli/sync/speckit_sync.py +388 -0
- specfact_cli/utils/__init__.py +58 -0
- specfact_cli/utils/console.py +70 -0
- specfact_cli/utils/feature_keys.py +212 -0
- specfact_cli/utils/git.py +241 -0
- specfact_cli/utils/github_annotations.py +399 -0
- specfact_cli/utils/ide_setup.py +382 -0
- specfact_cli/utils/prompts.py +180 -0
- specfact_cli/utils/structure.py +497 -0
- specfact_cli/utils/yaml_utils.py +200 -0
- specfact_cli/validators/__init__.py +20 -0
- specfact_cli/validators/fsm.py +262 -0
- specfact_cli/validators/repro_checker.py +759 -0
- specfact_cli/validators/schema.py +196 -0
- specfact_cli-0.4.2.dist-info/METADATA +370 -0
- specfact_cli-0.4.2.dist-info/RECORD +62 -0
- specfact_cli-0.4.2.dist-info/WHEEL +4 -0
- specfact_cli-0.4.2.dist-info/entry_points.txt +2 -0
- specfact_cli-0.4.2.dist-info/licenses/LICENSE.md +61 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Reproducibility checker - Runs various validation tools and aggregates results.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to run linting, type checking, contract
|
|
5
|
+
exploration, and test suites with time budgets and result aggregation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
import time
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from beartype import beartype
|
|
21
|
+
from icontract import ensure, require
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
console = Console()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CheckStatus(Enum):
    """Status of a validation check."""

    # Check has been created but not yet executed.
    PENDING = "pending"
    # Check subprocess is currently executing.
    RUNNING = "running"
    # Tool exited with return code 0.
    PASSED = "passed"
    # Tool exited non-zero, or the check raised an exception.
    FAILED = "failed"
    # Per-check timeout or overall budget was exceeded.
    TIMEOUT = "timeout"
    # Tool binary was not found on PATH.
    SKIPPED = "skipped"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@beartype
@require(lambda text: isinstance(text, str), "Text must be string")
@ensure(lambda result: isinstance(result, str), "Must return string")
def _strip_ansi_codes(text: str) -> str:
    """Return *text* with all ANSI terminal escape sequences removed."""
    # Matches both two-character escapes (ESC + one byte) and CSI
    # sequences (ESC [ ... final byte).
    pattern = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
    return pattern.sub("", text)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(
    lambda result: "violations" in result and "total_violations" in result,
    "Must include violations and total_violations",
)
def _extract_ruff_findings(output: str) -> dict[str, Any]:
    """
    Extract structured findings from ruff output.

    Args:
        output: Raw ruff stdout (may contain ANSI escape codes)

    Returns:
        Dictionary with "violations" (list of dicts with file/line/column/
        code/message), "total_violations", and "files_checked".
    """
    findings: dict[str, Any] = {
        "violations": [],
        "total_violations": 0,
        "files_checked": 0,
    }

    # Strip ANSI codes so the regexes see plain text
    clean_output = _strip_ansi_codes(output)

    # Parse ruff output format:
    # Format 1: "W293 [*] Blank line contains whitespace\n--> src/file.py:240:1"
    # Format 2: "src/file.py:240:1: W293 Blank line contains whitespace"
    #
    # BUG FIX: rule codes may contain several letters (e.g. "UP006",
    # "SIM101", "PLR0913"); the previous patterns used "[A-Z]\d+", which
    # only matched single-letter codes and silently dropped the rest.
    code_line_pattern = re.compile(r"^([A-Z]+\d+)\s+\[[^\]]+\]\s+(.+)$")
    location_pattern = re.compile(r"-->\s+([^:]+):(\d+):(\d+)")
    inline_pattern = re.compile(r"^([^:]+):(\d+):(\d+):\s+([A-Z]+\d+)\s+(.+)$")

    lines = clean_output.split("\n")
    i = 0
    while i < len(lines):
        line_stripped = lines[i].strip()
        if not line_stripped:
            i += 1
            continue

        # Skip help lines and code block markers
        if line_stripped.startswith(("help:", "|", " |")):
            i += 1
            continue

        # Try format 1: "CODE [*] message" followed by "--> file:line:col"
        code_match = code_line_pattern.match(line_stripped)
        if code_match:
            code = code_match.group(1)
            message = code_match.group(2)
            if i + 1 < len(lines):
                location_match = location_pattern.match(lines[i + 1].strip())
                if location_match:
                    findings["violations"].append(
                        {
                            "file": location_match.group(1),
                            "line": int(location_match.group(2)),
                            "column": int(location_match.group(3)),
                            "code": code,
                            "message": message,
                        }
                    )
                    i += 2  # Consume the location line as well
                    continue

        # Try format 2: "file:line:col: CODE message"
        match = inline_pattern.match(line_stripped)
        if match:
            file_path, line_num, col_num, code, message = match.groups()
            findings["violations"].append(
                {
                    "file": file_path,
                    "line": int(line_num),
                    "column": int(col_num),
                    "code": code,
                    "message": message,
                }
            )

        i += 1

    # Derive the total from the collected list
    findings["total_violations"] = len(findings["violations"])

    # Extract files checked count (e.g. "12 files checked")
    files_match = re.search(r"(\d+)\s+files?\s+checked", clean_output, re.IGNORECASE)
    if files_match:
        findings["files_checked"] = int(files_match.group(1))

    return findings
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@require(lambda error: isinstance(error, str), "Error must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "total_findings" in result, "Must include total_findings")
def _extract_semgrep_findings(output: str, error: str) -> dict[str, Any]:
    """Extract structured findings from semgrep output."""
    findings: dict[str, Any] = {
        "findings": [],
        "total_findings": 0,
        "rules_run": 0,
        "targets_scanned": 0,
    }

    # Semgrep writes its status summary to stderr, so scan both streams.
    combined = _strip_ansi_codes((output + "\n" + error).strip())

    # Each counter appears as "<label>: <n>" in the scan summary.
    counters = (
        ("total_findings", r"Findings:\s*(\d+)"),
        ("rules_run", r"Rules\s+run:\s*(\d+)"),
        ("targets_scanned", r"Targets\s+scanned:\s*(\d+)"),
    )
    for key, pattern in counters:
        hit = re.search(pattern, combined, re.IGNORECASE)
        if hit:
            findings[key] = int(hit.group(1))

    return findings
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "errors" in result and "warnings" in result, "Must include errors and warnings")
def _extract_basedpyright_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from basedpyright output."""
    findings: dict[str, Any] = {
        "errors": [],
        "warnings": [],
        "total_errors": 0,
        "total_warnings": 0,
    }

    # Expected diagnostic shape: "path:line:col: error|warning: message"
    # NOTE(review): assumes colon-separated diagnostics; drive-letter paths
    # on Windows would not match "[^:]+" — confirm against basedpyright CLI.
    diagnostic_re = re.compile(r"^([^:]+):(\d+):(\d+):\s+(error|warning):\s+(.+)$")

    for raw_line in _strip_ansi_codes(output).split("\n"):
        candidate = raw_line.strip()
        if not candidate:
            continue
        parsed = diagnostic_re.match(candidate)
        if parsed is None:
            continue
        file_path, row, col, severity, message = parsed.groups()
        entry = {
            "file": file_path,
            "line": int(row),
            "column": int(col),
            "message": message,
        }
        # Route into the matching bucket and bump its counter.
        bucket = "errors" if severity == "error" else "warnings"
        findings[bucket].append(entry)
        findings["total_" + bucket] += 1

    return findings
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "counterexamples" in result, "Must include counterexamples")
def _extract_crosshair_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from CrossHair output."""
    findings: dict[str, Any] = {
        "counterexamples": [],
        "total_counterexamples": 0,
    }

    clean = _strip_ansi_codes(output)

    # CrossHair's output format varies; only attempt extraction when the
    # text mentions a counterexample or a failure at all.
    lowered = clean.lower()
    if "counterexample" not in lowered and "failed" not in lowered:
        return findings

    # Best-effort pull of "file:line ... counterexample|failed" spans.
    for hit in re.finditer(r"([^:]+):(\d+):.*?(counterexample|failed)", clean, re.IGNORECASE):
        findings["counterexamples"].append(
            {
                "file": hit.group(1),
                "line": int(hit.group(2)),
                "type": hit.group(3).lower(),
            }
        )
        findings["total_counterexamples"] += 1

    return findings
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "tests_run" in result, "Must include tests_run")
@ensure(lambda result: result["tests_run"] >= 0, "tests_run must be non-negative")
def _extract_pytest_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from pytest output."""
    findings: dict[str, Any] = {
        "tests_run": 0,
        "tests_passed": 0,
        "tests_failed": 0,
        "tests_skipped": 0,
        "failures": [],
    }

    clean_output = _strip_ansi_codes(output)

    # Pull each counter out of the pytest summary line,
    # e.g. "3 passed, 1 failed, 2 skipped in 0.12s".
    for key, word in (
        ("tests_passed", "passed"),
        ("tests_failed", "failed"),
        ("tests_skipped", "skipped"),
    ):
        hit = re.search(r"(\d+)\s+" + word, clean_output, re.IGNORECASE)
        if hit:
            findings[key] = int(hit.group(1))

    # Total is derived from the individual outcome counters.
    findings["tests_run"] = findings["tests_passed"] + findings["tests_failed"] + findings["tests_skipped"]

    return findings
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@beartype
@require(lambda tool: isinstance(tool, str) and len(tool) > 0, "Tool must be non-empty string")
@require(lambda output: isinstance(output, str), "Output must be string")
@require(lambda error: isinstance(error, str), "Error must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
def _extract_findings(tool: str, output: str, error: str) -> dict[str, Any]:
    """
    Extract structured findings from tool output based on tool type.

    Args:
        tool: Tool name (ruff, semgrep, basedpyright, crosshair, pytest)
        output: Tool stdout output
        error: Tool stderr output

    Returns:
        Dictionary with structured findings for the specific tool
        (empty dict for an unrecognized tool)
    """
    # Dispatch table keyed on the lower-cased tool name; only semgrep
    # also consumes stderr (it writes its summary there).
    extractors = {
        "ruff": lambda: _extract_ruff_findings(output),
        "semgrep": lambda: _extract_semgrep_findings(output, error),
        "basedpyright": lambda: _extract_basedpyright_findings(output),
        "crosshair": lambda: _extract_crosshair_findings(output),
        "pytest": lambda: _extract_pytest_findings(output),
    }
    extractor = extractors.get(tool.lower())
    if extractor is None:
        # Unknown tool - return empty findings
        return {}
    return extractor()
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
@dataclass
class CheckResult:
    """Result of a single validation check (one tool invocation)."""

    name: str
    tool: str
    status: CheckStatus
    duration: float | None = None
    exit_code: int | None = None
    output: str = ""
    error: str = ""
    timeout: bool = False

    def __post_init__(self) -> None:
        """Normalize an empty tool name so findings extraction never sees ""."""
        if not self.tool:
            self.tool = "unknown"

    @beartype
    @require(lambda max_output_length: max_output_length > 0, "max_output_length must be positive")
    @ensure(lambda result: isinstance(result, dict), "Must return dictionary")
    @ensure(
        lambda result: "name" in result and "tool" in result and "status" in result,
        "Must include name, tool, and status",
    )
    def to_dict(self, include_findings: bool = True, max_output_length: int = 50000) -> dict[str, Any]:
        """
        Convert result to dictionary with structured findings.

        Args:
            include_findings: Whether to include structured findings (default: True)
            max_output_length: Maximum length of raw output/error to include if findings unavailable (truncates if longer)

        Returns:
            Dictionary representation of the check result with structured findings
        """
        summary: dict[str, Any] = {
            "name": self.name,
            "tool": self.tool,
            "status": self.status.value,
            "duration": self.duration,
            "exit_code": self.exit_code,
            "timeout": self.timeout,
            "output_length": len(self.output),
            "error_length": len(self.error),
        }

        if include_findings and self.tool:
            try:
                extracted = _extract_findings(self.tool, self.output, self.error)
                if extracted:
                    summary["findings"] = extracted
            except Exception:
                # Extraction failed: fall back to raw stdout/stderr,
                # ANSI-stripped and truncated to max_output_length.
                for key, text in (("output", self.output), ("error", self.error)):
                    if not text:
                        summary[key] = ""
                    elif len(text) <= max_output_length:
                        summary[key] = _strip_ansi_codes(text)
                    else:
                        summary[key] = _strip_ansi_codes(text[:max_output_length])
                        summary[key + "_truncated"] = True

        return summary
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
@dataclass
class ReproReport:
    """Aggregated report of all validation checks."""

    checks: list[CheckResult] = field(default_factory=list)
    total_duration: float = 0.0
    total_checks: int = 0
    passed_checks: int = 0
    failed_checks: int = 0
    timeout_checks: int = 0
    skipped_checks: int = 0
    budget_exceeded: bool = False
    # Metadata fields
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    repo_path: str | None = None
    budget: int | None = None
    active_plan_path: str | None = None
    enforcement_config_path: str | None = None
    enforcement_preset: str | None = None
    fix_enabled: bool = False
    fail_fast: bool = False

    @beartype
    @require(lambda result: isinstance(result, CheckResult), "Must be CheckResult instance")
    def add_check(self, result: CheckResult) -> None:
        """Record *result* and update the aggregate counters."""
        self.checks.append(result)
        self.total_checks += 1

        if result.duration:
            self.total_duration += result.duration

        # Map each terminal status to the counter attribute it increments.
        counter_attr = {
            CheckStatus.PASSED: "passed_checks",
            CheckStatus.FAILED: "failed_checks",
            CheckStatus.TIMEOUT: "timeout_checks",
            CheckStatus.SKIPPED: "skipped_checks",
        }.get(result.status)
        if counter_attr is not None:
            setattr(self, counter_attr, getattr(self, counter_attr) + 1)

    @beartype
    @ensure(lambda result: result in (0, 1, 2), "Exit code must be 0, 1, or 2")
    def get_exit_code(self) -> int:
        """
        Get exit code for the repro command.

        Returns:
            0 = all passed, 1 = some failed, 2 = budget exceeded
        """
        if self.budget_exceeded or self.timeout_checks > 0:
            return 2
        # CrossHair failures are non-blocking (advisory only) - don't count them
        has_blocking_failure = any(
            check.status == CheckStatus.FAILED and check.tool != "crosshair" for check in self.checks
        )
        return 1 if has_blocking_failure else 0

    @beartype
    @require(lambda max_finding_length: max_finding_length > 0, "max_finding_length must be positive")
    @ensure(lambda result: isinstance(result, dict), "Must return dictionary")
    @ensure(lambda result: "total_checks" in result and "checks" in result, "Must include total_checks and checks")
    def to_dict(self, include_findings: bool = True, max_finding_length: int = 50000) -> dict[str, Any]:
        """
        Convert report to dictionary with structured findings.

        Args:
            include_findings: Whether to include structured findings for each check (default: True)
            max_finding_length: Maximum length of raw output/error to include if findings unavailable (truncates if longer)

        Returns:
            Dictionary representation of the report with structured findings
        """
        payload: dict[str, Any] = {
            "total_duration": self.total_duration,
            "total_checks": self.total_checks,
            "passed_checks": self.passed_checks,
            "failed_checks": self.failed_checks,
            "timeout_checks": self.timeout_checks,
            "skipped_checks": self.skipped_checks,
            "budget_exceeded": self.budget_exceeded,
            "checks": [
                check.to_dict(include_findings=include_findings, max_output_length=max_finding_length)
                for check in self.checks
            ],
        }

        # Collect metadata; only truthy values are emitted, except budget,
        # which is kept whenever it was set (a budget of 0 never occurs,
        # but None specifically means "not configured").
        metadata: dict[str, Any] = {}
        if self.timestamp:
            metadata["timestamp"] = self.timestamp
        if self.repo_path:
            metadata["repo_path"] = self.repo_path
        if self.budget is not None:
            metadata["budget"] = self.budget
        for key, value in (
            ("active_plan_path", self.active_plan_path),
            ("enforcement_config_path", self.enforcement_config_path),
            ("enforcement_preset", self.enforcement_preset),
            ("fix_enabled", self.fix_enabled),
            ("fail_fast", self.fail_fast),
        ):
            if value:
                metadata[key] = value

        if metadata:
            payload["metadata"] = metadata

        return payload
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
class ReproChecker:
    """
    Runs validation checks with time budgets and result aggregation.

    Executes various tools (ruff, semgrep, basedpyright, crosshair, pytest)
    and aggregates their results into a comprehensive report.
    """

    @beartype
    @require(lambda budget: budget > 0, "Budget must be positive")
    @ensure(lambda self: self.budget > 0, "Budget must be positive after init")
    def __init__(
        self, repo_path: Path | None = None, budget: int = 120, fail_fast: bool = False, fix: bool = False
    ) -> None:
        """
        Initialize reproducibility checker.

        Args:
            repo_path: Path to repository (default: current directory)
            budget: Total time budget in seconds (must be > 0)
            fail_fast: Stop on first failure
            fix: Apply auto-fixes where available (Semgrep auto-fixes)
        """
        self.repo_path = Path(repo_path) if repo_path else Path(".")
        self.budget = budget
        self.fail_fast = fail_fast
        self.fix = fix
        self.report = ReproReport()
        # NOTE(review): the budget clock starts at construction, not when
        # run_all_checks() is called — a delay between the two eats budget.
        self.start_time = time.time()

        # Initialize metadata in report
        self.report.repo_path = str(self.repo_path.absolute())
        self.report.budget = budget
        self.report.fix_enabled = fix
        self.report.fail_fast = fail_fast

    @beartype
    @require(lambda name: isinstance(name, str) and len(name) > 0, "Name must be non-empty string")
    @require(lambda tool: isinstance(tool, str) and len(tool) > 0, "Tool must be non-empty string")
    @require(lambda command: isinstance(command, list) and len(command) > 0, "Command must be non-empty list")
    @require(lambda timeout: timeout is None or timeout > 0, "Timeout must be positive if provided")
    @ensure(lambda result: isinstance(result, CheckResult), "Must return CheckResult")
    @ensure(lambda result: result.duration is None or result.duration >= 0, "Duration must be non-negative")
    def run_check(
        self,
        name: str,
        tool: str,
        command: list[str],
        timeout: int | None = None,
        skip_if_missing: bool = True,
    ) -> CheckResult:
        """
        Run a single validation check.

        Args:
            name: Human-readable check name
            tool: Tool name (for display)
            command: Command to execute
            timeout: Per-check timeout (default: budget / number of checks, must be > 0 if provided)
            skip_if_missing: Skip check if tool not found

        Returns:
            CheckResult with status and output
        """
        result = CheckResult(name=name, tool=tool, status=CheckStatus.PENDING)

        # Check if tool exists (cross-platform)
        if skip_if_missing:
            tool_path = shutil.which(command[0])
            if tool_path is None:
                result.status = CheckStatus.SKIPPED
                result.error = f"Tool '{command[0]}' not found in PATH, skipping"
                return result

        # Check budget before spawning the subprocess; if already over,
        # mark the whole report as budget-exceeded and bail out.
        elapsed = time.time() - self.start_time
        if elapsed >= self.budget:
            self.report.budget_exceeded = True
            result.status = CheckStatus.TIMEOUT
            result.timeout = True
            result.error = f"Budget exceeded ({self.budget}s)"
            return result

        # Calculate timeout for this check: the caller-supplied timeout,
        # defaulting to half the remaining budget, but never more than
        # what is actually left.
        remaining_budget = self.budget - elapsed
        check_timeout = min(timeout or (remaining_budget / 2), remaining_budget)

        # Run command
        result.status = CheckStatus.RUNNING
        start = time.time()

        try:
            # check=False: a non-zero exit is a FAILED result, not an exception.
            proc = subprocess.run(
                command,
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                timeout=check_timeout,
                check=False,
            )

            result.duration = time.time() - start
            result.exit_code = proc.returncode
            result.output = proc.stdout
            result.error = proc.stderr

            if proc.returncode == 0:
                result.status = CheckStatus.PASSED
            else:
                result.status = CheckStatus.FAILED

        except subprocess.TimeoutExpired:
            # Must precede the generic handler: TimeoutExpired is an
            # Exception subclass and maps to TIMEOUT, not FAILED.
            result.duration = time.time() - start
            result.status = CheckStatus.TIMEOUT
            result.timeout = True
            result.error = f"Check timed out after {check_timeout}s"

        except Exception as e:
            # Any other launch/execution error (e.g. OSError) is a failure.
            result.duration = time.time() - start
            result.status = CheckStatus.FAILED
            result.error = f"Check failed with exception: {e!s}"

        return result

    @beartype
    @ensure(lambda result: isinstance(result, ReproReport), "Must return ReproReport")
    @ensure(lambda result: result.total_checks >= 0, "Total checks must be non-negative")
    @ensure(
        lambda result: result.total_checks
        == result.passed_checks + result.failed_checks + result.timeout_checks + result.skipped_checks,
        "Total checks must equal sum of all status types",
    )
    def run_all_checks(self) -> ReproReport:
        """
        Run all validation checks.

        Builds the check list based on what exists in the repository
        (semgrep config, src/, tests/contracts/, tests/smoke/), runs each
        check under the shared time budget, then attaches plan/enforcement
        metadata to the report.

        Returns:
            ReproReport with aggregated results
        """
        # Check if semgrep config exists
        semgrep_config = self.repo_path / "tools" / "semgrep" / "async.yml"
        semgrep_enabled = semgrep_config.exists()

        # Check if test directories exist
        contracts_tests = self.repo_path / "tests" / "contracts"
        smoke_tests = self.repo_path / "tests" / "smoke"
        src_dir = self.repo_path / "src"

        # Each tuple mirrors run_check's positional parameters:
        # (name, tool, command, timeout, skip_if_missing).
        checks: list[tuple[str, str, list[str], int | None, bool]] = [
            ("Linting (ruff)", "ruff", ["ruff", "check", "src/", "tests/", "tools/"], None, True),
        ]

        # Add semgrep only if config exists
        if semgrep_enabled:
            semgrep_command = ["semgrep", "--config", str(semgrep_config.relative_to(self.repo_path)), "."]
            if self.fix:
                semgrep_command.append("--autofix")
            checks.append(
                (
                    "Async patterns (semgrep)",
                    "semgrep",
                    semgrep_command,
                    30,
                    True,
                )
            )

        checks.extend(
            [
                ("Type checking (basedpyright)", "basedpyright", ["basedpyright", "src/", "tools/"], None, True),
            ]
        )

        # Add CrossHair only if src/ exists
        # Exclude common/logger_setup.py from CrossHair analysis due to known signature analysis issues
        # CrossHair doesn't support --exclude, so we exclude the common directory and add other directories
        if src_dir.exists():
            # Get all subdirectories except common
            specfact_dirs = [d for d in src_dir.iterdir() if d.is_dir() and d.name != "common"]
            crosshair_targets = ["src/" + d.name for d in specfact_dirs] + ["tools/"]
            checks.append(
                (
                    "Contract exploration (CrossHair)",
                    "crosshair",
                    ["crosshair", "check", *crosshair_targets],
                    60,
                    True,
                )
            )

        # Add property tests only if directory exists
        if contracts_tests.exists():
            checks.append(
                (
                    "Property tests (pytest contracts)",
                    "pytest",
                    ["pytest", "tests/contracts/", "-v"],
                    30,
                    True,
                )
            )

        # Add smoke tests only if directory exists
        if smoke_tests.exists():
            checks.append(("Smoke tests (pytest smoke)", "pytest", ["pytest", "tests/smoke/", "-v"], 30, True))

        for check_args in checks:
            # Check budget before starting
            elapsed = time.time() - self.start_time
            if elapsed >= self.budget:
                self.report.budget_exceeded = True
                break

            # Run check
            result = self.run_check(*check_args)
            self.report.add_check(result)

            # Fail fast if requested
            if self.fail_fast and result.status == CheckStatus.FAILED:
                break

        self.report.total_duration = time.time() - self.start_time

        # Check if budget exceeded
        elapsed = time.time() - self.start_time
        if elapsed >= self.budget:
            self.report.budget_exceeded = True

        # Populate metadata: active plan and enforcement config.
        # Imports are deferred to here so a broken/missing project module
        # degrades to a warning instead of breaking the whole run.
        try:
            from specfact_cli.utils.structure import SpecFactStructure

            # Get active plan path
            active_plan_path = SpecFactStructure.get_default_plan_path(self.repo_path)
            if active_plan_path.exists():
                self.report.active_plan_path = str(active_plan_path.relative_to(self.repo_path))

            # Get enforcement config path and preset
            enforcement_config_path = SpecFactStructure.get_enforcement_config_path(self.repo_path)
            if enforcement_config_path.exists():
                self.report.enforcement_config_path = str(enforcement_config_path.relative_to(self.repo_path))
                try:
                    from specfact_cli.models.enforcement import EnforcementConfig
                    from specfact_cli.utils.yaml_utils import load_yaml

                    config_data = load_yaml(enforcement_config_path)
                    if config_data:
                        enforcement_config = EnforcementConfig(**config_data)
                        self.report.enforcement_preset = enforcement_config.preset.value
                except Exception as e:
                    # If config can't be loaded, just skip preset (non-fatal)
                    console.print(f"[dim]Warning: Could not load enforcement config preset: {e}[/dim]")

        except Exception as e:
            # If metadata collection fails, continue without it (non-fatal)
            console.print(f"[dim]Warning: Could not collect metadata: {e}[/dim]")

        return self.report
|