specfact-cli 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. specfact_cli/__init__.py +14 -0
  2. specfact_cli/agents/__init__.py +24 -0
  3. specfact_cli/agents/analyze_agent.py +392 -0
  4. specfact_cli/agents/base.py +95 -0
  5. specfact_cli/agents/plan_agent.py +202 -0
  6. specfact_cli/agents/registry.py +176 -0
  7. specfact_cli/agents/sync_agent.py +133 -0
  8. specfact_cli/analyzers/__init__.py +11 -0
  9. specfact_cli/analyzers/code_analyzer.py +796 -0
  10. specfact_cli/cli.py +396 -0
  11. specfact_cli/commands/__init__.py +7 -0
  12. specfact_cli/commands/enforce.py +88 -0
  13. specfact_cli/commands/import_cmd.py +365 -0
  14. specfact_cli/commands/init.py +125 -0
  15. specfact_cli/commands/plan.py +1089 -0
  16. specfact_cli/commands/repro.py +192 -0
  17. specfact_cli/commands/sync.py +408 -0
  18. specfact_cli/common/__init__.py +25 -0
  19. specfact_cli/common/logger_setup.py +654 -0
  20. specfact_cli/common/logging_utils.py +41 -0
  21. specfact_cli/common/text_utils.py +52 -0
  22. specfact_cli/common/utils.py +48 -0
  23. specfact_cli/comparators/__init__.py +11 -0
  24. specfact_cli/comparators/plan_comparator.py +391 -0
  25. specfact_cli/generators/__init__.py +14 -0
  26. specfact_cli/generators/plan_generator.py +105 -0
  27. specfact_cli/generators/protocol_generator.py +115 -0
  28. specfact_cli/generators/report_generator.py +200 -0
  29. specfact_cli/generators/workflow_generator.py +120 -0
  30. specfact_cli/importers/__init__.py +7 -0
  31. specfact_cli/importers/speckit_converter.py +773 -0
  32. specfact_cli/importers/speckit_scanner.py +711 -0
  33. specfact_cli/models/__init__.py +33 -0
  34. specfact_cli/models/deviation.py +105 -0
  35. specfact_cli/models/enforcement.py +150 -0
  36. specfact_cli/models/plan.py +97 -0
  37. specfact_cli/models/protocol.py +28 -0
  38. specfact_cli/modes/__init__.py +19 -0
  39. specfact_cli/modes/detector.py +126 -0
  40. specfact_cli/modes/router.py +153 -0
  41. specfact_cli/resources/semgrep/async.yml +285 -0
  42. specfact_cli/sync/__init__.py +12 -0
  43. specfact_cli/sync/repository_sync.py +279 -0
  44. specfact_cli/sync/speckit_sync.py +388 -0
  45. specfact_cli/utils/__init__.py +58 -0
  46. specfact_cli/utils/console.py +70 -0
  47. specfact_cli/utils/feature_keys.py +212 -0
  48. specfact_cli/utils/git.py +241 -0
  49. specfact_cli/utils/github_annotations.py +399 -0
  50. specfact_cli/utils/ide_setup.py +382 -0
  51. specfact_cli/utils/prompts.py +180 -0
  52. specfact_cli/utils/structure.py +497 -0
  53. specfact_cli/utils/yaml_utils.py +200 -0
  54. specfact_cli/validators/__init__.py +20 -0
  55. specfact_cli/validators/fsm.py +262 -0
  56. specfact_cli/validators/repro_checker.py +759 -0
  57. specfact_cli/validators/schema.py +196 -0
  58. specfact_cli-0.4.2.dist-info/METADATA +370 -0
  59. specfact_cli-0.4.2.dist-info/RECORD +62 -0
  60. specfact_cli-0.4.2.dist-info/WHEEL +4 -0
  61. specfact_cli-0.4.2.dist-info/entry_points.txt +2 -0
  62. specfact_cli-0.4.2.dist-info/licenses/LICENSE.md +61 -0
@@ -0,0 +1,759 @@
1
+ """
2
+ Reproducibility checker - Runs various validation tools and aggregates results.
3
+
4
+ This module provides functionality to run linting, type checking, contract
5
+ exploration, and test suites with time budgets and result aggregation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ import shutil
12
+ import subprocess
13
+ import time
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime
16
+ from enum import Enum
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from beartype import beartype
21
+ from icontract import ensure, require
22
+ from rich.console import Console
23
+
24
+
25
+ console = Console()
26
+
27
+
28
class CheckStatus(Enum):
    """Lifecycle/outcome status of a single validation check.

    Values are lowercase strings so they serialize cleanly via
    ``CheckResult.to_dict`` (which stores ``status.value``).
    """

    PENDING = "pending"  # CheckResult created, subprocess not launched yet
    RUNNING = "running"  # subprocess currently executing
    PASSED = "passed"  # tool exited with return code 0
    FAILED = "failed"  # tool exited non-zero, or the check raised an exception
    TIMEOUT = "timeout"  # per-check timeout expired or overall budget exceeded
    SKIPPED = "skipped"  # tool binary not found on PATH (skip_if_missing)
37
+
38
+
39
@beartype
@require(lambda text: isinstance(text, str), "Text must be string")
@ensure(lambda result: isinstance(result, str), "Must return string")
def _strip_ansi_codes(text: str) -> str:
    """Return *text* with ANSI terminal escape sequences removed.

    Handles both two-character Fe escapes (ESC followed by ``@``–``_``) and
    CSI sequences (``ESC [`` parameters/intermediates + final byte), which
    covers the color/cursor codes the wrapped tools emit.
    """
    return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
46
+
47
+
48
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(
    lambda result: "violations" in result and "total_violations" in result,
    "Must include violations and total_violations",
)
def _extract_ruff_findings(output: str) -> dict[str, Any]:
    """
    Extract structured findings from ruff output.

    Supports both ruff output styles:
      * full/new:  "W293 [*] Blank line contains whitespace" followed by
                   "--> src/file.py:240:1" on the next line
      * concise:   "src/file.py:240:1: W293 Blank line contains whitespace"

    Args:
        output: Raw ruff stdout (may contain ANSI escape codes).

    Returns:
        Dictionary with "violations" (list of dicts with file/line/column/
        code/message), "total_violations", and "files_checked".
    """
    findings: dict[str, Any] = {
        "violations": [],
        "total_violations": 0,
        "files_checked": 0,
    }

    # Strip ANSI codes so the line-oriented regexes see plain text
    clean_output = _strip_ansi_codes(output)

    # Rule codes are one or more letters followed by digits (W293, PLR0911,
    # UP007, ...). BUGFIX: the previous patterns used [A-Z]\d+, which only
    # matched single-letter prefixes and silently dropped multi-letter codes
    # such as the PLR/SIM/UP rule families.
    lines = clean_output.split("\n")
    i = 0
    while i < len(lines):
        line_stripped = lines[i].strip()
        if not line_stripped:
            i += 1
            continue

        # Skip help lines and code-frame markers from the full output format
        if line_stripped.startswith(("help:", "|", " |")):
            i += 1
            continue

        # Try format 1: "CODE [*] message" followed by "--> file:line:col"
        code_match = re.match(r"^([A-Z]+\d+)\s+\[[^\]]+\]\s+(.+)$", line_stripped)
        if code_match:
            code = code_match.group(1)
            message = code_match.group(2)
            # Look for location line: "--> file:line:col"
            if i + 1 < len(lines):
                location_line = lines[i + 1].strip()
                location_match = re.match(r"-->\s+([^:]+):(\d+):(\d+)", location_line)
                if location_match:
                    file_path = location_match.group(1)
                    line_num = int(location_match.group(2))
                    col_num = int(location_match.group(3))
                    findings["violations"].append(
                        {
                            "file": file_path,
                            "line": line_num,
                            "column": col_num,
                            "code": code,
                            "message": message,
                        }
                    )
                    i += 2  # Consume both the message line and the location line
                    continue

        # Try format 2: "file:line:col: code message"
        pattern = r"^([^:]+):(\d+):(\d+):\s+([A-Z]+\d+)\s+(.+)$"
        match = re.match(pattern, line_stripped)
        if match:
            file_path, line_num, col_num, code, message = match.groups()
            findings["violations"].append(
                {
                    "file": file_path,
                    "line": int(line_num),
                    "column": int(col_num),
                    "code": code,
                    "message": message,
                }
            )

        i += 1

    # Derive the total from the collected list so the two can never diverge
    findings["total_violations"] = len(findings["violations"])

    # Extract files-checked count from ruff's summary line, if present
    files_match = re.search(r"(\d+)\s+files?\s+checked", clean_output, re.IGNORECASE)
    if files_match:
        findings["files_checked"] = int(files_match.group(1))

    return findings
133
+
134
+
135
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@require(lambda error: isinstance(error, str), "Error must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "total_findings" in result, "Must include total_findings")
def _extract_semgrep_findings(output: str, error: str) -> dict[str, Any]:
    """Extract structured findings from semgrep output.

    Semgrep writes its run summary to stderr, so stdout and stderr are
    scanned together for the "Findings:", "Rules run:" and
    "Targets scanned:" counters.
    """
    findings: dict[str, Any] = {
        "findings": [],
        "total_findings": 0,
        "rules_run": 0,
        "targets_scanned": 0,
    }

    # Merge both streams, then drop ANSI color codes before matching
    combined = _strip_ansi_codes((output + "\n" + error).strip())

    # (result key, summary-line pattern) pairs pulled from semgrep's footer
    summary_counters = (
        ("total_findings", r"Findings:\s*(\d+)"),
        ("rules_run", r"Rules\s+run:\s*(\d+)"),
        ("targets_scanned", r"Targets\s+scanned:\s*(\d+)"),
    )
    for key, pattern in summary_counters:
        counter_match = re.search(pattern, combined, re.IGNORECASE)
        if counter_match:
            findings[key] = int(counter_match.group(1))

    return findings
168
+
169
+
170
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "errors" in result and "warnings" in result, "Must include errors and warnings")
def _extract_basedpyright_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from basedpyright output.

    Parses diagnostic lines of the form ``path:line:col: error|warning: message``
    and buckets them into errors and warnings with running totals.
    """
    findings: dict[str, Any] = {
        "errors": [],
        "warnings": [],
        "total_errors": 0,
        "total_warnings": 0,
    }

    # Compile once; the same pattern is applied to every line
    diagnostic_re = re.compile(r"^([^:]+):(\d+):(\d+):\s+(error|warning):\s+(.+)$")

    for raw_line in _strip_ansi_codes(output).split("\n"):
        diagnostic = diagnostic_re.match(raw_line.strip())
        if not diagnostic:
            # Blank lines, summary lines, etc. carry no diagnostic
            continue
        file_path, line_num, col_num, severity, message = diagnostic.groups()
        entry = {
            "file": file_path,
            "line": int(line_num),
            "column": int(col_num),
            "message": message,
        }
        bucket = "errors" if severity == "error" else "warnings"
        findings[bucket].append(entry)
        findings[f"total_{bucket}"] += 1

    return findings
209
+
210
+
211
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "counterexamples" in result, "Must include counterexamples")
def _extract_crosshair_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from CrossHair output.

    CrossHair's output format varies; this is a best-effort scan that only
    records ``file:line`` locations tagged with "counterexample" or "failed".
    """
    findings: dict[str, Any] = {
        "counterexamples": [],
        "total_counterexamples": 0,
    }

    clean_output = _strip_ansi_codes(output)

    # Only bother scanning for locations when the output signals a problem
    lowered = clean_output.lower()
    if "counterexample" in lowered or "failed" in lowered:
        location_re = re.compile(r"([^:]+):(\d+):.*?(counterexample|failed)", re.IGNORECASE)
        for hit in location_re.finditer(clean_output):
            findings["counterexamples"].append(
                {
                    "file": hit.group(1),
                    "line": int(hit.group(2)),
                    "type": hit.group(3).lower(),
                }
            )
            findings["total_counterexamples"] += 1

    return findings
242
+
243
+
244
@beartype
@require(lambda output: isinstance(output, str), "Output must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
@ensure(lambda result: "tests_run" in result, "Must include tests_run")
@ensure(lambda result: result["tests_run"] >= 0, "tests_run must be non-negative")
def _extract_pytest_findings(output: str) -> dict[str, Any]:
    """Extract structured findings from pytest output.

    Reads the counts out of pytest's terminal summary (e.g. "3 passed,
    1 failed, 2 skipped") and derives ``tests_run`` as their sum.
    """
    findings: dict[str, Any] = {
        "tests_run": 0,
        "tests_passed": 0,
        "tests_failed": 0,
        "tests_skipped": 0,
        "failures": [],
    }

    clean_output = _strip_ansi_codes(output)

    # Each outcome keyword maps to the result key it populates
    for key, outcome in (
        ("tests_passed", "passed"),
        ("tests_failed", "failed"),
        ("tests_skipped", "skipped"),
    ):
        count_match = re.search(rf"(\d+)\s+{outcome}", clean_output, re.IGNORECASE)
        if count_match:
            findings[key] = int(count_match.group(1))

    findings["tests_run"] = findings["tests_passed"] + findings["tests_failed"] + findings["tests_skipped"]

    return findings
278
+
279
+
280
@beartype
@require(lambda tool: isinstance(tool, str) and len(tool) > 0, "Tool must be non-empty string")
@require(lambda output: isinstance(output, str), "Output must be string")
@require(lambda error: isinstance(error, str), "Error must be string")
@ensure(lambda result: isinstance(result, dict), "Must return dictionary")
def _extract_findings(tool: str, output: str, error: str) -> dict[str, Any]:
    """
    Extract structured findings from tool output based on tool type.

    Args:
        tool: Tool name (ruff, semgrep, basedpyright, crosshair, pytest)
        output: Tool stdout output
        error: Tool stderr output

    Returns:
        Dictionary with structured findings for the specific tool
    """
    tool_lower = tool.lower()

    # Semgrep is the only extractor that also needs stderr (its summary
    # goes there), so it is handled before the stdout-only dispatch table.
    if tool_lower == "semgrep":
        return _extract_semgrep_findings(output, error)

    stdout_extractors = {
        "ruff": _extract_ruff_findings,
        "basedpyright": _extract_basedpyright_findings,
        "crosshair": _extract_crosshair_findings,
        "pytest": _extract_pytest_findings,
    }
    extractor = stdout_extractors.get(tool_lower)
    if extractor is None:
        # Unknown tool - no structured parser available
        return {}
    return extractor(output)
310
+
311
+
312
@dataclass
class CheckResult:
    """Result of a single validation check.

    Captures the tool's identity, final status, timing, exit code and raw
    stdout/stderr; ``to_dict`` turns this into a JSON-friendly record with
    structured per-tool findings.
    """

    name: str
    tool: str
    status: CheckStatus
    duration: float | None = None
    exit_code: int | None = None
    output: str = ""
    error: str = ""
    timeout: bool = False

    def __post_init__(self) -> None:
        """Normalize an empty tool name so findings extraction never sees ""."""
        self.tool = self.tool or "unknown"

    @beartype
    @require(lambda max_output_length: max_output_length > 0, "max_output_length must be positive")
    @ensure(lambda result: isinstance(result, dict), "Must return dictionary")
    @ensure(
        lambda result: "name" in result and "tool" in result and "status" in result,
        "Must include name, tool, and status",
    )
    def to_dict(self, include_findings: bool = True, max_output_length: int = 50000) -> dict[str, Any]:
        """
        Convert result to dictionary with structured findings.

        Args:
            include_findings: Whether to include structured findings (default: True)
            max_output_length: Maximum length of raw output/error to include if findings unavailable (truncates if longer)

        Returns:
            Dictionary representation of the check result with structured findings
        """
        result: dict[str, Any] = {
            "name": self.name,
            "tool": self.tool,
            "status": self.status.value,
            "duration": self.duration,
            "exit_code": self.exit_code,
            "timeout": self.timeout,
            "output_length": len(self.output),
            "error_length": len(self.error),
        }

        if include_findings and self.tool:
            try:
                findings = _extract_findings(self.tool, self.output, self.error)
                if findings:
                    result["findings"] = findings
            except Exception:
                # Extraction failed: fall back to the raw streams, stripped
                # of ANSI codes and truncated to max_output_length.
                for key, text in (("output", self.output), ("error", self.error)):
                    if not text:
                        result[key] = ""
                    elif len(text) <= max_output_length:
                        result[key] = _strip_ansi_codes(text)
                    else:
                        result[key] = _strip_ansi_codes(text[:max_output_length])
                        result[f"{key}_truncated"] = True

        return result
386
+
387
+
388
@dataclass
class ReproReport:
    """Aggregated report of all validation checks.

    Accumulates CheckResult entries plus per-status counters, and carries
    run metadata (budget, plan/config paths, flags) for serialization.
    """

    checks: list[CheckResult] = field(default_factory=list)
    total_duration: float = 0.0
    total_checks: int = 0
    passed_checks: int = 0
    failed_checks: int = 0
    timeout_checks: int = 0
    skipped_checks: int = 0
    budget_exceeded: bool = False
    # Metadata fields
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    repo_path: str | None = None
    budget: int | None = None
    active_plan_path: str | None = None
    enforcement_config_path: str | None = None
    enforcement_preset: str | None = None
    fix_enabled: bool = False
    fail_fast: bool = False

    @beartype
    @require(lambda result: isinstance(result, CheckResult), "Must be CheckResult instance")
    def add_check(self, result: CheckResult) -> None:
        """Append a check result and update the aggregate counters."""
        self.checks.append(result)
        self.total_checks += 1

        if result.duration:
            self.total_duration += result.duration

        # Bump the per-status counter matching this result's final status
        # (PENDING/RUNNING results do not increment any counter).
        counter_for_status = {
            CheckStatus.PASSED: "passed_checks",
            CheckStatus.FAILED: "failed_checks",
            CheckStatus.TIMEOUT: "timeout_checks",
            CheckStatus.SKIPPED: "skipped_checks",
        }
        counter = counter_for_status.get(result.status)
        if counter is not None:
            setattr(self, counter, getattr(self, counter) + 1)

    @beartype
    @ensure(lambda result: result in (0, 1, 2), "Exit code must be 0, 1, or 2")
    def get_exit_code(self) -> int:
        """
        Get exit code for the repro command.

        Returns:
            0 = all passed, 1 = some failed, 2 = budget exceeded
        """
        if self.budget_exceeded or self.timeout_checks > 0:
            return 2
        # CrossHair failures are advisory only and never block the run
        has_blocking_failure = any(
            check.status == CheckStatus.FAILED and check.tool != "crosshair" for check in self.checks
        )
        return 1 if has_blocking_failure else 0

    @beartype
    @require(lambda max_finding_length: max_finding_length > 0, "max_finding_length must be positive")
    @ensure(lambda result: isinstance(result, dict), "Must return dictionary")
    @ensure(lambda result: "total_checks" in result and "checks" in result, "Must include total_checks and checks")
    def to_dict(self, include_findings: bool = True, max_finding_length: int = 50000) -> dict[str, Any]:
        """
        Convert report to dictionary with structured findings.

        Args:
            include_findings: Whether to include structured findings for each check (default: True)
            max_finding_length: Maximum length of raw output/error to include if findings unavailable (truncates if longer)

        Returns:
            Dictionary representation of the report with structured findings
        """
        result: dict[str, Any] = {
            "total_duration": self.total_duration,
            "total_checks": self.total_checks,
            "passed_checks": self.passed_checks,
            "failed_checks": self.failed_checks,
            "timeout_checks": self.timeout_checks,
            "skipped_checks": self.skipped_checks,
            "budget_exceeded": self.budget_exceeded,
            "checks": [
                check.to_dict(include_findings=include_findings, max_output_length=max_finding_length)
                for check in self.checks
            ],
        }

        # Collect metadata, preserving key order: truthy fields are included,
        # except budget which is included whenever it is set (even 0).
        metadata: dict[str, Any] = {
            key: value
            for key, value in (("timestamp", self.timestamp), ("repo_path", self.repo_path))
            if value
        }
        if self.budget is not None:
            metadata["budget"] = self.budget
        for key, value in (
            ("active_plan_path", self.active_plan_path),
            ("enforcement_config_path", self.enforcement_config_path),
            ("enforcement_preset", self.enforcement_preset),
            ("fix_enabled", self.fix_enabled),
            ("fail_fast", self.fail_fast),
        ):
            if value:
                metadata[key] = value

        if metadata:
            result["metadata"] = metadata

        return result
500
+
501
+
502
class ReproChecker:
    """
    Runs validation checks with time budgets and result aggregation.

    Executes various tools (ruff, semgrep, basedpyright, crosshair, pytest)
    and aggregates their results into a comprehensive report.
    """

    @beartype
    @require(lambda budget: budget > 0, "Budget must be positive")
    @ensure(lambda self: self.budget > 0, "Budget must be positive after init")
    def __init__(
        self, repo_path: Path | None = None, budget: int = 120, fail_fast: bool = False, fix: bool = False
    ) -> None:
        """
        Initialize reproducibility checker.

        Args:
            repo_path: Path to repository (default: current directory)
            budget: Total time budget in seconds (must be > 0)
            fail_fast: Stop on first failure
            fix: Apply auto-fixes where available (Semgrep auto-fixes)
        """
        self.repo_path = Path(repo_path) if repo_path else Path(".")
        self.budget = budget
        self.fail_fast = fail_fast
        self.fix = fix
        self.report = ReproReport()
        # Wall-clock anchor: all budget accounting in run_check/run_all_checks
        # measures elapsed time from construction, not from run_all_checks().
        self.start_time = time.time()

        # Initialize metadata in report
        self.report.repo_path = str(self.repo_path.absolute())
        self.report.budget = budget
        self.report.fix_enabled = fix
        self.report.fail_fast = fail_fast

    @beartype
    @require(lambda name: isinstance(name, str) and len(name) > 0, "Name must be non-empty string")
    @require(lambda tool: isinstance(tool, str) and len(tool) > 0, "Tool must be non-empty string")
    @require(lambda command: isinstance(command, list) and len(command) > 0, "Command must be non-empty list")
    @require(lambda timeout: timeout is None or timeout > 0, "Timeout must be positive if provided")
    @ensure(lambda result: isinstance(result, CheckResult), "Must return CheckResult")
    @ensure(lambda result: result.duration is None or result.duration >= 0, "Duration must be non-negative")
    def run_check(
        self,
        name: str,
        tool: str,
        command: list[str],
        timeout: int | None = None,
        skip_if_missing: bool = True,
    ) -> CheckResult:
        """
        Run a single validation check.

        Args:
            name: Human-readable check name
            tool: Tool name (for display)
            command: Command to execute
            timeout: Per-check timeout (default: budget / number of checks, must be > 0 if provided)
            skip_if_missing: Skip check if tool not found

        Returns:
            CheckResult with status and output
        """
        result = CheckResult(name=name, tool=tool, status=CheckStatus.PENDING)

        # Check if tool exists (cross-platform); SKIPPED rather than FAILED
        # so a missing optional tool does not fail the whole run
        if skip_if_missing:
            tool_path = shutil.which(command[0])
            if tool_path is None:
                result.status = CheckStatus.SKIPPED
                result.error = f"Tool '{command[0]}' not found in PATH, skipping"
                return result

        # Check budget before launching the subprocess
        elapsed = time.time() - self.start_time
        if elapsed >= self.budget:
            self.report.budget_exceeded = True
            result.status = CheckStatus.TIMEOUT
            result.timeout = True
            result.error = f"Budget exceeded ({self.budget}s)"
            return result

        # Calculate timeout for this check: the caller's timeout if given,
        # otherwise half the remaining budget; never more than what is left
        remaining_budget = self.budget - elapsed
        check_timeout = min(timeout or (remaining_budget / 2), remaining_budget)

        # Run command (check=False: a non-zero exit is recorded as FAILED,
        # not raised as CalledProcessError)
        result.status = CheckStatus.RUNNING
        start = time.time()

        try:
            proc = subprocess.run(
                command,
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                timeout=check_timeout,
                check=False,
            )

            result.duration = time.time() - start
            result.exit_code = proc.returncode
            result.output = proc.stdout
            result.error = proc.stderr

            if proc.returncode == 0:
                result.status = CheckStatus.PASSED
            else:
                result.status = CheckStatus.FAILED

        except subprocess.TimeoutExpired:
            # Per-check timeout (distinct from overall budget exhaustion above)
            result.duration = time.time() - start
            result.status = CheckStatus.TIMEOUT
            result.timeout = True
            result.error = f"Check timed out after {check_timeout}s"

        except Exception as e:
            # Launch/IO failures (e.g. permission errors) are recorded as FAILED
            result.duration = time.time() - start
            result.status = CheckStatus.FAILED
            result.error = f"Check failed with exception: {e!s}"

        return result

    @beartype
    @ensure(lambda result: isinstance(result, ReproReport), "Must return ReproReport")
    @ensure(lambda result: result.total_checks >= 0, "Total checks must be non-negative")
    @ensure(
        lambda result: result.total_checks
        == result.passed_checks + result.failed_checks + result.timeout_checks + result.skipped_checks,
        "Total checks must equal sum of all status types",
    )
    def run_all_checks(self) -> ReproReport:
        """
        Run all validation checks.

        Builds the check list based on which configs/directories exist in the
        repo, runs each check under the shared time budget (honoring
        fail_fast), then attaches plan/enforcement metadata to the report.

        Returns:
            ReproReport with aggregated results
        """
        # Check if semgrep config exists
        semgrep_config = self.repo_path / "tools" / "semgrep" / "async.yml"
        semgrep_enabled = semgrep_config.exists()

        # Check if test directories exist
        contracts_tests = self.repo_path / "tests" / "contracts"
        smoke_tests = self.repo_path / "tests" / "smoke"
        src_dir = self.repo_path / "src"

        # Each entry: (name, tool, command, per-check timeout, skip_if_missing)
        checks: list[tuple[str, str, list[str], int | None, bool]] = [
            ("Linting (ruff)", "ruff", ["ruff", "check", "src/", "tests/", "tools/"], None, True),
        ]

        # Add semgrep only if config exists
        if semgrep_enabled:
            semgrep_command = ["semgrep", "--config", str(semgrep_config.relative_to(self.repo_path)), "."]
            if self.fix:
                semgrep_command.append("--autofix")
            checks.append(
                (
                    "Async patterns (semgrep)",
                    "semgrep",
                    semgrep_command,
                    30,
                    True,
                )
            )

        checks.extend(
            [
                ("Type checking (basedpyright)", "basedpyright", ["basedpyright", "src/", "tools/"], None, True),
            ]
        )

        # Add CrossHair only if src/ exists
        # Exclude common/logger_setup.py from CrossHair analysis due to known signature analysis issues
        # CrossHair doesn't support --exclude, so we exclude the common directory and add other directories
        if src_dir.exists():
            # Get all subdirectories except common
            specfact_dirs = [d for d in src_dir.iterdir() if d.is_dir() and d.name != "common"]
            crosshair_targets = ["src/" + d.name for d in specfact_dirs] + ["tools/"]
            checks.append(
                (
                    "Contract exploration (CrossHair)",
                    "crosshair",
                    ["crosshair", "check", *crosshair_targets],
                    60,
                    True,
                )
            )

        # Add property tests only if directory exists
        if contracts_tests.exists():
            checks.append(
                (
                    "Property tests (pytest contracts)",
                    "pytest",
                    ["pytest", "tests/contracts/", "-v"],
                    30,
                    True,
                )
            )

        # Add smoke tests only if directory exists
        if smoke_tests.exists():
            checks.append(("Smoke tests (pytest smoke)", "pytest", ["pytest", "tests/smoke/", "-v"], 30, True))

        for check_args in checks:
            # Check budget before starting the next check
            elapsed = time.time() - self.start_time
            if elapsed >= self.budget:
                self.report.budget_exceeded = True
                break

            # Run check
            result = self.run_check(*check_args)
            self.report.add_check(result)

            # Fail fast if requested
            if self.fail_fast and result.status == CheckStatus.FAILED:
                break

        self.report.total_duration = time.time() - self.start_time

        # Check if budget exceeded (the loop may have finished just over budget)
        elapsed = time.time() - self.start_time
        if elapsed >= self.budget:
            self.report.budget_exceeded = True

        # Populate metadata: active plan and enforcement config
        try:
            # NOTE(review): imported here rather than at module top —
            # presumably to avoid an import cycle; confirm before hoisting
            from specfact_cli.utils.structure import SpecFactStructure

            # Get active plan path
            active_plan_path = SpecFactStructure.get_default_plan_path(self.repo_path)
            if active_plan_path.exists():
                self.report.active_plan_path = str(active_plan_path.relative_to(self.repo_path))

            # Get enforcement config path and preset
            enforcement_config_path = SpecFactStructure.get_enforcement_config_path(self.repo_path)
            if enforcement_config_path.exists():
                self.report.enforcement_config_path = str(enforcement_config_path.relative_to(self.repo_path))
                try:
                    from specfact_cli.models.enforcement import EnforcementConfig
                    from specfact_cli.utils.yaml_utils import load_yaml

                    config_data = load_yaml(enforcement_config_path)
                    if config_data:
                        enforcement_config = EnforcementConfig(**config_data)
                        self.report.enforcement_preset = enforcement_config.preset.value
                except Exception as e:
                    # If config can't be loaded, just skip preset (non-fatal)
                    console.print(f"[dim]Warning: Could not load enforcement config preset: {e}[/dim]")

        except Exception as e:
            # If metadata collection fails, continue without it (non-fatal)
            console.print(f"[dim]Warning: Could not collect metadata: {e}[/dim]")

        return self.report