specpass 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- specpass-0.1.0/PKG-INFO +29 -0
- specpass-0.1.0/README.md +20 -0
- specpass-0.1.0/pyproject.toml +17 -0
- specpass-0.1.0/setup.cfg +4 -0
- specpass-0.1.0/setup.py +16 -0
- specpass-0.1.0/specpass.egg-info/PKG-INFO +29 -0
- specpass-0.1.0/specpass.egg-info/SOURCES.txt +9 -0
- specpass-0.1.0/specpass.egg-info/dependency_links.txt +1 -0
- specpass-0.1.0/specpass.egg-info/entry_points.txt +2 -0
- specpass-0.1.0/specpass.egg-info/top_level.txt +1 -0
- specpass-0.1.0/specpass.py +874 -0
specpass-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: specpass
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Verification-First Code Generation Platform
|
|
5
|
+
Author: WoOty
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Dynamic: requires-python
|
|
9
|
+
|
|
10
|
+
# specpass
|
|
11
|
+
|
|
12
|
+
**From spec to verified code. One command.**
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install specpass
|
|
16
|
+
specpass run spec.md --out build/
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
specpass — Verification-First Code Generation Pipeline. Берёт spec в формате Markdown, генерирует код через DeepSeek/Claude, проверяет 4 детерминированными gates (import, static, contract, runtime), детектит LLM-шорткаты (11 cheat patterns), выдаёт quality score.
|
|
20
|
+
|
|
21
|
+
**Уникально:** Cost advantage ×2000 (DeepSeek context caching, $0.01/45 файлов). Аналогов нет.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
specpass fastapi-auth # инициализация из spec registry
|
|
25
|
+
specpass verify build/ --spec spec.md # только verification
|
|
26
|
+
specpass search "jwt" # поиск specs
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Лицензия: MIT
|
specpass-0.1.0/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# specpass
|
|
2
|
+
|
|
3
|
+
**From spec to verified code. One command.**
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install specpass
|
|
7
|
+
specpass run spec.md --out build/
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
specpass — Verification-First Code Generation Pipeline. Берёт spec в формате Markdown, генерирует код через DeepSeek/Claude, проверяет 4 детерминированными gates (import, static, contract, runtime), детектит LLM-шорткаты (11 cheat patterns), выдаёт quality score.
|
|
11
|
+
|
|
12
|
+
**Уникально:** Cost advantage ×2000 (DeepSeek context caching, $0.01/45 файлов). Аналогов нет.
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
specpass fastapi-auth # инициализация из spec registry
|
|
16
|
+
specpass verify build/ --spec spec.md # только verification
|
|
17
|
+
specpass search "jwt" # поиск specs
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Лицензия: MIT
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "specpass"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Verification-First Code Generation Platform"
|
|
9
|
+
authors = [{name = "WoOty"}]
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
|
|
13
|
+
[project.scripts]
|
|
14
|
+
specpass = "specpass:main"
|
|
15
|
+
|
|
16
|
+
[tool.setuptools]
|
|
17
|
+
py-modules = ["specpass"]
|
specpass-0.1.0/setup.cfg
ADDED
specpass-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from setuptools import setup

# Distribution metadata for the single-module `specpass` package.
metadata = {
    "name": "specpass",
    "version": "0.1.0",
    "description": "Verification-First Code Generation Platform",
    "long_description": "From spec to verified code. One command.",
    "author": "WoOty",
    # The whole tool ships as one top-level module.
    "py_modules": ["specpass"],
    # Installs the `specpass` console command, bound to specpass.main().
    "entry_points": {
        "console_scripts": [
            "specpass=specpass:main",
        ],
    },
    "python_requires": ">=3.10",
}

setup(**metadata)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: specpass
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Verification-First Code Generation Platform
|
|
5
|
+
Author: WoOty
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Dynamic: requires-python
|
|
9
|
+
|
|
10
|
+
# specpass
|
|
11
|
+
|
|
12
|
+
**From spec to verified code. One command.**
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install specpass
|
|
16
|
+
specpass run spec.md --out build/
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
specpass — Verification-First Code Generation Pipeline. Берёт spec в формате Markdown, генерирует код через DeepSeek/Claude, проверяет 4 детерминированными gates (import, static, contract, runtime), детектит LLM-шорткаты (11 cheat patterns), выдаёт quality score.
|
|
20
|
+
|
|
21
|
+
**Уникально:** Cost advantage ×2000 (DeepSeek context caching, $0.01/45 файлов). Аналогов нет.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
specpass fastapi-auth # инициализация из spec registry
|
|
25
|
+
specpass verify build/ --spec spec.md # только verification
|
|
26
|
+
specpass search "jwt" # поиск specs
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Лицензия: MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
specpass
|
|
@@ -0,0 +1,874 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
specpass v1 — Verification-First Code Platform.
|
|
4
|
+
|
|
5
|
+
Pipeline:
|
|
6
|
+
spec.md → [verify spec] → [generate code + harness] → [run harness] → [cheat detect] → [score + audit]
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
specpass spec.md --out build/
|
|
10
|
+
specpass verify build/ --spec spec.md
|
|
11
|
+
specpass search "jwt auth"
|
|
12
|
+
specpass publish spec.md
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import sys, os, json, subprocess, time, re, hashlib, argparse, shutil, textwrap
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional, List, Dict
|
|
18
|
+
|
|
19
|
+
# Directory that contains this module; all data directories hang off it.
SPECPASS_DIR = Path(__file__).parent

# Data directories, each a direct child of SPECPASS_DIR.
AGENTS_DIR, TEMPLATES_DIR, SPECS_DIR, REGISTRY_DIR = (
    SPECPASS_DIR / child for child in ("agents", "templates", "specs", "registry")
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
27
|
+
# PART 1: Spec
|
|
28
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
29
|
+
|
|
30
|
+
def _split_top_level_commas(text: str) -> list:
    """Split ``text`` on commas that are not nested inside brackets."""
    pieces, current, depth = [], [], 0
    for ch in text:
        if ch in "[({":
            depth += 1
        elif ch in "])}":
            depth = max(0, depth - 1)
        if ch == "," and depth == 0:
            pieces.append("".join(current))
            current = []
        else:
            current.append(ch)
    pieces.append("".join(current))
    return pieces


def load_markdown_spec(path: Path) -> dict:
    """Parse a Markdown spec file into a structured dict.

    Recognised layout:
      * the first ``# Title`` line becomes ``name``;
      * ``## Contracts`` items look like
        ``- `func(arg: Type, ...) -> Ret` @test path/to/test.py``;
      * ``## Dependencies``, ``## Boundaries`` and ``## Examples`` items are
        plain ``- text`` bullets;
      * any other ``## `` heading closes the current section.

    Args:
        path: Location of the Markdown spec.

    Returns:
        Dict with keys ``name``, ``contracts``, ``dependencies``,
        ``boundaries``, ``examples``, ``path`` and ``complexity``
        (``"simple"`` or ``"complex"``).
    """
    # Decode explicitly (and tolerate a BOM) so parsing does not depend on the
    # platform locale.
    text = path.read_text(encoding="utf-8-sig")
    spec = {
        "name": "", "contracts": [], "dependencies": [], "boundaries": [],
        "examples": [], "path": str(path), "complexity": "simple",
    }
    lines = text.split("\n")

    # The first level-1 heading is the spec name.
    for line in lines:
        if line.startswith("# "):
            spec["name"] = line[2:].strip()
            break

    section_keys = {
        "## Contracts": "contracts",
        "## Dependencies": "dependencies",
        "## Boundaries": "boundaries",
        "## Examples": "examples",
    }
    contract_re = re.compile(r"- `(.+?)`\s*(?:@test\s+(\S+))?")
    # The return type captures the rest of the signature so that generic
    # types containing spaces ("Dict[str, int]") are kept whole.
    signature_re = re.compile(r"(\w+(?:\.\w+)?)\(([^)]*)\)\s*(?:->\s*(.+))?")

    current = None
    for line in lines:
        stripped = line.strip()
        if stripped.startswith("## "):
            # Known heading -> switch section; unknown heading -> leave sections.
            current = next(
                (key for header, key in section_keys.items() if stripped.startswith(header)),
                None,
            )
            continue
        if current is None or not stripped.startswith("- "):
            continue
        if current != "contracts":
            spec[current].append(stripped[2:].strip())
            continue

        item = contract_re.match(stripped)
        if not item:
            continue
        sig_str, test_path = item.group(1), item.group(2) or ""
        sig = signature_re.match(sig_str)
        if not sig:
            continue
        name, args_str = sig.group(1), sig.group(2)
        returns = (sig.group(3) or "None").strip() or "None"

        args = []
        # Bracket-aware split: "a: Dict[str, int], b: int" is two arguments.
        for raw in _split_top_level_commas(args_str):
            raw = raw.strip()
            if not raw:
                continue
            if ":" in raw:
                arg_name, arg_type = raw.split(":", 1)
                args.append({"name": arg_name.strip(), "type": arg_type.strip()})
            else:
                args.append({"name": raw, "type": "Any"})
        spec["contracts"].append(
            {"name": name, "args": args, "returns": returns, "test": test_path}
        )

    # Complexity heuristic: many contracts, many boundaries, or external
    # dependencies combined with container/optional argument types.
    has_deps = bool(spec["dependencies"])
    has_complex_types = any(
        marker in arg["type"]
        for contract in spec["contracts"]
        for arg in contract["args"]
        for marker in ("List", "Dict", "Optional")
    )
    if (
        len(spec["contracts"]) > 5
        or (has_deps and has_complex_types)
        or len(spec["boundaries"]) > 3
    ):
        spec["complexity"] = "complex"

    return spec
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def verify_spec(spec: dict) -> list:
    """Validate a parsed spec before any code is generated.

    Returns a list of human-readable problems (empty when the spec is fine).
    Missing-test errors for all contracts are reported before missing-file
    errors. Relative test paths are resolved against the spec file's folder.
    """
    problems = []
    contracts = spec["contracts"]

    if not spec["name"]:
        problems.append("Spec must have a name (# Module Name)")
    if not contracts:
        problems.append("Spec must have at least one contract")

    # Pass 1: every contract must name a test file.
    problems.extend(
        f"Contract '{contract['name']}' has no @test path"
        for contract in contracts
        if not contract["test"]
    )

    # Pass 2: every named test file must exist on disk.
    for contract in contracts:
        declared = contract["test"]
        if not declared:
            continue
        candidate = Path(declared)
        if candidate.is_absolute():
            resolved = candidate
        else:
            resolved = Path(spec.get("path", ".")).parent / candidate
        if not resolved.exists():
            problems.append(f"Test file for '{contract['name']}' not found: {resolved}")

    return problems
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
115
|
+
# PART 2: Cost Routing Engine
|
|
116
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
117
|
+
|
|
118
|
+
# Model routing per spec complexity tier and pipeline task.
# A value of None means the task is skipped for that tier.
ROUTE_TABLE = {
    "simple": dict(
        generation="deepseek/deepseek-v4-flash",  # $0.14/M inp, $0.28/M out
        fix="deepseek/deepseek-v4-flash",
        harness="deepseek/deepseek-v4-flash",
        adversarial=None,  # skip for simple
    ),
    "complex": dict(
        generation="deepseek/deepseek-v4-flash",  # cache hit 95% = $0.0028/M
        fix="anthropic/claude-sonnet-4",  # better at fixing logic
        harness="anthropic/claude-sonnet-4",
        adversarial="anthropic/claude-sonnet-4",
    ),
    "critical": dict(
        generation="anthropic/claude-sonnet-4",  # best quality
        fix="anthropic/claude-sonnet-4",
        harness="anthropic/claude-sonnet-4",
        adversarial="anthropic/claude-opus-4",
    ),
}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def select_model(spec: dict, task: str = "generation") -> str:
    """Pick the model for ``task`` based on the spec's complexity tier.

    Unknown tiers use the "simple" routes. Unknown tasks, and tasks routed to
    None, resolve to the DeepSeek flash model.
    """
    fallback = "deepseek/deepseek-v4-flash"
    tier = spec.get("complexity", "simple")
    routes = ROUTE_TABLE[tier] if tier in ROUTE_TABLE else ROUTE_TABLE["simple"]
    chosen = routes.get(task, fallback)
    # fallback if task skipped
    return fallback if chosen is None else chosen
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
151
|
+
# PART 3: Custom Verification Harness Generator
|
|
152
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
153
|
+
|
|
154
|
+
def generate_harness(spec: dict, code_dir: Path, model: str) -> dict:
    """Ask an LLM for a spec-specific verification script and save it.

    The script is written to ``code_dir / "verify.py"``.

    Args:
        spec: Parsed spec (uses ``name``, ``contracts``, ``examples``,
            ``boundaries``).
        code_dir: Directory holding the generated code; created if missing.
        model: Model identifier passed to the LLM call.

    Returns:
        ``{"success": True, "path": str, "code": str}`` on success, otherwise
        ``{"success": False, "error": str, "code": ""}``. An empty LLM answer
        counts as a failure: an empty ``verify.py`` would exit 0 when run and
        be mistaken for a passing harness.
    """
    contract_details = "\n".join(
        f"# {c['name']}({', '.join(a['name']+':'+a['type'] for a in c['args'])}) -> {c['returns']}"
        for c in spec["contracts"]
    )
    examples_text = "\n".join(f"# {e}" for e in spec["examples"]) if spec["examples"] else "# no examples"
    boundaries_text = "\n".join(f"# {b}" for b in spec["boundaries"]) if spec["boundaries"] else "# no boundaries"

    prompt = f"""Generate a Python verification script for this spec:

Project: {spec['name']}
Contracts:
{contract_details}

Examples:
{examples_text}

Boundaries:
{boundaries_text}

The script MUST:
1. Check all modules import successfully
2. Verify all contract function names exist with correct args
3. Run pytest on {spec['name'].lower().replace(' ', '_')}/
4. Check security boundaries are respected
5. Return a JSON report with: "pass": bool, "score": float, "details": dict

Write ONLY the Python script. No explanations. Use pytest for step 3."""

    # Call LLM to generate harness
    result = _call_llm_simple(model, prompt)
    if not result["success"]:
        return {"success": False, "error": result.get("error", "harness gen failed"), "code": ""}

    code = result.get("content") or ""
    # Prefer the first fenced code block; otherwise use the raw answer.
    fenced = re.search(r"```(?:python)?\n(.+?)\n```", code, re.DOTALL)
    if fenced:
        code = fenced.group(1).strip()

    if not code.strip():
        return {"success": False, "error": "harness gen returned empty script", "code": ""}

    # Save harness
    code_dir.mkdir(parents=True, exist_ok=True)
    harness_path = code_dir / "verify.py"
    harness_path.write_text(code, encoding="utf-8")

    return {"success": True, "path": str(harness_path), "code": code}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def run_harness(code_dir: Path) -> dict:
    """Run ``code_dir / "verify.py"`` and translate its output into a report.

    The harness is expected to print a JSON object with ``pass``, ``score``
    and ``details``. If stdout is not a JSON object, the process exit code
    decides the outcome (0 -> pass with score 100).

    Returns:
        Dict with at least ``pass`` and ``score``. A missing harness or a
        harness that exceeds the 60 s timeout yields ``pass: False`` with an
        ``error`` message instead of raising.
    """
    harness_path = code_dir / "verify.py"
    if not harness_path.exists():
        return {"pass": False, "score": 0, "error": "No harness found, running static gates"}

    # NOTE(security): verify.py is LLM-generated and runs unsandboxed with the
    # caller's privileges.
    try:
        result = subprocess.run(
            [sys.executable, str(harness_path)],
            cwd=code_dir, capture_output=True, text=True, timeout=60
        )
    except subprocess.TimeoutExpired:
        return {"pass": False, "score": 0, "error": "Harness timed out after 60s"}

    # Try to parse JSON output from harness
    try:
        report = json.loads(result.stdout)
    except (json.JSONDecodeError, ValueError):
        report = None

    # Only a JSON object is a valid report; lists, numbers and strings fall
    # through to the exit-code path instead of crashing on .get().
    if isinstance(report, dict):
        return {
            "pass": report.get("pass", False),
            "score": report.get("score", 0),
            "details": report.get("details", {}),
            "stdout": result.stdout[:500],
        }

    # Harness didn't return a JSON report — fallback to exit code
    return {
        "pass": result.returncode == 0,
        "score": 100 if result.returncode == 0 else 0,
        "stdout": result.stdout[:500],
        "stderr": result.stderr[:200],
    }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
236
|
+
# PART 4: Static Gates (fallback when no custom harness)
|
|
237
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
238
|
+
|
|
239
|
+
def gate_imports(code_dir: Path) -> dict:
    """Import gate: try to import every module under ``code_dir``.

    Each ``*.py`` file except ``conftest.py`` and ``verify.py`` is mapped to a
    dotted module name and imported in a fresh interpreter with ``code_dir``
    as the working directory.

    Returns:
        ``{module_name: {"pass": bool, "error": str, "file": str}}``. An
        import that exceeds 10 s is recorded as a failure rather than raised.
    """
    root = Path(code_dir)
    modules = {}
    # Sorted so the report order is stable between runs.
    for f in sorted(root.rglob("*.py")):
        if f.name in ("conftest.py", "verify.py"):
            continue
        rel = f.relative_to(root)
        # Build the dotted name from path components, not by replacing "/",
        # so it is also correct with Windows path separators.
        if f.name == "__init__.py":
            parts = rel.parent.parts or (f.parent.name,)
        else:
            parts = rel.with_suffix("").parts
        mod_name = ".".join(parts)
        if not mod_name or not mod_name.replace("_", "").replace(".", "").isalnum():
            continue
        modules.setdefault(mod_name, f)

    results = {}
    for mod_name, fpath in modules.items():
        try:
            r = subprocess.run(
                [sys.executable, "-c", f"import {mod_name}"], cwd=root,
                capture_output=True, text=True, timeout=10,
            )
            passed = r.returncode == 0
            error = r.stderr[:200] if r.returncode else ""
        except subprocess.TimeoutExpired:
            passed, error = False, "import timed out after 10s"
        results[mod_name] = {"pass": passed, "error": error,
                             "file": str(fpath.relative_to(root))}
    return results
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def gate_static(code_dir: Path) -> dict:
    """Static gate: run mypy over ``code_dir`` and count reported errors.

    Returns:
        ``{"pass": bool, "mypy_errors": int}``, plus an ``error`` message when
        mypy could not be run. A missing or timed-out mypy fails the gate;
        otherwise it would produce no "error:" lines and pass as clean.
    """
    import importlib.util

    if importlib.util.find_spec("mypy") is None:
        return {"pass": False, "mypy_errors": 0, "error": "mypy not installed"}

    try:
        mypy = subprocess.run(
            [sys.executable, "-m", "mypy", "."], cwd=code_dir,
            capture_output=True, text=True, timeout=30,
        )
    except subprocess.TimeoutExpired:
        return {"pass": False, "mypy_errors": 0, "error": "mypy timed out after 30s"}

    mypy_errs = sum(1 for line in mypy.stdout.split("\n") if "error:" in line)
    # Require a clean exit too: a crashed mypy prints no "error:" lines.
    return {"pass": mypy.returncode == 0 and mypy_errs == 0, "mypy_errors": mypy_errs}
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def gate_contracts(code_dir: Path, spec: dict) -> dict:
    """Contract gate: check each contract is defined with the required args.

    Python files under ``code_dir`` are parsed with ``ast`` rather than by
    shelling out to ``grep``. This works without external tools and handles
    multi-line signatures, default values and ``async def``. Dotted contract
    names (``Class.method``) are matched by their last component. Files that
    cannot be read or parsed are skipped.

    Args:
        code_dir: Root of the generated code.
        spec: Parsed spec; each contract provides ``name`` and ``args``.

    Returns:
        ``{contract_name: {"pass": bool, "error": str}}``. The first
        definition found (files in sorted order) is the one checked.
    """
    import ast

    # Index every function definition once: name -> parameter names.
    signatures = {}
    for source in sorted(Path(code_dir).rglob("*.py")):
        try:
            tree = ast.parse(source.read_text(encoding="utf-8", errors="replace"))
        except (OSError, SyntaxError, ValueError):
            continue
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                params = node.args
                names = [
                    p.arg
                    for p in (*params.posonlyargs, *params.args, *params.kwonlyargs)
                    if p.arg != "self"
                ]
                signatures.setdefault(node.name, names)

    results = {}
    for contract in spec["contracts"]:
        name = contract["name"]
        actual_args = signatures.get(name.rsplit(".", 1)[-1])
        if actual_args is None:
            results[name] = {"pass": False, "error": f"Function '{name}()' not found"}
            continue
        missing = [a["name"] for a in contract["args"] if a["name"] not in actual_args]
        results[name] = {
            "pass": not missing,
            "error": f"Missing args: {missing}" if missing else "",
        }
    return results
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def gate_runtime(code_dir: Path) -> dict:
|
|
298
|
+
import shutil
|
|
299
|
+
pytest_path = shutil.which("pytest")
|
|
300
|
+
if not pytest_path:
|
|
301
|
+
return {"pass": False, "passed": 0, "failed": 1, "error": "pytest not found"}
|
|
302
|
+
result = subprocess.run([pytest_path, "-x", "-q", "--tb=short"], cwd=code_dir,
|
|
303
|
+
capture_output=True, text=True, timeout=60)
|
|
304
|
+
return {"pass": result.returncode == 0, "passed": result.stdout.count("passed"),
|
|
305
|
+
"failed": result.stdout.count("failed")}
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def run_static_gates(code_dir: Path, spec: dict) -> dict:
    """Fallback verification: run the four static gates and score them.

    Weights: import 20%, static 20%, contract 30%, runtime 30%. The overall
    result passes only when the import and contract gates pass and the
    weighted score reaches 70.
    """
    def mark(ok) -> str:
        return '✅' if ok else '❌'

    def points(ok) -> int:
        return 100 if ok else 0

    print("\n ⚡ Using static gates (no custom harness)")
    gates = {
        "import": gate_imports(code_dir),
        "static": gate_static(code_dir),
        "contract": gate_contracts(code_dir, spec),
        "runtime": gate_runtime(code_dir),
    }
    import_entries = gates["import"].values()
    contract_entries = gates["contract"].values()
    runtime = gates["runtime"]

    import_pass = all(entry["pass"] for entry in import_entries)
    contract_pass = all(entry["pass"] for entry in contract_entries)
    runtime_pass = runtime["pass"]
    static_pass = gates["static"]["pass"]

    score = points(import_pass) * 0.2 + points(static_pass) * 0.2 + \
        points(contract_pass) * 0.3 + points(runtime_pass) * 0.3

    imports_ok = sum(1 for entry in import_entries if entry["pass"])
    contracts_ok = sum(1 for entry in contract_entries if entry["pass"])
    print(f" Import: {mark(import_pass)} {imports_ok}/{len(gates['import'])}")
    print(f" Static: {mark(static_pass)} {gates['static']['mypy_errors']} mypy errors")
    print(f" Contracts: {mark(contract_pass)} {contracts_ok}/{len(gates['contract'])}")
    print(f" Runtime: {mark(runtime_pass)} {runtime['passed']} passed, {runtime['failed']} failed")

    verdict = import_pass and contract_pass and score >= 70
    return {"score": round(score, 1), "gates": gates, "pass": verdict}
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
336
|
+
# PART 5: Cheat Detection (Swarm-inspired, 11 patterns)
|
|
337
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
338
|
+
|
|
339
|
+
CHEAT_PATTERNS = {
|
|
340
|
+
"test-modification": {
|
|
341
|
+
"description": "Agent edited test instead of implementation",
|
|
342
|
+
"severity": "high",
|
|
343
|
+
"check": lambda f, c: False # requires diff, skip for now
|
|
344
|
+
},
|
|
345
|
+
"hardcoded-answer": {
|
|
346
|
+
"description": "Literal return of expected output",
|
|
347
|
+
"severity": "high",
|
|
348
|
+
"check": lambda f, c: bool(re.search(r"return\s+(True|False|\d+|'[^']*'|\"[^\"]*\")", c)) and "def " in c
|
|
349
|
+
},
|
|
350
|
+
"exception-swallowing": {
|
|
351
|
+
"description": "Caught/ignored exceptions to pass tests",
|
|
352
|
+
"severity": "medium",
|
|
353
|
+
"check": lambda f, c: bool(re.search(r"except\s+\w+\s*:\s*\n\s*(?:pass|#|$)", c))
|
|
354
|
+
},
|
|
355
|
+
"mock-hallucination": {
|
|
356
|
+
"description": "Mock of nonexistent function/dependency",
|
|
357
|
+
"severity": "medium",
|
|
358
|
+
"check": lambda f, c: bool(re.search(r"mock\.patch\(['\"](?!.*\.)", c))
|
|
359
|
+
},
|
|
360
|
+
"assertion-removal": {
|
|
361
|
+
"description": "Test has no assertions (passes vacuously)",
|
|
362
|
+
"severity": "high",
|
|
363
|
+
"check": lambda f, c: "def test_" in f and f.endswith(".py") and "assert" not in c
|
|
364
|
+
},
|
|
365
|
+
"complexity-mismatch": {
|
|
366
|
+
"description": "Trivial impl + complex test (test-does-all)",
|
|
367
|
+
"severity": "medium",
|
|
368
|
+
"check": lambda f, c: len(c) < 50 and "def " in c and \
|
|
369
|
+
"def test_" not in c and "assert" not in c
|
|
370
|
+
},
|
|
371
|
+
"test-only-fix": {
|
|
372
|
+
"description": "Only tests changed, no impl change",
|
|
373
|
+
"severity": "high",
|
|
374
|
+
"check": lambda f, c: False # requires diff, skip for now
|
|
375
|
+
},
|
|
376
|
+
"circular-import": {
|
|
377
|
+
"description": "Module imports itself or creates cycle",
|
|
378
|
+
"severity": "medium",
|
|
379
|
+
"check": lambda f, c: False # requires full module graph
|
|
380
|
+
},
|
|
381
|
+
"stale-comment": {
|
|
382
|
+
"description": "Comment says one thing, code does another",
|
|
383
|
+
"severity": "low",
|
|
384
|
+
"check": lambda f, c: bool(re.search(r"TODO|FIXME|HACK|XXX", c))
|
|
385
|
+
},
|
|
386
|
+
"dead-branch": {
|
|
387
|
+
"description": "Condition that's always True/False",
|
|
388
|
+
"severity": "low",
|
|
389
|
+
"check": lambda f, c: bool(re.search(r"if\s+(True|False)\s*:", c))
|
|
390
|
+
},
|
|
391
|
+
"config-leak": {
|
|
392
|
+
"description": "Hardcoded config/secrets in code",
|
|
393
|
+
"severity": "high",
|
|
394
|
+
"check": lambda f, c: bool(re.search(r"(api_key|secret|password|token)\s*=\s*['\"][^'\"]+['\"]", c, re.I))
|
|
395
|
+
},
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def run_cheat_detection(code_dir: Path) -> dict:
    """Scan generated code for LLM shortcut patterns.

    Every ``*.py`` file under ``code_dir`` except ``conftest.py`` and
    ``verify.py`` is checked against each entry of ``CHEAT_PATTERNS``.

    Returns:
        Dict with ``pass`` (score >= 70), ``score`` (100 minus 15 / 8 / 3 per
        high / medium / low finding, floored at 0), ``findings``, ``total``
        and per-severity counts ``high`` / ``medium`` / ``low``.
    """
    penalties = {"high": 15, "medium": 8}
    findings = []
    for py_file in sorted(code_dir.rglob("*.py")):
        if py_file.name in ("conftest.py", "verify.py"):
            continue
        rel = str(py_file.relative_to(code_dir))
        # Decode explicitly and leniently so one file with unexpected bytes
        # cannot abort the whole scan.
        try:
            content = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        for pattern_name, pattern in CHEAT_PATTERNS.items():
            try:
                hit = pattern["check"](rel, content)
            except Exception:
                # Best-effort heuristics: a faulty check must not stop the scan.
                continue
            if hit:
                findings.append({
                    "pattern": pattern_name,
                    "severity": pattern["severity"],
                    "file": rel,
                    "description": pattern["description"],
                })

    score = 100 - sum(penalties.get(f["severity"], 3) for f in findings)
    by_severity = {
        level: sum(1 for f in findings if f["severity"] == level)
        for level in ("high", "medium", "low")
    }

    return {
        "pass": score >= 70,
        "score": max(0, score),
        "findings": findings,
        "total": len(findings),
        "high": by_severity["high"],
        "medium": by_severity["medium"],
        "low": by_severity["low"],
    }
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
441
|
+
# PART 6: Verification Cache
|
|
442
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
443
|
+
|
|
444
|
+
# In-process verification cache: code_dir -> (checksum, result)
VERIFY_CACHE = {}


def _dir_checksum(code_dir: Path) -> str:
    """Return a short SHA-256 fingerprint of all ``.py`` files in ``code_dir``.

    Both the relative path and the content of each file are hashed, each
    prefixed with its length, so renaming or moving a file, or shifting bytes
    between adjacent files, changes the checksum.

    Returns:
        The first 16 hex characters of the digest.
    """
    h = hashlib.sha256()
    for f in sorted(code_dir.rglob("*.py")):
        name = f.relative_to(code_dir).as_posix().encode("utf-8")
        payload = f.read_bytes()
        for chunk in (name, payload):
            h.update(len(chunk).to_bytes(8, "big"))
            h.update(chunk)
    return h.hexdigest()[:16]
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def cached_verification(code_dir: Path, spec: dict, force: bool = False) -> dict:
    """Run verification, reusing the cached result when nothing changed.

    The cache key combines the code checksum with a fingerprint of the spec,
    so verifying the same directory against a different spec is never
    answered from a stale entry.

    Args:
        code_dir: Directory with the generated code.
        spec: Parsed spec the code is verified against.
        force: When True, skip the cache lookup. The fresh result is still
            stored, so later non-forced calls see it.

    Returns:
        The report produced by the verification pipeline.
    """
    # default=str lets non-JSON values still contribute to the fingerprint.
    spec_fingerprint = hashlib.sha256(
        json.dumps(spec, sort_keys=True, default=str).encode("utf-8")
    ).hexdigest()[:16]
    checksum = f"{_dir_checksum(code_dir)}:{spec_fingerprint}"

    if not force:
        cached = VERIFY_CACHE.get(code_dir)
        if cached is not None and cached[0] == checksum:
            print(" 📦 Verification cache hit — skipping")
            return cached[1]

    result = _run_verification(code_dir, spec)
    VERIFY_CACHE[code_dir] = (checksum, result)
    return result
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _run_verification(code_dir: Path, spec: dict) -> dict:
    """Full verification: custom harness, static-gate fallback, cheat scan.

    Phases:
      1. Run the generated harness; if it passes, its score is used.
      2. Otherwise run the four static gates.
      3. Always run cheat detection.

    The combined score is 70% verification plus 30% cheat-detection score.

    Returns:
        ``{"score": float, "pass": bool, "verification": dict,
        "cheat_detection": dict}``.
    """
    print("\n ── Verification Pipeline ──")

    # Phase 1: Custom harness (if generated)
    harness_result = run_harness(code_dir)
    if harness_result.get("pass", False):
        ver_score = harness_result.get("score", 100)
        print(f" ✅ Custom harness: score {ver_score}")
        # Record the verdict explicitly. Without a "pass" key the overall
        # result was always FAIL when the custom harness succeeded.
        gates_result = {"harness": harness_result, "pass": True, "score": ver_score}
    else:
        # Phase 2: Static gates fallback
        gates_result = run_static_gates(code_dir, spec)
        ver_score = gates_result.get("score", 0)

    # Phase 3: Cheat detection (always runs)
    print("\n ── Cheat Detection ──")
    cheat = run_cheat_detection(code_dir)
    if cheat["findings"]:
        print(f" ⚠️ {cheat['total']} findings: {cheat['high']} high, {cheat['medium']} med, {cheat['low']} low")
        # Show only the first five findings to keep the console readable.
        for f in cheat["findings"][:5]:
            print(f" {'🔴' if f['severity']=='high' else '🟡' if f['severity']=='medium' else '🔵'} {f['file']}: {f['description']}")
    else:
        print(" ✅ Clean: no shortcuts detected")

    # Combined score
    combined = round(ver_score * 0.7 + cheat["score"] * 0.3, 1)
    overall_pass = gates_result.get("pass", False) and cheat["pass"]

    print(f"\n Combined Score: {combined}/100 — {'✅ PASS' if overall_pass else '❌ FAIL'}")

    return {
        "score": combined,
        "pass": overall_pass,
        "verification": gates_result,
        "cheat_detection": cheat,
    }
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
510
|
+
# PART 7: Code Generation (v6 — with cost routing)
|
|
511
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
512
|
+
|
|
513
|
+
def _call_llm_simple(model: str, prompt: str) -> dict:
    """Send one user prompt to OpenRouter's chat-completions endpoint.

    The API key comes from the ``OPENROUTER_API_KEY`` environment variable,
    or failing that from ``~/.hermes/.env`` (a ``KEY=value`` file; comment
    lines are ignored and surrounding quotes are stripped).

    Args:
        model: OpenRouter model identifier.
        prompt: Text sent as the single user message.

    Returns:
        ``{"success": True, "content": str, "usage": dict, "model": str}`` on
        success, otherwise ``{"success": False, "error": str}``. Network
        errors, malformed responses and responses without choices are all
        reported as failures; nothing is raised.
    """
    import urllib.request

    key_name = "OPENROUTER_API_KEY"
    key = os.environ.get(key_name, "").strip()
    if not key:
        # Fall back to the current user's dotenv file rather than a path
        # hard-coded to one developer's home directory.
        try:
            env_lines = (Path.home() / ".hermes" / ".env").read_text(
                encoding="utf-8", errors="replace"
            ).splitlines()
        except (OSError, RuntimeError):
            env_lines = []
        for raw in env_lines:
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            name, _, value = line.partition("=")
            if name.strip().removeprefix("export ").strip() == key_name:
                key = value.strip().strip("'\"")
                break
    if not key:
        return {"success": False, "error": "No API key"}

    data = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 4000,
    }).encode()
    req = urllib.request.Request(
        "https://openrouter.ai/api/v1/chat/completions", data=data,
        headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
    )
    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        choices = result.get("choices") or []
        if not choices:
            return {"success": False,
                    "error": str(result.get("error") or "LLM response contained no choices")}
        # `content` may be null in the response; normalise it to a string so
        # callers can search it safely.
        content = (choices[0].get("message") or {}).get("content") or ""
        return {"success": True, "content": content,
                "usage": result.get("usage", {}), "model": model}
    except Exception as e:
        # Boundary of the network call: report any failure to the caller
        # instead of raising.
        return {"success": False, "error": str(e)}
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def generate_code_v6(spec: dict, output_dir: Path) -> dict:
    """Generate one Python file per contract, using cost-based model routing.

    Files are written to ``output_dir/<module_name>/<contract>.py``, where
    ``module_name`` is the lower-cased spec name with spaces replaced by
    underscores. The package ``__init__.py`` re-exports only the contracts
    whose file was actually written, so one failed generation does not make
    the whole package unimportable. Test files named by the contracts are
    copied from the spec's directory into ``output_dir``.

    Args:
        spec: Parsed spec (uses ``name``, ``contracts``, ``path``).
        output_dir: Destination directory; created if missing.

    Returns:
        ``{"files_created": int, "total_cost": float, "model": str}``, where
        the cost is a rough estimate at $0.14 per million tokens.
    """
    gen_model = select_model(spec, "generation")

    module_name = spec["name"].lower().replace(" ", "_")
    package_dir = output_dir / module_name
    output_dir.mkdir(parents=True, exist_ok=True)

    fence_re = re.compile(r"```(?:python)?\n(.+?)\n```", re.DOTALL)
    generated = []  # contract names whose file was written
    total_cost = 0.0

    # One file per contract.
    for c in spec["contracts"]:
        file_path = package_dir / f"{c['name']}.py"
        file_path.parent.mkdir(parents=True, exist_ok=True)

        signature = f"{c['name']}({', '.join(a['name']+': '+a['type'] for a in c['args'])}) -> {c['returns']}"
        prompt = f"""Generate Python code for function: {c['name']}

Signature: {signature}

Requirements:
- Complete, working code
- Type hints
- Docstring
- Absolute imports only
- Return correct type

Write ONLY the code."""

        result = _call_llm_simple(gen_model, prompt)
        if not result["success"]:
            continue

        # Extract code block
        code = result.get("content") or ""
        fenced = fence_re.search(code)
        if fenced:
            code = fenced.group(1).strip()
        if not code.strip():
            # An empty module would import cleanly but define nothing.
            continue

        file_path.write_text(code, encoding="utf-8")
        generated.append(c["name"])

        # Estimate cost
        usage = result.get("usage", {})
        tokens = usage.get("total_tokens", 500) if isinstance(usage, dict) else 500
        total_cost += tokens / 1_000_000 * 0.14

    # Write __init__.py with exports for the generated contracts only.
    if package_dir.exists():
        exports = [f"from {module_name}.{name} import {name}" for name in generated]
        (package_dir / "__init__.py").write_text(
            f"# {spec['name']} — auto-generated\n" + "\n".join(exports) + "\n",
            encoding="utf-8",
        )

    # Copy the tests referenced by the contracts next to the generated code.
    spec_dir = Path(spec["path"]).parent if spec.get("path") else None
    if spec_dir:
        for c in spec["contracts"]:
            if not c["test"]:
                continue
            src = spec_dir / c["test"]
            if src.exists():
                dst = output_dir / c["test"]
                dst.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(src, dst)

    return {"files_created": len(generated), "total_cost": round(total_cost, 6), "model": gen_model}
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
615
|
+
# PART 8: Spec Registry
|
|
616
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
617
|
+
|
|
618
|
+
def init_default_registry():
    """Seed the spec registry with a few built-in starter specs.

    Ensures ``REGISTRY_DIR`` exists, then writes one ``<name>.md`` file per
    bundled spec. A spec file that is already present is left untouched, so
    calling this repeatedly never overwrites registry content.
    """
    fastapi_auth_spec = """# FastAPI JWT Auth

## Contracts
- `create_token(user_id: int) -> str` @test tests/test_auth.py
- `verify_token(token: str) -> dict` @test tests/test_auth.py
- `get_current_user(token: str) -> User` @test tests/test_auth.py

## Dependencies
- fastapi>=0.100.0
- python-jose[cryptography]>=3.3.0
- passlib[bcrypt]>=1.7.4

## Boundaries
- Never log passwords
- Token expiry: 24h
- Always hash before store
"""
    cli_app_spec = """# CLI Application

## Contracts
- `main(args: list) -> int` @test tests/test_cli.py
- `parse_config(path: str) -> dict` @test tests/test_cli.py

## Dependencies
- click>=8.0.0

## Boundaries
- Exit code 0 for success, 1 for error
- Log to stderr, output to stdout
"""
    pydantic_model_spec = """# Pydantic Data Model

## Contracts
- `validate(data: dict) -> Model` @test tests/test_model.py
- `to_dict(instance: Model) -> dict` @test tests/test_model.py

## Dependencies
- pydantic>=2.0.0

## Boundaries
- Raise ValidationError on invalid data
- All fields required unless Optional
"""
    # Registry file stem -> markdown body of the bundled spec.
    bundled_specs = {
        "fastapi-auth": fastapi_auth_spec,
        "cli-app": cli_app_spec,
        "pydantic-model": pydantic_model_spec,
    }

    REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
    for spec_name, markdown in bundled_specs.items():
        target = REGISTRY_DIR / f"{spec_name}.md"
        if target.exists():
            # Keep whatever is already in the registry under this name.
            continue
        target.write_text(markdown)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def cmd_search(query: str):
    """Search the spec registry for specs matching *query*.

    A spec matches when *query* occurs, case-insensitively, in either the
    file contents or the file name (without the ``.md`` suffix). The default
    registry is initialised first so the bundled specs are always searchable.

    Args:
        query: Text to look for.

    Returns:
        A list with one dict per matching ``*.md`` file in ``REGISTRY_DIR``,
        ordered by file path. Each dict has the keys ``name`` (file stem),
        ``path`` (path to the spec, as a string) and ``preview`` (the first
        100 characters of the spec in its original letter case, stripped).
    """
    init_default_registry()
    needle = query.lower()
    results = []
    # Sort so the output order does not depend on directory listing order.
    for spec_file in sorted(REGISTRY_DIR.glob("*.md")):
        # The registry can hold user-published files: decode explicitly as
        # UTF-8 and replace undecodable bytes instead of failing the search.
        text = spec_file.read_text(encoding="utf-8", errors="replace")
        if needle in text.lower() or needle in spec_file.stem.lower():
            results.append({
                "name": spec_file.stem,
                "path": str(spec_file),
                # Lower-casing is only for matching; preview the real text.
                "preview": text[:100].strip(),
            })
    return results
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def cmd_publish(spec_path: str):
    """Publish a spec file to the registry.

    Copies the file at *spec_path* into ``REGISTRY_DIR`` under its own file
    name, creating the registry directory when it does not exist yet.

    Args:
        spec_path: Path to the spec file to publish.

    Returns:
        ``{"error": <message>}`` when *spec_path* is not an existing regular
        file; otherwise ``{"success": True, "path": <destination path as a
        string>, "name": <file stem>}``.
    """
    path = Path(spec_path)
    # is_file() also rejects directories, which copy2() cannot copy.
    if not path.is_file():
        return {"error": f"Spec not found: {spec_path}"}
    # Nothing guarantees the registry was initialised before a publish.
    REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
    dest = REGISTRY_DIR / path.name
    shutil.copy2(path, dest)
    return {"success": True, "path": str(dest), "name": path.stem}
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
def cmd_init(spec_name: str, output: str):
    """Bootstrap a project by copying a registry spec into *output*.

    The default registry is initialised first, then ``<spec_name>.md`` is
    looked up in ``REGISTRY_DIR`` and copied into the *output* directory
    (created if missing) under the same file name.

    Args:
        spec_name: Name of the registry spec, without the ``.md`` suffix.
        output: Directory that receives the copied spec.

    Returns:
        ``{"success": True, "path": <copied file path as a string>,
        "spec": spec_name}`` on success, or ``{"error": <message>}`` listing
        the available spec names when *spec_name* is not in the registry.
    """
    init_default_registry()
    source = REGISTRY_DIR / f"{spec_name}.md"

    if source.exists():
        target = Path(output) / source.name
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, target)
        return {"success": True, "path": str(target), "spec": spec_name}

    # Unknown spec: tell the caller which names the registry does contain.
    available = [entry.stem for entry in REGISTRY_DIR.glob('*.md')]
    return {"error": f"Spec '{spec_name}' not found. Available: {available}"}
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
708
|
+
# PART 9: Pipeline
|
|
709
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
710
|
+
|
|
711
|
+
def run_pipeline(spec_path: str, output_dir: str, model: str = "",
                 max_iterations: int = 3, force: bool = False) -> dict:
    """Run the full specpass pipeline for one spec.

    Phases: load and verify the spec, generate code, verify the generated
    code, run the debug agent while verification fails, and finally generate
    a custom verification harness when verification passed. Progress is
    printed to stdout along the way.

    Args:
        spec_path: Path to the Markdown spec file.
        output_dir: Directory that receives the generated code.
        model: Model override. NOTE(review): this value is not referenced
            anywhere in this function; models come from ``select_model``.
        max_iterations: Iteration budget. The counter starts at 1 and the fix
            loop runs while it is below this value, so at most
            ``max_iterations - 1`` fix passes are executed.
        force: Forwarded to the first ``cached_verification`` call.
            Re-verification inside the fix loop always uses ``force=True``.

    Returns:
        ``{"success": False, "error": ...}`` when the spec file is missing,
        ``{"success": False, "phase": "spec_verify", "errors": [...]}`` when
        the spec fails verification, otherwise a summary dict with the keys
        ``success``, ``score``, ``elapsed``, ``files``, ``cost``, ``model``
        and ``output_dir``.
    """
    debug_agent_timeout = 120  # seconds allowed per debug-agent run
    start = time.time()
    banner = "=" * 60

    print(banner)
    print(" specpass — Verification-First Platform")
    print(f" Spec: {spec_path}")
    print(f" Output: {output_dir}")
    print(banner)

    spec_file = Path(spec_path)
    out_dir = Path(output_dir)

    if not spec_file.exists():
        return {"success": False, "error": f"Spec not found: {spec_path}"}

    # Phase 0: Load & verify spec
    print("\n📋 Phase 0: Spec")
    spec = load_markdown_spec(spec_file)
    spec_errors = verify_spec(spec)
    if spec_errors:
        for e in spec_errors:
            print(f" ❌ {e}")
        return {"success": False, "phase": "spec_verify", "errors": spec_errors}
    gen_model = select_model(spec, "generation")
    fix_model = select_model(spec, "fix")
    print(f" ✅ {spec['name']} ({len(spec['contracts'])} contracts, {spec['complexity']})")
    print(f" Gen: {gen_model} | Fix: {fix_model}")

    # Phase 1: Generate code
    print("\n🤖 Phase 1: Generation")
    gen_result = generate_code_v6(spec, out_dir)
    files = list(out_dir.rglob("*.py"))
    print(f" Generated {gen_result['files_created']} files, ~${gen_result['total_cost']:.4f}")

    # Phase 2: Verification
    print("\n🔬 Phase 2: Verification")
    verification = cached_verification(out_dir, spec, force=force)

    # Phase 3: Fix loop
    iteration = 1
    while not verification["pass"] and iteration < max_iterations:
        print(f"\n🔄 Fix iteration {iteration}/{max_iterations}")
        # Regenerate failing files via debug agent
        try:
            debug_result = subprocess.run(
                [sys.executable, str(AGENTS_DIR / "debug_agent.py"),
                 spec_path, "--code-dir", str(out_dir),
                 "--base-dir", str(out_dir.parent), "--max-cycles", "1"],
                capture_output=True, text=True, timeout=debug_agent_timeout
            )
        except subprocess.TimeoutExpired:
            # A hung agent must not abort the pipeline: report it and fall
            # through to re-verification so a regular FAIL result is returned.
            print(f" ⚠️ Debug agent timed out after {debug_agent_timeout}s")
        else:
            print(debug_result.stdout[:200] if debug_result.stdout else " No debug output")
            if debug_result.returncode != 0 and debug_result.stderr:
                # Surface agent failures instead of silently dropping them.
                print(f" ⚠️ Debug agent exited with code {debug_result.returncode}: "
                      f"{debug_result.stderr[:200]}")
        verification = cached_verification(out_dir, spec, force=True)
        iteration += 1

    # Phase 4: Custom verification harness generation (for next time)
    if verification["pass"]:
        print("\n🔧 Generating custom verification harness...")
        harness_model = select_model(spec, "harness")
        if harness_model:
            harness = generate_harness(spec, out_dir, harness_model)
            if harness["success"]:
                print(f" ✅ Harness saved to {harness['path']}")
            else:
                print(f" ⚠️ Harness generation skipped: {harness.get('error', '?')}")

    # Summary
    elapsed = time.time() - start
    print(f"\n{banner}")
    print(f" {'✅ PIPELINE PASS' if verification['pass'] else '❌ PIPELINE FAIL'}")
    print(f" Score: {verification['score']}/100")
    print(f" Time: {elapsed:.1f}s, Files: {len(files)}, Cost: ~${gen_result['total_cost']:.4f}")
    print(banner)

    return {
        "success": verification["pass"],
        "score": verification["score"],
        "elapsed": elapsed,
        "files": len(files),
        "cost": gen_result["total_cost"],
        "model": gen_model,
        "output_dir": str(out_dir),
    }
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
797
|
+
# PART 10: CLI
|
|
798
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
799
|
+
|
|
800
|
+
def main():
    """Command-line entry point for specpass.

    Subcommands:
        run      Run the full pipeline; exits 0 when the result reports
                 success, 1 otherwise.
        verify   Verify an existing build directory against a spec; exits 0
                 when verification passes, 1 otherwise (including a missing
                 spec file or build directory).
        search   Print registry specs matching a query.
        publish  Copy a spec into the registry; exits 1 on error.
        init     Copy a registry spec into an output directory; exits 1 on
                 error.

    Without a subcommand the help text is printed.
    """
    parser = argparse.ArgumentParser(description="specpass — Verification-First Platform")
    sub = parser.add_subparsers(dest="command", help="Commands")

    # Pipeline
    p = sub.add_parser("run", help="Run full pipeline")
    p.add_argument("spec", help="Path to spec file")
    p.add_argument("--out", "-o", default="build", help="Output directory")
    p.add_argument("--model", default="", help="Override model")
    p.add_argument("--max-iterations", type=int, default=3)
    p.add_argument("--force", action="store_true", help="Skip cache")

    # Verify
    v = sub.add_parser("verify", help="Verify existing build")
    v.add_argument("dir", help="Build directory")
    v.add_argument("--spec", "-s", required=True, help="Spec file")
    v.add_argument("--force", action="store_true")

    # Registry
    s = sub.add_parser("search", help="Search spec registry")
    s.add_argument("query", help="Search query")

    pub = sub.add_parser("publish", help="Publish spec to registry")
    pub.add_argument("spec", help="Path to spec file")

    init = sub.add_parser("init", help="Init from registry spec")
    init.add_argument("spec_name", help="Spec name (e.g. fastapi-auth)")
    init.add_argument("--out", "-o", default=".", help="Output directory")

    args = parser.parse_args()

    if args.command == "run":
        result = run_pipeline(args.spec, args.out, args.model, args.max_iterations, args.force)
        print(json.dumps(result, indent=2, ensure_ascii=False))
        sys.exit(0 if result["success"] else 1)

    elif args.command == "verify":
        spec_file = Path(args.spec)
        # Same guard as run_pipeline: fail cleanly before loading the spec.
        if not spec_file.exists():
            print(f"Error: spec not found: {spec_file}")
            sys.exit(1)
        spec = load_markdown_spec(spec_file)
        code_dir = Path(args.dir)
        if not code_dir.exists():
            print(f"Error: directory not found: {code_dir}")
            sys.exit(1)
        result = cached_verification(code_dir, spec, force=args.force)
        print(json.dumps(result, indent=2, ensure_ascii=False))
        sys.exit(0 if result["pass"] else 1)

    elif args.command == "search":
        results = cmd_search(args.query)
        if results:
            for r in results:
                print(f" 📄 {r['name']}")
                print(f" {r['preview']}...")
        else:
            print(f"No specs found for '{args.query}'")

    elif args.command == "publish":
        result = cmd_publish(args.spec)
        if "error" in result:
            print(f"❌ {result['error']}")
            # A failed publish must not look like success to calling scripts.
            sys.exit(1)
        print(f"✅ Published '{result['name']}' to {result['path']}")

    elif args.command == "init":
        result = cmd_init(args.spec_name, args.out)
        if "error" in result:
            print(f"❌ {result['error']}")
            # A failed init must not look like success to calling scripts.
            sys.exit(1)
        print(f"✅ Created {result['path']} from '{args.spec_name}'")

    else:
        parser.print_help()
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|