zcode-supervisor 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tools/__init__.py +2 -0
- tools/zcode_control/__init__.py +16 -0
- tools/zcode_control/browser_scripts.mjs +106 -0
- tools/zcode_control/provider_errors.mjs +135 -0
- tools/zcode_control/zcodectl.mjs +2097 -0
- tools/zcode_eval/__init__.py +2 -0
- tools/zcode_eval/duel_import.py +304 -0
- tools/zcode_eval/zcode_eval.py +687 -0
- tools/zcode_eval/zcode_release.py +221 -0
- tools/zcode_supervisor/__init__.py +2 -0
- tools/zcode_supervisor/auto_route.py +393 -0
- tools/zcode_supervisor/repo_setup.py +439 -0
- tools/zcode_supervisor/zcode_supervisor.py +696 -0
- zcode_supervisor-0.0.1.dist-info/METADATA +928 -0
- zcode_supervisor-0.0.1.dist-info/RECORD +19 -0
- zcode_supervisor-0.0.1.dist-info/WHEEL +5 -0
- zcode_supervisor-0.0.1.dist-info/entry_points.txt +7 -0
- zcode_supervisor-0.0.1.dist-info/licenses/LICENSE +21 -0
- zcode_supervisor-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Import external supervisor duel rows into the local ZCode eval ledger."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import math
|
|
8
|
+
import re
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
# Tool names that duel_tool_name() accepts verbatim and returns unchanged;
# the raw duel value "glm" is mapped onto "claude-code-glm52" there.
SUPPORTED_TOOLS = ("zcode", "claude-code-glm52")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string truncated to whole seconds."""
    current = datetime.now(timezone.utc)
    return current.replace(microsecond=0).isoformat()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def finite_number(value: Any) -> float | None:
    """Return ``value`` as a finite ``float``, or ``None`` when it is not usable.

    Args:
        value: Any object, typically a value read from parsed JSON.

    Returns:
        ``float(value)`` when ``value`` is an ``int`` or ``float`` (but not a
        ``bool``) that is representable as a finite float; otherwise ``None``.
    """
    # bool is a subclass of int; True/False are flags, not measurements.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        number = float(value)
    except OverflowError:
        # JSON integers are unbounded; one too large for a float is treated as
        # "not a finite number" instead of aborting the caller.
        return None
    return number if math.isfinite(number) else None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def read_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 JSON document stored at ``path`` and return the result.

    The parsed value is returned as-is; no check is made here that it is a dict.
    """
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_records(path: Path) -> list[dict[str, Any]]:
    """Load every record from a JSON Lines file.

    Args:
        path: Location of the ledger file; it does not have to exist.

    Returns:
        One parsed JSON value per non-blank line, in file order. An empty list
        when ``path`` does not exist.
    """
    if not path.exists():
        return []
    with path.open("r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        # Blank (or whitespace-only) lines are tolerated and skipped.
        return [json.loads(entry) for entry in stripped_lines if entry]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def first_match(text: str, patterns: list[str]) -> str | None:
    """Return capture group 1 of the first pattern that finds a non-empty value.

    Args:
        text: The text to search.
        patterns: Regular expressions tried in order; each must define group 1.

    Returns:
        The first non-empty group-1 capture, or ``None`` when no pattern
        produces one.
    """
    for pattern in patterns:
        found = re.search(pattern, text)
        if found is None:
            continue
        captured = found.group(1)
        # An empty or unmatched group does not count; keep trying later patterns.
        if captured:
            return captured
    return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def classify_provider_error_text(*, stdout: str = "", stderr: str = "", exit_code: Any = None) -> dict[str, Any]:
    """Scan captured CLI output for provider-side failures and extract their details.

    Args:
        stdout: Captured standard output of the CLI run.
        stderr: Captured standard error of the CLI run.
        exit_code: Process exit code; anything ``int()`` cannot convert is ignored.

    Returns:
        A dict with the keys ``provider_error``, ``provider_code``,
        ``provider_message``, ``provider_request_id``, ``provider_error_line``,
        ``provider_id``, ``provider_kind`` and ``retryable_provider_error``.
        The two flags are booleans; the remaining values are strings or ``None``.
    """
    # stderr comes first, so a field present in both streams resolves to the stderr value.
    text = f"{stderr}\n{stdout}"

    try:
        exit_143 = int(exit_code) == 143
    except (TypeError, ValueError):
        # A missing or non-numeric exit code never counts as 143.
        exit_143 = False

    code_patterns = [
        r"providerCode:\s*['\"]?(\d+)['\"]?",
        r'"providerCode"\s*:\s*"(\d+)"',
        r"\[(\d{3,})\]\[",
        r"\bcode:\s*['\"]?(\d{3,})['\"]?",
        r'"code"\s*:\s*"(\d{3,})"',
    ]
    message_patterns = [
        r"providerMessage:\s*'([^']+)'",
        r'providerMessage:\s*"([^"]+)"',
        r'"providerMessage"\s*:\s*"([^"]+)"',
        r"ProviderBusinessError:\s*([^\n]+)",
    ]
    request_id_patterns = [
        r"providerRequestId:\s*'([^']+)'",
        r'providerRequestId:\s*"([^"]+)"',
        r'"providerRequestId"\s*:\s*"([^"]+)"',
        r"request_id:\s*'([^']+)'",
        r'"request_id"\s*:\s*"([^"]+)"',
    ]
    id_patterns = [r"providerId:\s*'([^']+)'", r'providerId:\s*"([^"]+)"', r'"providerId"\s*:\s*"([^"]+)"']
    kind_patterns = [r"providerKind:\s*'([^']+)'", r'providerKind:\s*"([^"]+)"', r'"providerKind"\s*:\s*"([^"]+)"']

    provider_code = first_match(text, code_patterns)

    # Overload wording or provider code 1305 marks the failure as temporary.
    overloaded = re.search(r"temporarily overloaded|try again later|overloaded_error", text, re.I) is not None
    temporary = overloaded or provider_code == "1305"
    provider_business = (
        re.search(r"ProviderBusinessError|PROVIDER_BUSINESS_ERROR|isProviderBusinessError:\s*true", text, re.I)
        is not None
    )
    # Code 1305 is already folded into `temporary`, so it needs no separate term here.
    provider_error = any((provider_business, temporary, exit_143))

    # First output line that names the provider business error, if any.
    provider_line = None
    for candidate in text.splitlines():
        if re.search(r"ProviderBusinessError|PROVIDER_BUSINESS_ERROR", candidate, re.I):
            provider_line = candidate
            break

    provider_message = first_match(text, message_patterns)
    if not provider_message:
        # Synthesize a message when the only evidence is the 143 exit code.
        provider_message = "ZCode CLI exited with code 143" if exit_143 else None

    return {
        "provider_error": provider_error,
        "provider_code": provider_code,
        "provider_message": provider_message,
        "provider_request_id": first_match(text, request_id_patterns),
        "provider_error_line": provider_line,
        "provider_id": first_match(text, id_patterns),
        "provider_kind": first_match(text, kind_patterns),
        "retryable_provider_error": provider_error and (temporary or exit_143),
    }
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def read_json_optional(path: Path) -> dict[str, Any]:
    """Read a JSON object from ``path``, tolerating a missing file.

    Returns:
        The parsed dict, or an empty dict when the file does not exist or its
        top-level JSON value is not an object.
    """
    if not path.exists():
        return {}
    parsed = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(parsed, dict):
        return parsed
    return {}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def bool_or_none(value: Any) -> bool | None:
    """Pass through genuine booleans and map every other value to ``None``."""
    if isinstance(value, bool):
        return value
    return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def row_changed_count(row: dict[str, Any]) -> int | None:
    """Return how many files a duel row reports as changed.

    Args:
        row: One duel result row.

    Returns:
        ``row["audit"]["changed_count"]`` when it is a real integer; otherwise
        the length of ``row["changed_files"]`` when that is a list; otherwise
        ``None``.
    """
    audit = row.get("audit")
    if isinstance(audit, dict):
        audited = audit.get("changed_count")
        # bool is an int subclass: without this exclusion a JSON `false` would
        # compare equal to 0 ("nothing changed") and `true` would count as 1.
        if isinstance(audited, int) and not isinstance(audited, bool):
            return audited
    changed_files = row.get("changed_files")
    if isinstance(changed_files, list):
        return len(changed_files)
    return None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def classify_duel_supervisor_state(row: dict[str, Any], provider: dict[str, Any]) -> dict[str, Any]:
    """Derive the supervisor outcome for one duel row.

    Args:
        row: The duel result row (``run_ok``, ``audit``, ``changed_files`` are read).
        provider: Provider classification; ``provider_error`` and
            ``retryable_provider_error`` are read.

    Returns:
        A dict with ``supervisor_state`` (one of ``success``, ``cli_error``,
        ``retryable_provider_error``, ``partial_success``, ``unsafe_partial``),
        ``partial_artifacts_possible`` and ``safe_to_retry_later``.
    """

    def outcome(state: str, partial: bool, retry: bool) -> dict[str, Any]:
        return {"supervisor_state": state, "partial_artifacts_possible": partial, "safe_to_retry_later": retry}

    if row.get("run_ok") is True:
        return outcome("success", False, False)
    if not provider.get("provider_error"):
        return outcome("cli_error", False, False)

    changed_count = row_changed_count(row)
    audit = row.get("audit")
    audit_passed = isinstance(audit, dict) and audit.get("ok") is True
    has_changes = changed_count is not None and changed_count > 0

    # A retry is only safe when the failed run provably left nothing behind.
    if changed_count == 0 and provider.get("retryable_provider_error"):
        return outcome("retryable_provider_error", False, True)
    # Files were written and the audit approved them: keep the partial work.
    if has_changes and audit_passed:
        return outcome("partial_success", True, False)
    # An unknown change count is treated as "artifacts may exist".
    return outcome("unsafe_partial", changed_count is None or changed_count > 0, False)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def validation_summary(row: dict[str, Any]) -> str:
    """Render a row's validation result as a short human-readable string.

    Returns:
        ``""`` when the row has no ``validation`` dict; otherwise ``"pass"`` or
        ``"fail"``, followed by ``"; returncode=<n>"`` when a return code is
        recorded.
    """
    validation = row.get("validation")
    if not isinstance(validation, dict):
        return ""
    # Either the nested flag or the row-level flag is enough to count as a pass.
    passed = validation.get("ok") is True or row.get("validation_ok") is True
    status = "pass" if passed else "fail"
    returncode = validation.get("returncode")
    if returncode is None:
        return status
    return f"{status}; returncode={returncode}"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def add_numeric(record: dict[str, Any], key: str, value: Any) -> None:
    """Store ``value`` under ``record[key]`` as a float, but only if it is a finite number.

    ``record`` is modified in place and left untouched when ``finite_number``
    rejects ``value``.
    """
    number = finite_number(value)
    if number is None:
        return
    record[key] = number
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def duel_tool_name(raw_tool: Any) -> str | None:
    """Map a duel row's tool identifier onto the ledger's tool name.

    Returns:
        ``"claude-code-glm52"`` for the alias ``"glm"``, ``"zcode"`` for
        ``"zcode"``, the value itself when it is listed in ``SUPPORTED_TOOLS``,
        and ``None`` for anything else.
    """
    if raw_tool == "glm":
        return "claude-code-glm52"
    if raw_tool == "zcode":
        return "zcode"
    return raw_tool if raw_tool in SUPPORTED_TOOLS else None
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def build_duel_record(row: dict[str, Any], *, source: Path, run_dir: Path) -> dict[str, Any] | None:
    """Convert one external duel row into a ledger record.

    Args:
        row: The duel result row.
        source: Path of the duel results file the row came from (recorded in ``notes``).
        run_dir: Duel run directory; the per-task result JSON is looked up at
            ``<run_dir>/_control/<tool>/<task_id>/<tool>-result.json``.

    Returns:
        The ledger record with every ``None`` value removed, or ``None`` when
        the row's tool is not recognised or its ``task_id`` is not a string.
    """
    raw_tool = row.get("tool")
    tool = duel_tool_name(raw_tool)
    task_id = row.get("task_id")
    if tool is None or not isinstance(task_id, str):
        return None

    # The control directory is keyed by the tool name as written in the row
    # (e.g. "glm"), not by the normalized ledger name.
    result_path = run_dir / "_control" / str(raw_tool) / task_id / f"{raw_tool}-result.json"
    result_json = read_json_optional(result_path)
    exit_code = result_json.get("exit_code")

    nonzero_exit = isinstance(exit_code, int) and exit_code != 0
    error_exit = row.get("run_ok") is False or result_json.get("ok") is False or nonzero_exit

    provider = classify_provider_error_text(
        stdout=str(result_json.get("stdout") or ""),
        stderr=str(result_json.get("stderr") or ""),
        exit_code=exit_code,
    )
    if not error_exit:
        # Error-looking text in the output of a run that did not fail is not a provider error.
        provider = dict(provider, provider_error=False, retryable_provider_error=False)

    state = classify_duel_supervisor_state(row, provider)
    if state["supervisor_state"] == "partial_success":
        status = "partial"
    elif row.get("run_ok") is True:
        status = "pass"
    else:
        status = "fail"

    usage_fields = ("tokens_total", "input_tokens", "output_tokens", "cache_read_tokens")
    usage_available = any(finite_number(row.get(field)) is not None for field in usage_fields)

    record: dict[str, Any] = {
        "recorded_at": utc_now(),
        "run_id": f"duel-{run_dir.name}-{tool}-{task_id}",
        "tool": tool,
        "task_id": task_id,
        "task_name": task_id.replace("_", " "),
        "task_kind": row.get("kind"),
        "status": status,
        "validation": validation_summary(row),
        "validation_ok": bool_or_none(row.get("validation_ok")),
        "scope_ok": bool_or_none(row.get("scope_ok")),
        "run_ok": bool_or_none(row.get("run_ok")),
        "output_files_ok": bool_or_none(row.get("output_files_ok")),
        "quality_score": finite_number(row.get("quality_score")),
        "preview": row.get("preview"),
        "source_run_dir": str(run_dir),
        "source_result_path": str(result_path) if result_path.exists() else None,
        "notes": f"Imported from external duel result {source}",
        "supervisor_state": state["supervisor_state"],
        "partial_artifacts_possible": state["partial_artifacts_possible"],
        "safe_to_retry_later": state["safe_to_retry_later"],
        "provider_error": bool(provider.get("provider_error")),
        "retryable_provider_error": bool(provider.get("retryable_provider_error")),
        "usage_available": usage_available,
        "attempt_count": 1,
        "attempts": 1,
        "retry_count": 0,
        "retry_delays_ms": [],
        "quota_percent_status": "unavailable",
        "quota_percent_unavailable_reason": "historical_duel_missing_authoritative_quota_snapshot",
    }

    if not usage_available:
        if provider.get("provider_error"):
            record["no_usage_reason"] = "provider_error_without_zcode_cli_usage"
        else:
            record["no_usage_reason"] = "historical_duel_missing_usage_json"

    # Copy over only the provider details that were actually extracted.
    detail_keys = (
        "provider_code",
        "provider_message",
        "provider_request_id",
        "provider_error_line",
        "provider_id",
        "provider_kind",
    )
    for key in detail_keys:
        if provider.get(key) is not None:
            record[key] = provider[key]

    # The row reports wall time in milliseconds; the record stores seconds.
    wall_ms = finite_number(row.get("wall_ms"))
    if wall_ms is not None:
        add_numeric(record, "duration_seconds", wall_ms / 1000)

    metric_names = (
        "tokens_total",
        "input_tokens",
        "output_tokens",
        "cache_read_tokens",
        "lines_added",
        "lines_deleted",
    )
    for name in metric_names:
        add_numeric(record, name, row.get(name))

    changed_count = row_changed_count(row)
    if changed_count is not None:
        record["files_changed"] = float(changed_count)

    # Drop unset fields so the ledger line only carries known values.
    return {key: value for key, value in record.items() if value is not None}
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def resolve_run_dir(raw_run_dir: Any, source: Path) -> Path:
    """Work out which directory holds the duel run's artifacts.

    Args:
        raw_run_dir: The ``run_dir`` value recorded in the results payload.
        source: Path of the results file being imported.

    Returns:
        ``source.parent`` when ``raw_run_dir`` is not a non-blank string; the
        path itself when it is absolute; otherwise the first location found
        that looks like the run directory, falling back to ``source.parent``.
    """
    fallback = source.parent
    if not isinstance(raw_run_dir, str) or not raw_run_dir.strip():
        return fallback

    raw_path = Path(raw_run_dir)
    if raw_path.is_absolute():
        return raw_path

    # The results file already sits inside a directory of the same name that has a control folder.
    if fallback.name == raw_path.name and (fallback / "_control").exists():
        return fallback

    # Otherwise try the relative path against each ancestor of the results file, nearest first.
    for ancestor in source.parents:
        candidate = ancestor / raw_path
        if any((candidate / marker).exists() for marker in ("_control", "results.json")):
            return candidate
    return fallback
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def import_duel_results(args: Any) -> int:
    """Append records built from a duel results file to the JSON Lines ledger.

    Args:
        args: Object providing ``source`` (results file path), ``path`` (ledger
            path), ``tool`` (a tool name or ``"all"``) and ``allow_duplicates``.

    Returns:
        ``0`` after printing a JSON summary with the imported and skipped counts.

    Raises:
        ValueError: If the source file is not a JSON object with a ``rows`` list.
    """
    payload = read_json(args.source)
    rows = payload.get("rows") if isinstance(payload, dict) else None
    if not isinstance(rows, list):
        raise ValueError(f"{args.source}: expected a JSON object with rows[]")

    run_dir = resolve_run_dir(payload.get("run_dir"), args.source)
    existing_run_ids = {record.get("run_id") for record in load_records(args.path)}
    imported = 0
    skipped = 0

    args.path.parent.mkdir(parents=True, exist_ok=True)
    with args.path.open("a", encoding="utf-8") as handle:
        for row in rows:
            record = build_duel_record(row, source=args.source, run_dir=run_dir) if isinstance(row, dict) else None
            if record is None:
                continue
            if args.tool not in ("all", record["tool"]):
                continue
            run_id = record["run_id"]
            # Only rows rejected as duplicates are counted as skipped.
            if run_id in existing_run_ids and not args.allow_duplicates:
                skipped += 1
                continue
            handle.write(json.dumps(record, sort_keys=True) + "\n")
            # Remember the id so a repeat later in the same file is also detected.
            existing_run_ids.add(run_id)
            imported += 1

    summary = {"imported": imported, "path": str(args.path), "skipped": skipped}
    print(json.dumps(summary, indent=2, sort_keys=True))
    return 0
|