zcode-supervisor 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ """ZCode evaluation helpers."""
2
+
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env python3
2
+ """Import external supervisor duel rows into the local ZCode eval ledger."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ import math
8
+ import re
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
# Canonical tool identifiers. duel_tool_name() returns a raw tool label
# unchanged when it already appears in this tuple.
SUPPORTED_TOOLS = ("zcode", "claude-code-glm52")
14
+
15
+
16
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string with whole-second precision."""
    current = datetime.now(tz=timezone.utc)
    # timespec="seconds" truncates (does not round) the sub-second part.
    return current.isoformat(timespec="seconds")
18
+
19
+
20
def finite_number(value: Any) -> float | None:
    """Return *value* as a ``float`` when it is a finite real number, else ``None``.

    Booleans are rejected even though ``bool`` is a subclass of ``int``.
    NaN, infinities, non-numeric values, and integers too large to be
    represented as a float all yield ``None``.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        number = float(value)
    except OverflowError:
        # Arbitrary-precision ints (which json.loads can produce) may not fit
        # in a float; treat them as "not a usable number" instead of crashing.
        return None
    return number if math.isfinite(number) else None
26
+
27
+
28
def read_json(path: Path) -> dict[str, Any]:
    """Decode the UTF-8 JSON document stored at *path*.

    The decoded value is returned as-is; no check is made that the top-level
    value is actually an object, despite the annotated return type.
    """
    document = path.read_text(encoding="utf-8")
    return json.loads(document)
30
+
31
+
32
def load_records(path: Path) -> list[dict[str, Any]]:
    """Load the JSON Lines records stored at *path*.

    Returns an empty list when *path* does not exist. Blank lines are
    skipped. Lines that decode to something other than a JSON object are
    also skipped, so every returned element really is a ``dict`` as the
    return annotation promises.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not path.exists():
        return []
    records: list[dict[str, Any]] = []
    with path.open("r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            value = json.loads(stripped)
            # Callers use dict methods on each record; drop stray scalars/arrays.
            if isinstance(value, dict):
                records.append(value)
    return records
42
+
43
+
44
def first_match(text: str, patterns: list[str]) -> str | None:
    """Return capture group 1 of the first pattern that finds a non-empty capture.

    Patterns are tried in order with ``re.search``. A pattern that matches but
    captures an empty (or non-participating) group 1 is passed over and the
    next pattern is tried. Returns ``None`` when no pattern qualifies.
    """
    for pattern in patterns:
        found = re.search(pattern, text)
        if found is None:
            continue
        captured = found.group(1)
        if captured:
            return captured
    return None
50
+
51
+
52
def classify_provider_error_text(*, stdout: str = "", stderr: str = "", exit_code: Any = None) -> dict[str, Any]:
    """Scan captured CLI output for provider-side failure markers.

    The search text is *stderr* followed by *stdout*, so a marker found in
    stderr wins over the same kind of marker in stdout.

    Args:
        stdout: Captured standard output of the run.
        stderr: Captured standard error of the run.
        exit_code: Process exit code; anything ``int()`` cannot convert
            (``None``, non-numeric strings, NaN, infinities) is treated as
            unknown.

    Returns:
        A dict with the keys ``provider_error``, ``provider_code``,
        ``provider_message``, ``provider_request_id``, ``provider_error_line``,
        ``provider_id``, ``provider_kind`` and ``retryable_provider_error``.
        The two flag keys are booleans; the others are strings or ``None``.
    """
    text = f"{stderr}\n{stdout}"
    try:
        exit_code_number = int(exit_code)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); json.loads accepts the
        # non-standard Infinity literal, so this can come from a result file.
        exit_code_number = None
    exit_143 = exit_code_number == 143

    provider_code = first_match(
        text,
        [
            r"providerCode:\s*['\"]?(\d+)['\"]?",
            r'"providerCode"\s*:\s*"(\d+)"',
            r"\[(\d{3,})\]\[",
            r"\bcode:\s*['\"]?(\d{3,})['\"]?",
            r'"code"\s*:\s*"(\d{3,})"',
        ],
    )
    # Provider code 1305 is handled exactly like the overload phrases.
    temporary = bool(re.search(r"temporarily overloaded|try again later|overloaded_error", text, re.I)) or provider_code == "1305"
    provider_business = bool(
        re.search(r"ProviderBusinessError|PROVIDER_BUSINESS_ERROR|isProviderBusinessError:\s*true", text, re.I)
    )
    # `temporary` already includes the 1305 case, so it is not repeated here.
    provider_error = provider_business or temporary or exit_143
    # First output line that mentions the business-error marker, kept verbatim.
    provider_line = next(
        (line for line in text.splitlines() if re.search(r"ProviderBusinessError|PROVIDER_BUSINESS_ERROR", line, re.I)),
        None,
    )
    provider_message = first_match(
        text,
        [
            r"providerMessage:\s*'([^']+)'",
            r'providerMessage:\s*"([^"]+)"',
            r'"providerMessage"\s*:\s*"([^"]+)"',
            r"ProviderBusinessError:\s*([^\n]+)",
        ],
    )
    return {
        "provider_error": provider_error,
        "provider_code": provider_code,
        # Fall back to a synthetic message only for the exit-code-143 case.
        "provider_message": provider_message or ("ZCode CLI exited with code 143" if exit_143 else None),
        "provider_request_id": first_match(
            text,
            [
                r"providerRequestId:\s*'([^']+)'",
                r'providerRequestId:\s*"([^"]+)"',
                r'"providerRequestId"\s*:\s*"([^"]+)"',
                r"request_id:\s*'([^']+)'",
                r'"request_id"\s*:\s*"([^"]+)"',
            ],
        ),
        "provider_error_line": provider_line,
        "provider_id": first_match(
            text,
            [r"providerId:\s*'([^']+)'", r'providerId:\s*"([^"]+)"', r'"providerId"\s*:\s*"([^"]+)"'],
        ),
        "provider_kind": first_match(
            text,
            [r"providerKind:\s*'([^']+)'", r'providerKind:\s*"([^"]+)"', r'"providerKind"\s*:\s*"([^"]+)"'],
        ),
        # Either condition already implies provider_error, so no extra guard is needed.
        "retryable_provider_error": temporary or exit_143,
    }
112
+
113
+
114
def read_json_optional(path: Path) -> dict[str, Any]:
    """Return the JSON object stored at *path*, or ``{}`` when unavailable.

    An empty dict is returned when *path* does not exist or when the document's
    top-level value is not an object. Invalid JSON still raises.
    """
    if path.exists():
        parsed = json.loads(path.read_text(encoding="utf-8"))
        if isinstance(parsed, dict):
            return parsed
    return {}
119
+
120
+
121
def bool_or_none(value: Any) -> bool | None:
    """Pass real booleans through unchanged; map every other value to ``None``."""
    if isinstance(value, bool):
        return value
    return None
123
+
124
+
125
def row_changed_count(row: dict[str, Any]) -> int | None:
    """Return how many files a duel row reports as changed, if it says so.

    ``row["audit"]["changed_count"]`` is preferred when it is a genuine
    integer. Otherwise the length of the ``row["changed_files"]`` list is
    used. ``None`` is returned when neither source is usable.
    """
    audit = row.get("audit")
    if isinstance(audit, dict):
        audited = audit.get("changed_count")
        # bool is a subclass of int; True/False is not a meaningful count.
        if isinstance(audited, int) and not isinstance(audited, bool):
            return audited
    changed_files = row.get("changed_files")
    if isinstance(changed_files, list):
        return len(changed_files)
    return None
133
+
134
+
135
def classify_duel_supervisor_state(row: dict[str, Any], provider: dict[str, Any]) -> dict[str, Any]:
    """Derive the supervisor state for one duel row.

    Returns a dict with ``supervisor_state``, ``partial_artifacts_possible``
    and ``safe_to_retry_later``. The states, in decision order, are:

    * ``success`` - ``row["run_ok"]`` is ``True``.
    * ``cli_error`` - the run failed without a provider error.
    * ``retryable_provider_error`` - retryable provider error, zero changed files.
    * ``partial_success`` - files were changed and the audit reports ``ok``.
    * ``unsafe_partial`` - anything else.
    """

    def outcome(state: str, *, partial: bool, retry: bool) -> dict[str, Any]:
        return {
            "supervisor_state": state,
            "partial_artifacts_possible": partial,
            "safe_to_retry_later": retry,
        }

    if row.get("run_ok") is True:
        return outcome("success", partial=False, retry=False)
    if not provider.get("provider_error"):
        return outcome("cli_error", partial=False, retry=False)

    changed_count = row_changed_count(row)
    audit = row.get("audit")
    audit_passed = isinstance(audit, dict) and audit.get("ok") is True
    has_changes = changed_count is not None and changed_count > 0

    if changed_count == 0 and provider.get("retryable_provider_error"):
        return outcome("retryable_provider_error", partial=False, retry=True)
    if has_changes and audit_passed:
        return outcome("partial_success", partial=True, retry=False)
    # An unknown change count is treated as "artifacts may exist".
    return outcome("unsafe_partial", partial=changed_count is None or changed_count > 0, retry=False)
152
+
153
+
154
def validation_summary(row: dict[str, Any]) -> str:
    """Render a row's validation outcome as a short string.

    Returns ``""`` when ``row["validation"]`` is not a dict. Otherwise returns
    ``"pass"`` or ``"fail"``, followed by ``"; returncode=<n>"`` when the
    validation dict carries a non-``None`` ``returncode``. The outcome is
    ``"pass"`` if either ``validation["ok"]`` or ``row["validation_ok"]`` is
    ``True``.
    """
    validation = row.get("validation")
    if not isinstance(validation, dict):
        return ""
    passed = validation.get("ok") is True or row.get("validation_ok") is True
    label = "pass" if passed else "fail"
    code = validation.get("returncode")
    if code is None:
        return label
    return f"{label}; returncode={code}"
161
+
162
+
163
def add_numeric(record: dict[str, Any], key: str, value: Any) -> None:
    """Store *value* under *key* in *record* as a float, if it is a finite number.

    *record* is mutated in place. Values that ``finite_number`` rejects leave
    *record* untouched.
    """
    number = finite_number(value)
    if number is None:
        return
    record[key] = number
167
+
168
+
169
def duel_tool_name(raw_tool: Any) -> str | None:
    """Map a duel row's raw tool label to its canonical ledger tool name.

    ``"glm"`` is an alias for ``"claude-code-glm52"``. Labels already listed
    in ``SUPPORTED_TOOLS`` are returned unchanged. Anything else gives ``None``.
    """
    for alias, canonical in (("zcode", "zcode"), ("glm", "claude-code-glm52")):
        if raw_tool == alias:
            return canonical
    return raw_tool if raw_tool in SUPPORTED_TOOLS else None
177
+
178
+
179
def build_duel_record(row: dict[str, Any], *, source: Path, run_dir: Path) -> dict[str, Any] | None:
    """Convert one external duel result row into a local ledger record.

    Args:
        row: A single entry from the duel results ``rows`` list.
        source: Path of the results file the row came from (recorded in ``notes``).
        run_dir: Directory of the duel run; the per-task result JSON is looked
            up under ``run_dir / "_control"``.

    Returns:
        The ledger record with every ``None``-valued key removed, or ``None``
        when the row's tool is not recognised or ``task_id`` is not a string.
    """
    raw_tool = row.get("tool")
    tool = duel_tool_name(raw_tool)
    task_id = row.get("task_id")
    if tool is None or not isinstance(task_id, str):
        return None

    # The control directory and file name use the raw tool label from the row,
    # not the canonical tool name.
    result_path = run_dir / "_control" / str(raw_tool) / task_id / f"{raw_tool}-result.json"
    result_json = read_json_optional(result_path)
    exit_code = result_json.get("exit_code")
    nonzero_exit = isinstance(exit_code, int) and exit_code != 0
    error_exit = row.get("run_ok") is False or result_json.get("ok") is False or nonzero_exit

    provider = classify_provider_error_text(
        stdout=str(result_json.get("stdout") or ""),
        stderr=str(result_json.get("stderr") or ""),
        exit_code=exit_code,
    )
    if not error_exit:
        # Error-looking text in the output of a run that did not fail is
        # not reported as a provider error.
        provider = {**provider, "provider_error": False, "retryable_provider_error": False}

    state = classify_duel_supervisor_state(row, provider)
    if state["supervisor_state"] == "partial_success":
        status = "partial"
    elif row.get("run_ok") is True:
        status = "pass"
    else:
        status = "fail"

    usage_fields = ("tokens_total", "input_tokens", "output_tokens", "cache_read_tokens")
    usage_available = any(finite_number(row.get(field)) is not None for field in usage_fields)

    record: dict[str, Any] = {
        "recorded_at": utc_now(),
        "run_id": f"duel-{run_dir.name}-{tool}-{task_id}",
        "tool": tool,
        "task_id": task_id,
        "task_name": task_id.replace("_", " "),
        "task_kind": row.get("kind"),
        "status": status,
        "validation": validation_summary(row),
        "validation_ok": bool_or_none(row.get("validation_ok")),
        "scope_ok": bool_or_none(row.get("scope_ok")),
        "run_ok": bool_or_none(row.get("run_ok")),
        "output_files_ok": bool_or_none(row.get("output_files_ok")),
        "quality_score": finite_number(row.get("quality_score")),
        "preview": row.get("preview"),
        "source_run_dir": str(run_dir),
        "source_result_path": str(result_path) if result_path.exists() else None,
        "notes": f"Imported from external duel result {source}",
        "supervisor_state": state["supervisor_state"],
        "partial_artifacts_possible": state["partial_artifacts_possible"],
        "safe_to_retry_later": state["safe_to_retry_later"],
        "provider_error": bool(provider.get("provider_error")),
        "retryable_provider_error": bool(provider.get("retryable_provider_error")),
        "usage_available": usage_available,
        "attempt_count": 1,
        "attempts": 1,
        "retry_count": 0,
        "retry_delays_ms": [],
        "quota_percent_status": "unavailable",
        "quota_percent_unavailable_reason": "historical_duel_missing_authoritative_quota_snapshot",
    }

    if not usage_available:
        if provider.get("provider_error"):
            record["no_usage_reason"] = "provider_error_without_zcode_cli_usage"
        else:
            record["no_usage_reason"] = "historical_duel_missing_usage_json"

    provider_detail_keys = (
        "provider_code",
        "provider_message",
        "provider_request_id",
        "provider_error_line",
        "provider_id",
        "provider_kind",
    )
    for detail_key in provider_detail_keys:
        if provider.get(detail_key) is not None:
            record[detail_key] = provider[detail_key]

    wall_ms = finite_number(row.get("wall_ms"))
    if wall_ms is not None:
        add_numeric(record, "duration_seconds", wall_ms / 1000)

    # Numeric metrics are copied under the same key, and only when finite.
    for metric in (*usage_fields, "lines_added", "lines_deleted"):
        add_numeric(record, metric, row.get(metric))

    changed_count = row_changed_count(row)
    if changed_count is not None:
        record["files_changed"] = float(changed_count)

    return {key: value for key, value in record.items() if value is not None}
261
+
262
+
263
def resolve_run_dir(raw_run_dir: Any, source: Path) -> Path:
    """Work out the duel run directory referenced by a results file.

    Resolution order:

    1. A missing, non-string or blank *raw_run_dir* gives ``source.parent``.
    2. An absolute *raw_run_dir* is returned as-is.
    3. If ``source.parent`` has the same final name as *raw_run_dir* and
       contains ``_control``, ``source.parent`` is returned.
    4. Otherwise *raw_run_dir* is joined onto each ancestor of *source*,
       nearest first; the first candidate containing ``_control`` or
       ``results.json`` is returned.
    5. If nothing matches, ``source.parent`` is returned.
    """
    fallback = source.parent
    if not isinstance(raw_run_dir, str) or not raw_run_dir.strip():
        return fallback

    raw_path = Path(raw_run_dir)
    if raw_path.is_absolute():
        return raw_path

    if fallback.name == raw_path.name and (fallback / "_control").exists():
        return fallback

    markers = ("_control", "results.json")
    for ancestor in source.parents:
        candidate = ancestor / raw_path
        if any((candidate / marker).exists() for marker in markers):
            return candidate
    return fallback
276
+
277
+
278
def import_duel_results(args: Any) -> int:
    """Append records built from an external duel results file to the ledger.

    Reads these attributes from *args*:

    * ``source`` - path of the duel results JSON (an object with ``rows``).
    * ``path`` - path of the JSON Lines ledger to append to; parent
      directories are created as needed.
    * ``tool`` - ``"all"`` or a single tool name to keep.
    * ``allow_duplicates`` - when false, rows whose ``run_id`` is already in
      the ledger (or was written earlier in this call) are skipped.

    A JSON summary (``imported``, ``path``, ``skipped``) is printed to stdout.

    Returns:
        ``0`` on completion.

    Raises:
        ValueError: if the source document is not an object with a ``rows`` list.
    """
    payload = read_json(args.source)
    rows = payload.get("rows") if isinstance(payload, dict) else None
    if not isinstance(rows, list):
        raise ValueError(f"{args.source}: expected a JSON object with rows[]")
    run_dir = resolve_run_dir(payload.get("run_dir"), args.source)

    # Only string run_ids can ever collide with the ids generated here, so
    # malformed ledger entries (non-object lines, unhashable or missing ids)
    # are ignored instead of aborting the import.
    existing_run_ids = {
        record["run_id"]
        for record in load_records(args.path)
        if isinstance(record, dict) and isinstance(record.get("run_id"), str)
    }

    imported = 0
    skipped = 0
    args.path.parent.mkdir(parents=True, exist_ok=True)
    with args.path.open("a", encoding="utf-8") as handle:
        for row in rows:
            if not isinstance(row, dict):
                continue
            record = build_duel_record(row, source=args.source, run_dir=run_dir)
            if record is None:
                continue
            if args.tool != "all" and record["tool"] != args.tool:
                continue
            if not args.allow_duplicates and record["run_id"] in existing_run_ids:
                skipped += 1
                continue
            handle.write(json.dumps(record, sort_keys=True) + "\n")
            # Remember the id so a repeated row in the same source is also skipped.
            existing_run_ids.add(record["run_id"])
            imported += 1
    print(json.dumps({"imported": imported, "path": str(args.path), "skipped": skipped}, indent=2, sort_keys=True))
    return 0