svgap 0.3.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- svgap/__init__.py +3 -0
- svgap/__main__.py +5 -0
- svgap/adjudication.py +524 -0
- svgap/audit.py +384 -0
- svgap/backends/__init__.py +1 -0
- svgap/backends/base.py +12 -0
- svgap/backends/reference_yosys.py +477 -0
- svgap/backends/registry.py +56 -0
- svgap/challenge.py +247 -0
- svgap/cli.py +561 -0
- svgap/demo.py +112 -0
- svgap/demo_assets/safe/design.sv +27 -0
- svgap/demo_assets/safe/manifest.toml +32 -0
- svgap/demo_assets/tb.sv +25 -0
- svgap/demo_assets/unsafe/design.sv +13 -0
- svgap/demo_assets/unsafe/manifest.toml +32 -0
- svgap/functional.py +165 -0
- svgap/legibility.py +178 -0
- svgap/manifest.py +152 -0
- svgap/model.py +90 -0
- svgap/onboarding.py +61 -0
- svgap/pilot.py +129 -0
- svgap/provenance.py +37 -0
- svgap/reporting.py +138 -0
- svgap/study.py +78 -0
- svgap/validation.py +97 -0
- svgap-0.3.0a1.dist-info/METADATA +265 -0
- svgap-0.3.0a1.dist-info/RECORD +33 -0
- svgap-0.3.0a1.dist-info/WHEEL +5 -0
- svgap-0.3.0a1.dist-info/entry_points.txt +5 -0
- svgap-0.3.0a1.dist-info/licenses/LICENSE +201 -0
- svgap-0.3.0a1.dist-info/licenses/NOTICE +5 -0
- svgap-0.3.0a1.dist-info/top_level.txt +1 -0
svgap/__init__.py
ADDED
svgap/__main__.py
ADDED
svgap/adjudication.py
ADDED
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import hashlib
|
|
5
|
+
import hmac
|
|
6
|
+
import json
|
|
7
|
+
from dataclasses import asdict, dataclass, field
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Literal, Protocol
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Closed set of outcome labels used for adjudication and calibration verdicts.
Verdict = Literal["hazard_demonstrated", "no_divergence_observed", "inconclusive"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TraceError(ValueError):
    """Raised when trace data or trace-comparison arguments are invalid.

    Subclasses ``ValueError`` so callers that catch ``ValueError`` also
    catch this error.
    """
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class InstrumenterUnavailable(RuntimeError):
    """Raised when an instrumenter cannot supply a trace for a requested seed."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class TraceSample:
    """A single observation in a trace: one cycle on one clock."""

    # Cycle number at which the values were sampled.
    cycle: int
    # Identifier of the clock the sample belongs to.
    clock: str
    # Mapping of signal name to the sampled value, stored as text.
    values: dict[str, str]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class Trace:
    """An immutable recorded trace: metadata plus an ordered run of samples."""

    # Version tag of the trace schema this object conforms to.
    schema_version: str
    # Caller-chosen identifier for this trace.
    trace_id: str
    # Digest identifying the candidate the trace was recorded from.
    candidate_digest: str
    # Description of the observer that produced the trace.
    observer: dict[str, str]
    # Declared signal names, in declaration order.
    signals: tuple[str, ...]
    # Samples in recorded order.
    samples: tuple[TraceSample, ...]
    # Free-form origin metadata; empty when none was supplied.
    provenance: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable copy of the trace.

        The returned structure shares no mutable state with the instance:
        ``observer`` and ``provenance`` are copied, so editing the export
        cannot alter this frozen trace (or the digest computed from it).
        """
        from copy import deepcopy

        return {
            "schema_version": self.schema_version,
            "trace_id": self.trace_id,
            "candidate_digest": self.candidate_digest,
            "observer": dict(self.observer),
            "signals": list(self.signals),
            "samples": [asdict(sample) for sample in self.samples],
            # Provenance values are arbitrary, so nested containers need a deep copy.
            "provenance": deepcopy(self.provenance),
        }
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class Divergence:
    """The first point at which an observed trace departs from the golden one."""

    # Cycle number of the observed sample where the mismatch was found.
    cycle: int
    # Name of the mismatching signal, or a pseudo-signal label for structural mismatches.
    signal: str
    # Value taken from the golden trace.
    golden: str
    # Value taken from the observed trace.
    observed: str
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass(frozen=True)
class ComparisonResult:
    """Outcome of comparing an observed trace against a golden trace."""

    # True when some attempted alignment matched on every compared sample.
    equivalent: bool
    # Shift at which the traces matched; None when no shift matched.
    matched_shift: int | None = None
    # Earliest mismatch of the best attempted alignment; None on a match.
    first_divergence: Divergence | None = None
    # Human-readable notes about the comparison.
    diagnostics: tuple[str, ...] = ()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class Instrumenter(Protocol):
    """Structural interface for objects that can supply a trace per seed."""

    # Identifier of the instrumenter implementation.
    name: str
    # Version label of the instrumenter implementation.
    version: str
    # Operating-mode label of the instrumenter.
    mode: str

    def trace_for_seed(self, seed: int) -> Trace:
        """Return the trace associated with ``seed``."""
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MockPrerecordedInstrumenter:
    """Instrumenter that replays traces supplied up front, keyed by seed."""

    name = "mock-prerecorded"
    version = "1.0"
    mode = "mock_prerecorded"

    def __init__(self, traces: dict[int, Trace]):
        """Store the seed-to-trace mapping; the mapping is kept by reference."""
        self.traces = traces

    def trace_for_seed(self, seed: int) -> Trace:
        """Return the prerecorded trace for ``seed``.

        Raises:
            InstrumenterUnavailable: If no trace was recorded for ``seed``.
        """
        try:
            recorded = self.traces[seed]
        except KeyError as missing:
            raise InstrumenterUnavailable(f"no prerecorded trace for seed {seed}") from missing
        return recorded
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ResetReleaseSkewInstrumenter:
    """Capability marker; deliberately contains no perturbation implementation.

    Every request for a trace fails with ``InstrumenterUnavailable``.
    """

    name = "reset-release-skew"
    version = "unavailable"
    mode = "unavailable"

    def trace_for_seed(self, seed: int) -> Trace:
        """Always raise ``InstrumenterUnavailable``; ``seed`` is not used."""
        reason = (
            "ResetReleaseSkewInstrumenter is BLOCKED_PENDING_PATENT_AND_EMPLOYER_REVIEW; "
            "no netlist rewrite or reset-skew execution is included"
        )
        raise InstrumenterUnavailable(reason)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def load_trace(path: Path) -> Trace:
    """Read a UTF-8 JSON trace file and validate it into a ``Trace``.

    Args:
        path: Location of the JSON trace file.

    Returns:
        The validated trace.

    Raises:
        TraceError: If the file cannot be read, is not valid UTF-8, is not
            valid JSON, or does not satisfy the trace schema.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and the UnicodeDecodeError
        # raised by read_text() for files that are not valid UTF-8.
        raise TraceError(f"cannot read trace {path}: {exc}") from exc
    return trace_from_dict(payload)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def trace_from_dict(payload: Any) -> Trace:
    """Validate a decoded schema-v1 trace object and build a ``Trace``.

    Args:
        payload: Decoded JSON value expected to be a trace object.

    Returns:
        A ``Trace`` holding shallow copies of the payload's containers.

    Raises:
        TraceError: If a field is missing, unexpected, or malformed. Malformed
            input never surfaces as ``TypeError``.
    """
    if not isinstance(payload, dict):
        raise TraceError("trace must be an object")
    allowed = {
        "schema_version",
        "trace_id",
        "candidate_digest",
        "observer",
        "signals",
        "samples",
        "provenance",
    }
    required = allowed - {"provenance"}
    if set(payload) - allowed or not required.issubset(payload):
        raise TraceError("trace fields do not match schema v1")
    if payload["schema_version"] != "1.0":
        raise TraceError("unsupported trace schema_version")
    for field_name in ("trace_id", "candidate_digest"):
        text = payload[field_name]
        if not isinstance(text, str) or not text.strip():
            raise TraceError(f"{field_name} must be a nonempty string")

    signals = payload["signals"]
    if (
        not isinstance(signals, list)
        or not signals
        # Type-check the entries before hashing them: set() on a nested
        # list or dict would raise TypeError instead of TraceError.
        or not all(isinstance(item, str) and item for item in signals)
        or len(signals) != len(set(signals))
    ):
        raise TraceError("signals must be a nonempty unique string array")

    observer = payload["observer"]
    if not isinstance(observer, dict) or set(observer) != {"name", "version", "sampling"}:
        raise TraceError("observer fields do not match schema v1")
    if not all(isinstance(value, str) and value for value in observer.values()):
        raise TraceError("observer fields must be nonempty strings")

    samples_raw = payload["samples"]
    if not isinstance(samples_raw, list) or not samples_raw:
        raise TraceError("samples must be a nonempty array")
    declared = set(signals)
    samples: list[TraceSample] = []
    prior_cycle = -1
    for item in samples_raw:
        if not isinstance(item, dict) or set(item) != {"cycle", "clock", "values"}:
            raise TraceError("sample fields do not match schema v1")
        cycle, clock, values = item["cycle"], item["clock"], item["values"]
        # bool is a subclass of int, so JSON true/false must be rejected
        # explicitly. prior_cycle starts at -1, which also rules out negatives.
        if isinstance(cycle, bool) or not isinstance(cycle, int) or cycle <= prior_cycle:
            raise TraceError("sample cycles must be strictly increasing nonnegative integers")
        if not isinstance(clock, str) or not clock:
            raise TraceError("sample clock must be nonempty")
        if not isinstance(values, dict) or set(values) != declared:
            raise TraceError("every sample must contain exactly the declared signals")
        if not all(
            isinstance(key, str) and isinstance(value, str) and bool(value)
            for key, value in values.items()
        ):
            raise TraceError("sample values must map strings to nonempty strings")
        samples.append(TraceSample(cycle=cycle, clock=clock, values=dict(values)))
        prior_cycle = cycle

    provenance = payload.get("provenance", {})
    if not isinstance(provenance, dict):
        raise TraceError("trace provenance must be an object")
    return Trace(
        schema_version="1.0",
        trace_id=payload["trace_id"],
        candidate_digest=payload["candidate_digest"],
        observer={key: str(value) for key, value in observer.items()},
        signals=tuple(signals),
        samples=tuple(samples),
        provenance=dict(provenance),
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def trace_digest(trace: Trace) -> str:
    """Return the ``sha256:``-prefixed digest of the trace's canonical JSON.

    The canonical form is ``trace.to_dict()`` serialized with sorted keys
    and no whitespace between tokens.
    """
    canonical = json.dumps(trace.to_dict(), separators=(",", ":"), sort_keys=True)
    hasher = hashlib.sha256()
    hasher.update(canonical.encode())
    return f"sha256:{hasher.hexdigest()}"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def derive_seed_bits(
    study_digest: str, candidate_digest: str, seed: int, width: int
) -> tuple[int, ...]:
    """Derive ``width`` deterministic bits from the study, candidate and seed.

    The SHA-256 hash of ``study_digest`` keys an HMAC-SHA256 that is run over
    ``"{candidate_digest}:{seed}:{counter}"`` for counter 0, 1, 2, ...; each
    256-bit block is unpacked byte by byte, least-significant bit first.

    Raises:
        ValueError: If ``seed`` or ``width`` is negative.
    """
    if seed < 0 or width < 0:
        raise ValueError("seed and width must be nonnegative")
    hmac_key = hashlib.sha256(study_digest.encode()).digest()
    bits_per_block = hashlib.sha256().digest_size * 8
    # Ceiling division: the number of HMAC blocks needed to cover ``width`` bits.
    blocks_needed = -(-width // bits_per_block)
    bits: list[int] = []
    for counter in range(blocks_needed):
        text = f"{candidate_digest}:{seed}:{counter}"
        mac = hmac.new(hmac_key, text.encode(), hashlib.sha256).digest()
        for byte in mac:
            bits.extend((byte >> position) & 1 for position in range(8))
    return tuple(bits[:width])
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def compare_traces(
    golden: Trace,
    observed: Trace,
    *,
    max_shift: int = 0,
    warmup_samples: int = 0,
    x_policy: Literal["exact", "golden_x_wildcard"] = "exact",
) -> ComparisonResult:
    """Compare an observed trace with a golden trace, allowing a global shift.

    For each shift ``s`` in ``0..max_shift`` the golden window
    ``samples[warmup:len - s]`` is compared sample by sample with the observed
    window ``samples[warmup + s:]``. The first shift whose windows have equal
    length and match on clock and values yields an equivalent result. If none
    matches, the reported divergence comes from the attempt with the longest
    matching prefix, ties going to the smaller shift.

    Args:
        golden: Reference trace.
        observed: Trace under test.
        max_shift: Largest number of leading observed samples to skip.
        warmup_samples: Number of leading samples excluded from both traces.
        x_policy: Value-matching policy forwarded to ``first_value_mismatch``.

    Returns:
        A ``ComparisonResult`` describing the match or the first divergence.

    Raises:
        TraceError: If an argument is negative, the traces disagree on
            candidate digest or signals, or no samples remain to compare.
    """
    if max_shift < 0 or warmup_samples < 0:
        raise TraceError("max_shift and warmup_samples must be nonnegative")
    if golden.candidate_digest != observed.candidate_digest:
        raise TraceError("trace candidate digests differ")
    if golden.signals != observed.signals:
        raise TraceError("trace signal declarations differ")
    if warmup_samples >= min(len(golden.samples), len(observed.samples)):
        raise TraceError("warmup removes every comparable sample")

    # Each attempt is (matching prefix length, shift, first divergence).
    attempts: list[tuple[int, int, Divergence | None]] = []
    golden_count = len(golden.samples)
    for shift in range(max_shift + 1):
        golden_end = golden_count - shift
        # Stop once the golden window is empty. A negative end index must never
        # reach the slice below: it would wrap around and select an unrelated
        # prefix of the golden trace. Larger shifts only shrink the window.
        if golden_end <= warmup_samples:
            break
        golden_samples = golden.samples[warmup_samples:golden_end]
        observed_samples = observed.samples[warmup_samples + shift :]
        if not observed_samples:
            # The observed window also only shrinks as the shift grows.
            break
        if len(golden_samples) != len(observed_samples):
            attempts.append(
                (
                    0,
                    shift,
                    Divergence(
                        observed_samples[0].cycle,
                        "<trace-length>",
                        str(len(golden_samples)),
                        str(len(observed_samples)),
                    ),
                )
            )
            continue
        first: Divergence | None = None
        prefix = 0
        for left, right in zip(golden_samples, observed_samples):
            if left.clock != right.clock:
                first = Divergence(right.cycle, "<clock>", left.clock, right.clock)
                break
            mismatch = first_value_mismatch(left.values, right.values, golden.signals, x_policy)
            if mismatch is not None:
                signal, expected, actual = mismatch
                first = Divergence(right.cycle, signal, expected, actual)
                break
            prefix += 1
        if first is None:
            return ComparisonResult(equivalent=True, matched_shift=shift)
        attempts.append((prefix, shift, first))

    if not attempts:
        raise TraceError("traces have no comparable samples")
    # Prefer the longest matching prefix, then the smallest shift.
    _prefix, shift, divergence = max(attempts, key=lambda item: (item[0], -item[1]))
    return ComparisonResult(
        equivalent=False,
        matched_shift=None,
        first_divergence=divergence,
        diagnostics=(f"no global shift in 0..{max_shift} matched; best attempted shift={shift}",),
    )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def first_value_mismatch(
    golden: dict[str, str],
    observed: dict[str, str],
    signals: tuple[str, ...],
    x_policy: str,
) -> tuple[str, str, str] | None:
    """Return the first signal whose observed value differs from the golden one.

    Args:
        golden: Golden values keyed by signal name.
        observed: Observed values keyed by signal name.
        signals: Signal names to check, in priority order.
        x_policy: ``"exact"`` compares values verbatim. ``"golden_x_wildcard"``
            additionally skips signals whose golden value is nonempty and made
            up solely of ``x``/``z`` characters (case-insensitive).

    Returns:
        ``(signal, golden_value, observed_value)`` for the first mismatch, or
        ``None`` when every signal matches.

    Raises:
        TraceError: If ``x_policy`` is not a supported policy name.
    """
    if x_policy not in {"exact", "golden_x_wildcard"}:
        raise TraceError(f"unsupported x_policy: {x_policy}")
    wildcard_enabled = x_policy == "golden_x_wildcard"
    for signal in signals:
        expected, actual = golden[signal], observed[signal]
        # Require a nonempty golden value: set("") is a subset of every set,
        # so an empty string would otherwise count as a wildcard.
        if wildcard_enabled and expected and set(expected.lower()) <= {"x", "z"}:
            continue
        if expected != actual:
            return signal, expected, actual
    return None
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def adjudicate_prerecorded(
    *,
    candidate_id: str,
    rule_id: str,
    golden: Trace,
    instrumenter: MockPrerecordedInstrumenter,
    seeds: list[int],
    semantics_name: str,
    semantics_version: str,
    calibration_status: Literal["pass", "fail", "not_run"],
    calibration_suite_digest: str,
    max_shift: int = 0,
    warmup_samples: int = 0,
) -> dict[str, Any]:
    """Adjudicate a candidate by comparing per-seed traces with the golden trace.

    Seeds are only evaluated when ``calibration_status`` is ``"pass"``. They
    are processed in order and evaluation stops at the first seed whose trace
    is not equivalent to ``golden``. A seed whose trace is unavailable or not
    comparable is recorded in the diagnostics and marks the run inconclusive
    unless a later seed demonstrates a hazard.

    Returns:
        A schema-1.0 report dict with the verdict, the reproducer (if any),
        calibration details, diagnostics and a UTC generation timestamp.
    """
    notes: list[str] = []
    evidence = None
    finished = 0
    outcome: Verdict = "no_divergence_observed"

    if calibration_status == "pass":
        for seed in seeds:
            try:
                seed_trace = instrumenter.trace_for_seed(seed)
                result = compare_traces(
                    golden,
                    seed_trace,
                    max_shift=max_shift,
                    warmup_samples=warmup_samples,
                )
            except (InstrumenterUnavailable, TraceError) as exc:
                notes.append(f"seed {seed}: {exc}")
                outcome = "inconclusive"
                continue
            finished += 1
            if result.equivalent:
                continue
            # First non-equivalent seed: record the reproducer and stop.
            outcome = "hazard_demonstrated"
            mismatch = result.first_divergence
            if mismatch is None:
                raise AssertionError("non-equivalent comparison lacks divergence")
            evidence = {
                "seed": seed,
                "observed_trace_digest": trace_digest(seed_trace),
                "first_divergence": asdict(mismatch),
            }
            notes.extend(result.diagnostics)
            break
    else:
        outcome = "inconclusive"
        notes.append("calibration gate did not pass")

    # Without a single completed comparison there is nothing to conclude.
    if finished == 0 and outcome != "hazard_demonstrated":
        outcome = "inconclusive"

    return {
        "schema_version": "1.0",
        "candidate_id": candidate_id,
        "candidate_digest": golden.candidate_digest,
        "rule_id": rule_id,
        "semantics": {"name": semantics_name, "version": semantics_version},
        "instrumenter": {
            "name": instrumenter.name,
            "version": instrumenter.version,
            "mode": instrumenter.mode,
        },
        "observer": {
            "name": golden.observer["name"],
            "version": golden.observer["version"],
        },
        "verdict": outcome,
        "seed_budget": len(seeds),
        "seeds_completed": finished,
        "golden_trace_digest": trace_digest(golden),
        "reproducer": evidence,
        "calibration": {
            "status": calibration_status,
            "suite_digest": calibration_suite_digest,
        },
        "diagnostics": notes,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def trace_from_csv(
    path: Path,
    *,
    trace_id: str,
    candidate_digest: str,
    observer_name: str,
    observer_version: str,
    sampling: str,
) -> Trace:
    """Build a ``Trace`` from a long-format CSV file.

    The file must be UTF-8 with the exact header ``cycle,clock,signal,value``.
    Rows are grouped into one sample per ``(cycle, clock)`` pair, samples are
    sorted by that pair, and the signal list is the sorted set of observed
    signal names. The provenance records the file name and the SHA-256 of the
    same bytes that were parsed.

    Args:
        path: Location of the CSV file.
        trace_id: Identifier stored on the resulting trace.
        candidate_digest: Candidate digest stored on the resulting trace.
        observer_name: Observer name stored on the resulting trace.
        observer_version: Observer version stored on the resulting trace.
        sampling: Observer sampling description stored on the resulting trace.

    Raises:
        TraceError: If a metadata argument is blank, the file cannot be read
            or decoded, the CSV is malformed, or a sample lacks a signal.
            Errors raised while reading or parsing carry the prefix
            ``cannot normalize trace CSV``.
    """
    import io

    for name, value in (
        ("trace_id", trace_id),
        ("candidate_digest", candidate_digest),
        ("observer_name", observer_name),
        ("observer_version", observer_version),
        ("sampling", sampling),
    ):
        if not value.strip():
            raise TraceError(f"{name} must be nonempty")

    grouped: dict[tuple[int, str], dict[str, str]] = {}
    signals: set[str] = set()
    try:
        # Read once so the provenance hash describes exactly the parsed bytes.
        raw = path.read_bytes()
        # newline="" leaves line endings untouched for the csv module.
        reader = csv.DictReader(io.StringIO(raw.decode("utf-8"), newline=""))
        if reader.fieldnames != ["cycle", "clock", "signal", "value"]:
            raise TraceError("trace CSV header must be cycle,clock,signal,value")
        for row in reader:
            # DictReader files surplus cells under the key None and pads short
            # rows with None values; both mean the row is malformed.
            if None in row or None in row.values():
                raise TraceError("trace CSV contains an invalid row")
            cycle = int(row["cycle"])
            clock, signal, value = row["clock"], row["signal"], row["value"]
            if cycle < 0 or not clock or not signal or not value:
                raise TraceError("trace CSV contains an invalid row")
            values = grouped.setdefault((cycle, clock), {})
            if signal in values:
                raise TraceError(f"duplicate trace value at cycle {cycle}: {signal}")
            values[signal] = value
            signals.add(signal)
    except (OSError, ValueError, csv.Error) as exc:
        # TraceError is a ValueError, so validation failures raised above are
        # re-wrapped here with the common prefix as well.
        raise TraceError(f"cannot normalize trace CSV: {exc}") from exc

    ordered_signals = tuple(sorted(signals))
    # NOTE(review): rows that share a cycle but differ in clock become separate
    # samples with equal cycle numbers, which trace_from_dict would reject as
    # not strictly increasing -- confirm whether multi-clock CSVs are intended.
    samples = tuple(
        TraceSample(cycle=cycle, clock=clock, values=values)
        for (cycle, clock), values in sorted(grouped.items())
    )
    if not samples or any(set(sample.values) != set(ordered_signals) for sample in samples):
        raise TraceError("every CSV sample must contain every observed signal")
    return Trace(
        schema_version="1.0",
        trace_id=trace_id,
        candidate_digest=candidate_digest,
        observer={"name": observer_name, "version": observer_version, "sampling": sampling},
        signals=ordered_signals,
        samples=samples,
        provenance={"source": path.name, "source_sha256": hashlib.sha256(raw).hexdigest()},
    )
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def run_calibration_suite(
    path: Path, *, max_shift: int = 0, warmup_samples: int = 0
) -> dict[str, Any]:
    """Run every case of a calibration suite and report per-case outcomes.

    The suite is a JSON object with ``schema_version``, ``suite_id``,
    ``instrumenter`` and ``cases``. Each case names a golden trace file and
    one or more observed trace files, relative to the suite directory, plus
    the expected verdict. A case yields ``hazard_demonstrated`` at the first
    observed trace that is not equivalent to the golden trace,
    ``inconclusive`` when a trace cannot be loaded or compared, and
    ``no_divergence_observed`` otherwise.

    Args:
        path: Location of the suite JSON file.
        max_shift: Forwarded to ``compare_traces``.
        warmup_samples: Forwarded to ``compare_traces``.

    Returns:
        A schema-1.0 report with the suite id, a digest over the suite file
        and every referenced trace file, the overall ``pass``/``fail`` status
        and the per-case outcomes.

    Raises:
        TraceError: If the suite file or a referenced trace file cannot be
            read, the suite does not match the schema, or a trace path
            escapes the suite directory.
    """
    path = path.resolve()
    try:
        raw = path.read_bytes()
        payload = json.loads(raw)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that json.loads raises for undecodable bytes.
        raise TraceError(f"cannot read calibration suite: {exc}") from exc
    if not isinstance(payload, dict) or set(payload) != {
        "schema_version",
        "suite_id",
        "instrumenter",
        "cases",
    }:
        raise TraceError("calibration suite fields do not match schema v1")
    if payload["schema_version"] != "1.0" or payload["instrumenter"] != "mock_prerecorded":
        raise TraceError("unsupported calibration suite")
    if not isinstance(payload["suite_id"], str) or not payload["suite_id"].strip():
        raise TraceError("calibration suite_id must be nonempty")
    cases = payload["cases"]
    if not isinstance(cases, list) or len(cases) < 2:
        raise TraceError("calibration suite requires at least two cases")

    # A tuple keeps the membership test equality-based, so an unhashable
    # ``expected`` (list or object) is rejected instead of raising TypeError.
    supported_verdicts = (
        "hazard_demonstrated",
        "no_divergence_observed",
        "inconclusive",
    )
    suite_dir = path.parent
    outcomes = []
    files = [path]
    for case in cases:
        if not isinstance(case, dict) or set(case) != {
            "case_id",
            "expected",
            "golden",
            "observed",
        }:
            raise TraceError("calibration case fields do not match schema v1")
        if not isinstance(case["case_id"], str) or not case["case_id"].strip():
            raise TraceError("calibration case_id must be nonempty")
        if case["expected"] not in supported_verdicts:
            raise TraceError("calibration expected verdict is unsupported")
        if not isinstance(case["golden"], str) or not case["golden"]:
            raise TraceError("calibration golden path must be nonempty")
        if (
            not isinstance(case["observed"], list)
            or not case["observed"]
            or not all(isinstance(item, str) and item for item in case["observed"])
        ):
            raise TraceError("calibration observed paths must be a nonempty string array")
        golden_path = (suite_dir / case["golden"]).resolve()
        observed_paths = [(suite_dir / item).resolve() for item in case["observed"]]
        # Resolve first, then check containment, so ".." cannot escape the suite.
        if not golden_path.is_relative_to(suite_dir) or any(
            not item.is_relative_to(suite_dir) for item in observed_paths
        ):
            raise TraceError("calibration trace paths must remain inside the suite directory")
        files.extend([golden_path, *observed_paths])

        actual: Verdict = "no_divergence_observed"
        diagnostics: list[str] = []
        try:
            golden = load_trace(golden_path)
            for observed_path in observed_paths:
                observed = load_trace(observed_path)
                comparison = compare_traces(
                    golden,
                    observed,
                    max_shift=max_shift,
                    warmup_samples=warmup_samples,
                )
                if not comparison.equivalent:
                    actual = "hazard_demonstrated"
                    break
        except TraceError as exc:
            actual = "inconclusive"
            diagnostics = [str(exc)]
        outcomes.append(
            {
                "case_id": case["case_id"],
                "expected": case["expected"],
                "actual": actual,
                "pass": actual == case["expected"],
                "diagnostics": diagnostics,
            }
        )

    digest = hashlib.sha256()
    for file_path in sorted(set(files)):
        relative = file_path.relative_to(suite_dir).as_posix()
        try:
            data = file_path.read_bytes()
        except OSError as exc:
            # A referenced file may be missing even though its case was already
            # recorded as inconclusive; report it through TraceError like every
            # other suite I/O failure.
            raise TraceError(f"cannot read calibration suite file {relative}: {exc}") from exc
        digest.update(relative.encode())
        digest.update(b"\0")
        digest.update(data)
        digest.update(b"\0")
    return {
        "schema_version": "1.0",
        "suite_id": payload["suite_id"],
        "suite_digest": "sha256:" + digest.hexdigest(),
        "status": "pass" if all(item["pass"] for item in outcomes) else "fail",
        "cases": outcomes,
    }
|