tigrcorn-certification 0.3.16.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,725 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import platform
6
+ import subprocess
7
+ import sys
8
+ import time
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import Any, Mapping
12
+
13
# Default locations, all relative to the ``source_root`` handed to the public
# entry points (each is joined onto ``source_root`` before use).

# Matrix definition listing every performance profile.
DEFAULT_PERFORMANCE_MATRIX_PATH = Path('docs/review/performance/performance_matrix.json')

# Fallback artifact roots, used when the matrix file does not name its own.
DEFAULT_BASELINE_ARTIFACT_ROOT = Path('docs/review/performance/artifacts/phase6_reference_baseline')
DEFAULT_CURRENT_ARTIFACT_ROOT = Path('docs/review/performance/artifacts/phase6_current_release')
16
+
17
+
18
+ @dataclass(slots=True)
19
+ class PerfProfile:
20
+ profile_id: str
21
+ family: str
22
+ description: str
23
+ driver: str
24
+ deployment_profile: str
25
+ lane: str = 'component_regression'
26
+ certification_platforms: list[str] = field(default_factory=list)
27
+ live_listener_required: bool = False
28
+ rfc_targets: list[str] = field(default_factory=list)
29
+ correctness_required: bool = False
30
+ hot_path: bool = False
31
+ iterations: int = 10
32
+ warmups: int = 1
33
+ units_per_iteration: int = 1
34
+ thresholds: dict[str, Any] = field(default_factory=dict)
35
+ relative_regression_budget: dict[str, Any] = field(default_factory=dict)
36
+ driver_config: dict[str, Any] = field(default_factory=dict)
37
+
38
+
39
+ @dataclass(slots=True)
40
+ class PerfMatrix:
41
+ matrix_name: str
42
+ baseline_artifact_root: str
43
+ current_artifact_root: str
44
+ profiles: list[PerfProfile]
45
+ metadata: dict[str, Any] = field(default_factory=dict)
46
+
47
+
48
+ @dataclass(slots=True)
49
+ class PerfProfileResult:
50
+ profile_id: str
51
+ passed: bool
52
+ artifact_dir: str
53
+ failure_reasons: list[str] = field(default_factory=list)
54
+ metrics: dict[str, Any] = field(default_factory=dict)
55
+ correctness: dict[str, Any] = field(default_factory=dict)
56
+ threshold_evaluation: dict[str, Any] = field(default_factory=dict)
57
+ relative_regression: dict[str, Any] = field(default_factory=dict)
58
+
59
+
60
+ @dataclass(slots=True)
61
+ class PerfRunSummary:
62
+ matrix_name: str
63
+ artifact_root: str
64
+ baseline_root: str | None
65
+ commit_hash: str
66
+ total: int
67
+ passed: int
68
+ failed: int
69
+ profiles: list[PerfProfileResult]
70
+
71
+
72
class PerfRunnerError(RuntimeError):
    """Raised by the performance runner, e.g. when no profiles are selected."""
74
+
75
+
76
def load_performance_matrix(path: str | Path) -> PerfMatrix:
    """Read the JSON matrix file at *path* and build a ``PerfMatrix``.

    Each entry under ``profiles`` must supply ``profile_id``, ``family``,
    ``description``, ``driver`` and ``deployment_profile`` (a missing one
    raises ``KeyError``). All other profile fields are optional. A profile
    without its own ``certification_platforms`` inherits the list found
    under the matrix-level ``metadata``.
    """
    document = json.loads(Path(path).read_text(encoding='utf-8'))
    inherited_platforms = [
        str(name) for name in document.get('metadata', {}).get('certification_platforms', [])
    ]

    def build_profile(entry: Mapping[str, Any]) -> PerfProfile:
        # Optional values are coerced so later code can rely on their types.
        platforms = entry.get('certification_platforms', inherited_platforms)
        return PerfProfile(
            profile_id=entry['profile_id'],
            family=entry['family'],
            description=entry['description'],
            driver=entry['driver'],
            deployment_profile=entry['deployment_profile'],
            lane=str(entry.get('lane', 'component_regression')),
            certification_platforms=[str(name) for name in platforms],
            live_listener_required=bool(entry.get('live_listener_required', False)),
            rfc_targets=list(entry.get('rfc_targets', [])),
            correctness_required=bool(entry.get('correctness_required', False)),
            hot_path=bool(entry.get('hot_path', False)),
            iterations=int(entry.get('iterations', 10)),
            warmups=int(entry.get('warmups', 1)),
            units_per_iteration=int(entry.get('units_per_iteration', 1)),
            thresholds=dict(entry.get('thresholds', {})),
            relative_regression_budget=dict(entry.get('relative_regression_budget', {})),
            driver_config=dict(entry.get('driver_config', {})),
        )

    profiles = [build_profile(entry) for entry in document.get('profiles', [])]
    default_baseline = DEFAULT_BASELINE_ARTIFACT_ROOT.as_posix()
    default_current = DEFAULT_CURRENT_ARTIFACT_ROOT.as_posix()
    return PerfMatrix(
        matrix_name=str(document.get('matrix_name', 'tigrcorn-performance-matrix')),
        baseline_artifact_root=str(document.get('baseline_artifact_root', default_baseline)),
        current_artifact_root=str(document.get('current_artifact_root', default_current)),
        profiles=profiles,
        metadata=dict(document.get('metadata', {})),
    )
108
+
109
+
110
def run_performance_matrix(
    source_root: str | Path,
    *,
    matrix_path: str | Path | None = None,
    artifact_root: str | Path | None = None,
    baseline_root: str | Path | None = None,
    profile_ids: list[str] | None = None,
    establish_baseline: bool = False,
) -> PerfRunSummary:
    """Run the selected profiles and write their artifacts to disk.

    Args:
        source_root: Directory that every other path is joined onto.
        matrix_path: Matrix file; defaults to ``DEFAULT_PERFORMANCE_MATRIX_PATH``.
        artifact_root: Output directory. When omitted, the matrix's baseline
            root is used if ``establish_baseline`` is set, otherwise its
            current root.
        baseline_root: Baseline to compare against. When omitted, the
            matrix's baseline root is used, unless ``establish_baseline`` is
            set, in which case no comparison is made.
        profile_ids: Restrict the run to these ids. ``None`` or an empty list
            selects every profile; ids not present in the matrix are ignored.
        establish_baseline: Treat this run as producing the baseline itself.

    Returns:
        A ``PerfRunSummary`` covering every profile that ran.

    Raises:
        PerfRunnerError: If the selection matches no profile.
    """
    source_root = Path(source_root)
    matrix_relative = DEFAULT_PERFORMANCE_MATRIX_PATH if matrix_path is None else Path(matrix_path)
    matrix = load_performance_matrix(source_root / matrix_relative)

    if profile_ids:
        wanted_ids = set(profile_ids)
    else:
        wanted_ids = {candidate.profile_id for candidate in matrix.profiles}
    selected_profiles = [candidate for candidate in matrix.profiles if candidate.profile_id in wanted_ids]
    if not selected_profiles:
        raise PerfRunnerError('no performance profiles selected')

    # Resolve the output directory.
    if artifact_root is not None:
        artifact_relative = Path(artifact_root)
    elif establish_baseline:
        artifact_relative = Path(matrix.baseline_artifact_root)
    else:
        artifact_relative = Path(matrix.current_artifact_root)
    artifact_root = source_root / artifact_relative
    artifact_root.mkdir(parents=True, exist_ok=True)

    # Resolve the baseline to compare against (None disables comparison).
    baseline_path: Path | None
    if baseline_root is not None:
        baseline_path = source_root / Path(baseline_root)
    elif establish_baseline:
        baseline_path = None
    else:
        baseline_path = source_root / Path(matrix.baseline_artifact_root)

    commit_hash = _resolve_commit_hash(source_root)
    environment = _environment_snapshot(matrix=matrix, command=sys.argv)

    # Imported here rather than at module level, so the benchmarks package is
    # only needed when a run is actually executed.
    from benchmarks.registry import get_driver

    results: list[PerfProfileResult] = []
    for profile in selected_profiles:
        measurement = get_driver(profile.driver)(profile, source_root=source_root)
        profile_dir = artifact_root / profile.profile_id
        profile_dir.mkdir(parents=True, exist_ok=True)

        metrics = _summarize_measurement(measurement, profile=profile)
        threshold_eval, failures = _evaluate_thresholds(profile, metrics)

        reported_checks = measurement.get('correctness_checks', {})
        if profile.correctness_required:
            # NOTE(review): an empty check mapping passes vacuously here.
            correctness_ok = all(reported_checks.values())
        else:
            correctness_ok = True
        correctness = {
            'required': profile.correctness_required,
            'checks': reported_checks,
            'passed': correctness_ok,
            'note': measurement.get('correctness_note', 'same-stack correctness-under-load checks'),
            'lane': profile.lane,
            'live_listener_required': profile.live_listener_required,
        }
        if not correctness_ok:
            failures.append('correctness-under-load checks failed')

        relative_regression = _evaluate_relative_regression(profile, metrics, baseline_path)
        regression_failed = not relative_regression.get('passed', True)
        if relative_regression.get('evaluated') and regression_failed:
            failures.extend(relative_regression.get('failure_reasons', []))

        profile_passed = not failures
        _write_profile_artifacts(
            profile_dir,
            profile=profile,
            matrix=matrix,
            commit_hash=commit_hash,
            metrics=metrics,
            environment=environment,
            correctness=correctness,
            threshold_evaluation=threshold_eval,
            relative_regression=relative_regression,
            measurement=measurement,
            passed=profile_passed,
            failure_reasons=failures,
        )
        results.append(
            PerfProfileResult(
                profile_id=profile.profile_id,
                passed=profile_passed,
                artifact_dir=str(profile_dir),
                failure_reasons=failures,
                metrics=metrics,
                correctness=correctness,
                threshold_evaluation=threshold_eval,
                relative_regression=relative_regression,
            )
        )

    passed_count = sum(1 for outcome in results if outcome.passed)
    summary = PerfRunSummary(
        matrix_name=matrix.matrix_name,
        artifact_root=str(artifact_root),
        baseline_root=None if baseline_path is None else str(baseline_path),
        commit_hash=commit_hash,
        total=len(results),
        passed=passed_count,
        failed=len(results) - passed_count,
        profiles=results,
    )
    _write_run_summary(artifact_root, summary, environment, profiles=selected_profiles)
    return summary
205
+
206
+
207
def validate_performance_artifacts(
    source_root: str | Path,
    *,
    matrix_path: str | Path | None = None,
    artifact_root: str | Path | None = None,
    baseline_root: str | Path | None = None,
    require_relative_regression: bool = False,
) -> list[str]:
    """Check that a previous run left complete, passing artifacts on disk.

    Args:
        source_root: Directory that every other path is joined onto.
        matrix_path: Matrix file; defaults to ``DEFAULT_PERFORMANCE_MATRIX_PATH``.
        artifact_root: Artifact directory to inspect; defaults to the
            matrix's current artifact root.
        baseline_root: When given, each profile must also have a
            ``result.json`` under this baseline directory.
        require_relative_regression: When True, each profile's result must
            record that a relative-regression comparison was evaluated.

    Returns:
        Human-readable failure messages; an empty list means valid. A
        ``result.json`` that cannot be read, is not valid JSON, or is not a
        JSON object is reported as a failure rather than raised, so one
        corrupt artifact does not hide the findings for the other profiles.
    """
    source_root = Path(source_root)
    matrix_file = source_root / (Path(matrix_path) if matrix_path is not None else DEFAULT_PERFORMANCE_MATRIX_PATH)
    matrix = load_performance_matrix(matrix_file)
    artifact_base = source_root / (Path(artifact_root) if artifact_root is not None else Path(matrix.current_artifact_root))
    baseline_path = source_root / Path(baseline_root) if baseline_root is not None else None

    failures: list[str] = []
    if not artifact_base.exists():
        return [f'missing performance artifact root: {artifact_base}']

    for filename in ('summary.json', 'index.json'):
        if not (artifact_base / filename).exists():
            failures.append(f'missing performance summary file: {artifact_base / filename}')

    required_files = ('result.json', 'summary.json', 'env.json', 'percentile_histogram.json', 'raw_samples.csv', 'command.json', 'correctness.json')
    for profile in matrix.profiles:
        profile_dir = artifact_base / profile.profile_id
        if not profile_dir.exists():
            failures.append(f'missing profile artifact directory: {profile_dir}')
            continue

        missing = [filename for filename in required_files if not (profile_dir / filename).exists()]
        if missing:
            failures.extend(
                f'missing artifact file for {profile.profile_id}: {profile_dir / filename}'
                for filename in missing
            )
            continue

        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; OSError covers a file that vanished or is unreadable.
        try:
            result = json.loads((profile_dir / 'result.json').read_text(encoding='utf-8'))
        except (OSError, ValueError) as exc:
            failures.append(f'{profile.profile_id} result.json could not be read: {exc}')
            continue
        if not isinstance(result, dict):
            failures.append(f'{profile.profile_id} result.json is not a JSON object')
            continue

        def section(name: str, _result: dict[str, Any] = result) -> dict[str, Any]:
            # A section that is absent or not an object counts as empty, so
            # the checks below report it instead of raising AttributeError.
            value = _result.get(name, {})
            return value if isinstance(value, dict) else {}

        if result.get('profile_id') != profile.profile_id:
            failures.append(f'{profile.profile_id} result.json does not match profile id')
        if result.get('lane') != profile.lane:
            failures.append(f'{profile.profile_id} result.json does not match configured lane')
        if not result.get('passed', False):
            failures.append(f'{profile.profile_id} performance artifact is failing: {result.get("failure_reasons", [])}')
        if profile.correctness_required and not section('correctness').get('passed', False):
            failures.append(f'{profile.profile_id} is missing passing correctness-under-load evidence')
        if require_relative_regression and not section('relative_regression').get('evaluated', False):
            failures.append(f'{profile.profile_id} did not evaluate relative regression against a baseline')
        if baseline_path is not None and not (baseline_path / profile.profile_id / 'result.json').exists():
            failures.append(f'missing baseline artifact for {profile.profile_id}: {baseline_path / profile.profile_id / "result.json"}')
    return failures
256
+
257
+
258
def _resolve_commit_hash(source_root: Path) -> str:
    """Return the commit identifier for this run, or ``'unknown'``.

    A non-empty ``GIT_COMMIT`` or ``COMMIT_SHA`` environment variable wins
    (checked in that order); otherwise ``git rev-parse HEAD`` is run against
    *source_root*.
    """
    for variable in ('GIT_COMMIT', 'COMMIT_SHA'):
        from_environment = os.environ.get(variable)
        if from_environment:
            return from_environment

    git_command = ['git', '-C', str(source_root), 'rev-parse', 'HEAD']
    try:
        completed = subprocess.run(git_command, capture_output=True, text=True, timeout=5.0, check=True)
    except Exception:
        # Best effort: a missing git binary, a non-repository directory or a
        # timeout must not abort the run.
        return 'unknown'
    return completed.stdout.strip() or 'unknown'
274
+
275
+
276
def _environment_snapshot(*, matrix: PerfMatrix, command: list[str]) -> dict[str, Any]:
    """Describe the interpreter, host and invocation for the run's artifacts.

    *command* is copied into the snapshot under ``argv``; the matrix supplies
    its name and the platform list declared in its metadata.
    """
    perf_clock = time.get_clock_info('perf_counter')
    this_platform = _default_platform_id()
    declared_platforms = matrix.metadata.get('certification_platforms', [])

    snapshot: dict[str, Any] = {'matrix_name': matrix.matrix_name}
    # Interpreter and host details.
    snapshot['python_version'] = platform.python_version()
    snapshot['python_implementation'] = platform.python_implementation()
    snapshot['platform'] = platform.platform()
    snapshot['machine'] = platform.machine()
    snapshot['processor'] = platform.processor()
    snapshot['cpu_count'] = os.cpu_count()
    # Properties of the clock, as reported for ``perf_counter``.
    snapshot['perf_counter_resolution'] = perf_clock.resolution
    snapshot['perf_counter_monotonic'] = perf_clock.monotonic
    # Invocation details.
    snapshot['argv'] = list(command)
    snapshot['generated_at_epoch'] = time.time()
    snapshot['certification_platform'] = this_platform
    snapshot['matrix_declared_platforms'] = list(declared_platforms)
    return snapshot
294
+
295
+
296
def _summarize_measurement(measurement: Mapping[str, Any], *, profile: PerfProfile) -> dict[str, Any]:
    """Reduce a driver's raw measurement mapping to the metrics dictionary.

    Values the driver did not report fall back to neutral defaults: attempts
    default to the sample count, units to ``units_per_iteration`` times the
    attempts, and every other counter to zero. Throughput and error rate are
    reported as 0.0 when their denominator is not positive.
    """
    samples = [float(sample) for sample in measurement.get('samples_ms', [])]

    total_attempts = int(measurement.get('total_attempts', len(samples)))
    fallback_units = profile.units_per_iteration * total_attempts
    total_units = int(measurement.get('total_units', fallback_units))
    total_duration = float(measurement.get('total_duration_seconds', 0.0))
    if total_duration <= 0:
        throughput = 0.0
    else:
        throughput = float(total_units) / total_duration

    error_count = int(measurement.get('error_count', 0))
    if total_attempts <= 0:
        error_rate = 0.0
    else:
        error_rate = error_count / float(total_attempts)

    p50, p95, p99, p99_9 = _percentiles(samples)

    reported_stalls = dict(measurement.get('protocol_stall_counts', {}))
    protocol_stall_counts = {str(name): int(count) for name, count in reported_stalls.items()}
    protocol_stalls = sum(protocol_stall_counts.values())

    # Both derived latencies use the sample median as their fallback.
    time_to_first_byte_ms = _derive_time_to_first_byte(measurement, p50)
    handshake_latency_ms = _derive_handshake_latency(measurement, p50, profile)

    return {
        'sample_count': len(samples),
        'total_attempts': total_attempts,
        'total_units': total_units,
        'total_duration_seconds': total_duration,
        'throughput_ops_per_sec': throughput,
        'p50_ms': p50,
        'p95_ms': p95,
        'p99_ms': p99,
        'p99_9_ms': p99_9,
        'time_to_first_byte_ms': time_to_first_byte_ms,
        'handshake_latency_ms': handshake_latency_ms,
        'error_count': error_count,
        'error_rate': error_rate,
        'cpu_seconds': float(measurement.get('cpu_seconds', 0.0)),
        'rss_kib': float(measurement.get('rss_kib', 0.0)),
        'connections': int(measurement.get('connections', 0)),
        'streams': int(measurement.get('streams', 0)),
        'scheduler_rejections': int(measurement.get('scheduler_rejections', 0)),
        'protocol_stalls': protocol_stalls,
        'protocol_stall_counts': protocol_stall_counts,
        'profile_metadata': dict(measurement.get('metadata', {})),
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
    }
335
+
336
+
337
def _evaluate_thresholds(profile: PerfProfile, metrics: Mapping[str, Any]) -> tuple[dict[str, Any], list[str]]:
    """Compare *metrics* with the absolute limits configured on *profile*.

    Only the threshold keys listed in the rule table are evaluated; any other
    key in ``profile.thresholds`` is ignored. Returns the evaluation record
    (configured thresholds, per-check detail, overall verdict) together with
    one failure message per violated limit.
    """
    # (threshold key, metric key, numeric cast, True when the limit is a floor)
    rules = (
        ('min_throughput_ops_per_sec', 'throughput_ops_per_sec', float, True),
        ('max_p50_ms', 'p50_ms', float, False),
        ('max_p95_ms', 'p95_ms', float, False),
        ('max_p99_ms', 'p99_ms', float, False),
        ('max_p99_9_ms', 'p99_9_ms', float, False),
        ('max_time_to_first_byte_ms', 'time_to_first_byte_ms', float, False),
        ('max_handshake_latency_ms', 'handshake_latency_ms', float, False),
        ('max_error_rate', 'error_rate', float, False),
        ('max_scheduler_rejections', 'scheduler_rejections', int, False),
        ('max_protocol_stalls', 'protocol_stalls', int, False),
        ('max_rss_kib', 'rss_kib', float, False),
    )

    limits = dict(profile.thresholds)
    check_details: dict[str, Any] = {}
    failures: list[str] = []

    for threshold_key, metric_key, cast, is_floor in rules:
        if threshold_key not in limits:
            continue
        observed = metrics[metric_key]
        limit = limits[threshold_key]
        if is_floor:
            satisfied = cast(observed) >= cast(limit)
        else:
            satisfied = cast(observed) <= cast(limit)
        # The record keeps the raw values, not the cast ones.
        check_details[threshold_key] = {'observed': observed, 'threshold': limit, 'passed': satisfied}
        if not satisfied:
            failures.append(f'{profile.profile_id} failed threshold {threshold_key}: observed={observed!r} threshold={limit!r}')

    evaluation = {'thresholds': limits, 'checks': check_details, 'passed': not failures}
    return evaluation, failures
381
+
382
+
383
def _evaluate_relative_regression(profile: PerfProfile, metrics: Mapping[str, Any], baseline_root: Path | None) -> dict[str, Any]:
    """Compare *metrics* with the stored baseline result for the same profile.

    Nothing is evaluated (and the result counts as passed) when no baseline
    root is given or the baseline ``result.json`` is missing. Otherwise each
    limit present in ``profile.relative_regression_budget`` yields one entry
    under ``checks``. The throughput and latency checks are skipped when the
    corresponding baseline value is not positive.
    """
    if baseline_root is None:
        return {'evaluated': False, 'reason': 'no baseline root configured', 'passed': True}
    baseline_file = baseline_root / profile.profile_id / 'result.json'
    if not baseline_file.exists():
        return {'evaluated': False, 'reason': f'missing baseline artifact {baseline_file}', 'passed': True}

    baseline_document = json.loads(baseline_file.read_text(encoding='utf-8'))
    budget = dict(profile.relative_regression_budget)
    checks: dict[str, Any] = {}
    failures: list[str] = []

    reference = dict(baseline_document.get('metrics', {}))
    baseline_throughput = float(reference.get('throughput_ops_per_sec', 0.0))
    baseline_p99 = float(reference.get('p99_ms', 0.0))
    # A baseline without a p99.9 figure is compared against its p99 instead.
    baseline_p99_9 = float(reference.get('p99_9_ms', baseline_p99))
    baseline_cpu = float(reference.get('cpu_seconds', 0.0))
    baseline_rss = float(reference.get('rss_kib', 0.0))

    def record_ceiling(check_name: str, metric_key: str, baseline: float, ceiling: float, slack_label: str, slack: float, failure: str) -> None:
        # Shared bookkeeping for every "observed must not exceed" check.
        observed = float(metrics[metric_key])
        within_budget = observed <= ceiling
        checks[check_name] = {
            'baseline': baseline,
            'observed': observed,
            'maximum_allowed': ceiling,
            slack_label: slack,
            'passed': within_budget,
        }
        if not within_budget:
            failures.append(failure)

    # Throughput is the only floor-style check.
    allowed_drop = budget.get('max_throughput_drop_fraction')
    if allowed_drop is not None and baseline_throughput > 0.0:
        floor = baseline_throughput * (1.0 - float(allowed_drop))
        observed_throughput = float(metrics['throughput_ops_per_sec'])
        throughput_ok = observed_throughput >= floor
        checks['throughput_drop_fraction'] = {
            'baseline': baseline_throughput,
            'observed': observed_throughput,
            'minimum_allowed': floor,
            'passed': throughput_ok,
        }
        if not throughput_ok:
            failures.append(f'{profile.profile_id} throughput regressed below allowed budget')

    # Latency ceilings: the larger of the fractional and the absolute allowance.
    allowed_p99 = budget.get('max_p99_increase_fraction')
    if allowed_p99 is not None and baseline_p99 > 0.0:
        slack = float(budget.get('absolute_p99_slack_ms', 0.25))
        ceiling = max(baseline_p99 * (1.0 + float(allowed_p99)), baseline_p99 + slack)
        record_ceiling(
            'p99_increase_fraction', 'p99_ms', baseline_p99, ceiling, 'absolute_slack_ms', slack,
            f'{profile.profile_id} p99 latency regressed above allowed budget',
        )

    allowed_p99_9 = budget.get('max_p99_9_increase_fraction')
    if allowed_p99_9 is not None and baseline_p99_9 > 0.0:
        slack = float(budget.get('absolute_p99_9_slack_ms', 0.5))
        ceiling = max(baseline_p99_9 * (1.0 + float(allowed_p99_9)), baseline_p99_9 + slack)
        record_ceiling(
            'p99_9_increase_fraction', 'p99_9_ms', baseline_p99_9, ceiling, 'absolute_slack_ms', slack,
            f'{profile.profile_id} p99.9 latency regressed above allowed budget',
        )

    # Resource ceilings: the fractional allowance plus the absolute slack.
    allowed_cpu = budget.get('max_cpu_increase_fraction')
    if allowed_cpu is not None:
        slack = float(budget.get('absolute_cpu_slack_seconds', 0.01))
        ceiling = baseline_cpu * (1.0 + float(allowed_cpu)) + slack
        record_ceiling(
            'cpu_increase_fraction', 'cpu_seconds', baseline_cpu, ceiling, 'absolute_slack_seconds', slack,
            f'{profile.profile_id} cpu time regressed above allowed budget',
        )

    allowed_rss = budget.get('max_rss_increase_fraction')
    if allowed_rss is not None:
        slack = float(budget.get('absolute_rss_slack_kib', 1024.0))
        ceiling = baseline_rss * (1.0 + float(allowed_rss)) + slack
        record_ceiling(
            'rss_increase_fraction', 'rss_kib', baseline_rss, ceiling, 'absolute_rss_slack_kib', slack,
            f'{profile.profile_id} rss regressed above allowed budget',
        )

    return {
        'evaluated': True,
        'baseline_root': str(baseline_root),
        'baseline_profile': str(baseline_file),
        'checks': checks,
        'failure_reasons': failures,
        'passed': not failures,
    }
487
+
488
+
489
def _jsonable(value: Any) -> Any:
    """Recursively convert *value* into something ``json.dumps`` accepts.

    Scalars and ``None`` pass through; bytes become UTF-8 text, or a hex
    string when they do not decode; paths become strings; mappings get string
    keys; lists, tuples and sets become lists; anything else becomes its
    ``repr``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, bytes):
        try:
            text = value.decode('utf-8')
        except UnicodeDecodeError:
            return value.hex()
        else:
            return text
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, Mapping):
        converted: dict[str, Any] = {}
        for key, item in value.items():
            converted[str(key)] = _jsonable(item)
        return converted
    if isinstance(value, (list, tuple, set)):
        return list(map(_jsonable, value))
    return repr(value)
504
+
505
+
506
def _write_profile_artifacts(
    profile_dir: Path,
    *,
    profile: PerfProfile,
    matrix: PerfMatrix,
    commit_hash: str,
    metrics: Mapping[str, Any],
    environment: Mapping[str, Any],
    correctness: Mapping[str, Any],
    threshold_evaluation: Mapping[str, Any],
    relative_regression: Mapping[str, Any],
    measurement: Mapping[str, Any],
    passed: bool,
    failure_reasons: list[str],
) -> None:
    """Write the per-profile artifact files into *profile_dir*.

    Six JSON documents are written (``result.json``, ``summary.json``,
    ``env.json``, ``percentile_histogram.json``, ``command.json``,
    ``correctness.json``), followed by ``raw_samples.csv`` holding the raw
    latency samples. *profile_dir* must already exist.
    """
    histogram = _build_histogram([float(sample) for sample in measurement.get('samples_ms', [])])

    latency_keys = ('p50_ms', 'p95_ms', 'p99_ms', 'p99_9_ms', 'time_to_first_byte_ms', 'handshake_latency_ms')
    percentile_payload: dict[str, Any] = {'profile_id': profile.profile_id}
    percentile_payload.update((key, metrics[key]) for key in latency_keys)
    percentile_payload['histogram'] = histogram

    command_payload = {
        'argv': list(environment.get('argv', [])),
        'profile_id': profile.profile_id,
        'driver': profile.driver,
        'deployment_profile': profile.deployment_profile,
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
    }

    # Full record: profile description plus every evaluation section.
    result_payload = {
        'profile_id': profile.profile_id,
        'family': profile.family,
        'description': profile.description,
        'driver': profile.driver,
        'deployment_profile': profile.deployment_profile,
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
        'rfc_targets': list(profile.rfc_targets),
        'commit_hash': commit_hash,
        'passed': passed,
        'metrics': dict(metrics),
        'correctness': dict(correctness),
        'threshold_evaluation': dict(threshold_evaluation),
        'relative_regression': dict(relative_regression),
        'failure_reasons': list(failure_reasons),
        'matrix_name': matrix.matrix_name,
    }

    # Condensed record: the headline metrics only.
    headline_keys = (
        'throughput_ops_per_sec',
        'p50_ms',
        'p95_ms',
        'p99_ms',
        'p99_9_ms',
        'time_to_first_byte_ms',
        'handshake_latency_ms',
        'error_rate',
        'cpu_seconds',
        'rss_kib',
        'scheduler_rejections',
        'protocol_stalls',
    )
    summary_payload = {
        'profile_id': profile.profile_id,
        'lane': profile.lane,
        'deployment_profile': profile.deployment_profile,
        'passed': passed,
        'metrics': {key: metrics[key] for key in headline_keys},
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
        'failure_reasons': list(failure_reasons),
    }

    documents = (
        ('result.json', result_payload),
        ('summary.json', summary_payload),
        ('env.json', dict(environment)),
        ('percentile_histogram.json', percentile_payload),
        ('command.json', command_payload),
        ('correctness.json', dict(correctness)),
    )
    for filename, document in documents:
        serialized = json.dumps(_jsonable(document), indent=2, sort_keys=True)
        (profile_dir / filename).write_text(serialized + '\n', encoding='utf-8')
    _write_samples_csv(profile_dir / 'raw_samples.csv', measurement.get('samples_ms', []))
594
+
595
+
596
def _write_samples_csv(path: Path, samples: list[Any]) -> None:
    """Write *samples* to *path* as a two-column CSV.

    The header is ``index,latency_ms``; rows are numbered from 1 and each
    latency is formatted with nine decimal places.
    """
    rows = [f'{position},{float(sample):.9f}' for position, sample in enumerate(samples, start=1)]
    content = '\n'.join(['index,latency_ms', *rows])
    path.write_text(content + '\n', encoding='utf-8')
601
+
602
+
603
def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile]) -> None:
    """Write the run-level ``summary.json`` and ``index.json`` files.

    Both files receive the same document: the summary's counts, a per-lane
    tally of *profiles*, and one condensed entry per profile result.
    """
    lane_counts: dict[str, int] = {}
    for profile in profiles:
        lane_counts.setdefault(profile.lane, 0)
        lane_counts[profile.lane] += 1

    profile_entries = []
    for outcome in summary.profiles:
        profile_entries.append(
            {
                'profile_id': outcome.profile_id,
                'passed': outcome.passed,
                'artifact_dir': outcome.artifact_dir,
                'failure_reasons': outcome.failure_reasons,
            }
        )

    payload = {
        'matrix_name': summary.matrix_name,
        'artifact_root': summary.artifact_root,
        'baseline_root': summary.baseline_root,
        'commit_hash': summary.commit_hash,
        'total': summary.total,
        'passed': summary.passed,
        'failed': summary.failed,
        'lane_counts': lane_counts,
        'certification_platform': environment.get('certification_platform'),
        'profiles': profile_entries,
        'generated_at_epoch': environment.get('generated_at_epoch'),
    }
    serialized = json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n'
    for filename in ('summary.json', 'index.json'):
        (artifact_root / filename).write_text(serialized, encoding='utf-8')
630
+
631
+
632
def _percentiles(samples: list[float]) -> tuple[float, float, float, float]:
    """Return the (p50, p95, p99, p99.9) values of *samples*.

    An empty input yields four zeros. The samples are sorted here, so the
    caller may pass them in any order.
    """
    if not samples:
        return 0.0, 0.0, 0.0, 0.0
    ordered = sorted(samples)
    p50 = _percentile(ordered, 50.0)
    p95 = _percentile(ordered, 95.0)
    p99 = _percentile(ordered, 99.0)
    p99_9 = _percentile(ordered, 99.9)
    return p50, p95, p99, p99_9
642
+
643
+
644
def _percentile(sorted_samples: list[float], pct: float) -> float:
    """Return the *pct*-th percentile of an already sorted sample list.

    The value is linearly interpolated between the two neighbouring samples.
    An empty list gives 0.0 and a single sample is returned unchanged.
    """
    count = len(sorted_samples)
    if count == 0:
        return 0.0
    if count == 1:
        return float(sorted_samples[0])

    last_index = count - 1
    position = (pct / 100.0) * last_index
    lower_index = int(position)
    # Clamp so pct == 100 does not read past the end of the list.
    upper_index = min(lower_index + 1, last_index)
    weight = position - lower_index

    lower_value = sorted_samples[lower_index]
    upper_value = sorted_samples[upper_index]
    return float(lower_value + ((upper_value - lower_value) * weight))
654
+
655
+
656
def _build_histogram(samples: list[float], *, bucket_count: int = 8) -> list[dict[str, Any]]:
    """Bucket *samples* into *bucket_count* equal-width bins.

    Returns one ``{'lower_ms', 'upper_ms', 'count'}`` mapping per bin, an
    empty list for no samples, or a single bin when every sample is equal.
    """
    if not samples:
        return []

    ordered = sorted(samples)
    minimum, maximum = ordered[0], ordered[-1]
    if minimum == maximum:
        return [{'lower_ms': minimum, 'upper_ms': maximum, 'count': len(ordered)}]

    bucket_size = (maximum - minimum) / float(bucket_count)
    buckets: list[dict[str, Any]] = []
    for index in range(bucket_count):
        lower_edge = minimum + (bucket_size * index)
        upper_edge = minimum + (bucket_size * (index + 1))
        buckets.append({'lower_ms': lower_edge, 'upper_ms': upper_edge, 'count': 0})

    top_bucket = bucket_count - 1
    for sample in ordered:
        # The maximum lands exactly on the upper edge, so clamp it into the
        # final bucket.
        slot = int(min(top_bucket, (sample - minimum) / bucket_size))
        buckets[slot]['count'] += 1
    return buckets
671
+
672
+
673
def _derive_time_to_first_byte(measurement: Mapping[str, Any], default: float) -> float:
    """Pick the time-to-first-byte figure for a measurement.

    Preference order: an explicit ``time_to_first_byte_ms`` value, then the
    median of a non-empty ``time_to_first_byte_samples_ms`` list, then
    *default*.
    """
    reported = measurement.get('time_to_first_byte_ms')
    if reported is not None:
        return float(reported)

    raw_samples = measurement.get('time_to_first_byte_samples_ms')
    # Only a non-empty list counts; other containers fall through to default.
    if not (isinstance(raw_samples, list) and raw_samples):
        return float(default)
    return _percentile(sorted(float(sample) for sample in raw_samples), 50.0)
682
+
683
+
684
def _derive_handshake_latency(measurement: Mapping[str, Any], default: float, profile: PerfProfile) -> float:
    """Pick the handshake latency figure for a measurement.

    Preference order: an explicit ``handshake_latency_ms`` value, then the
    median of a non-empty ``handshake_latency_samples_ms`` list. Without
    either, *default* is used for profiles that are expected to perform a
    handshake and 0.0 for all others.
    """
    reported = measurement.get('handshake_latency_ms')
    if reported is not None:
        return float(reported)

    raw_samples = measurement.get('handshake_latency_samples_ms')
    if isinstance(raw_samples, list) and raw_samples:
        return _percentile(sorted(float(sample) for sample in raw_samples), 50.0)

    return float(default) if _profile_expects_handshake(profile) else 0.0
695
+
696
+
697
def _profile_expects_handshake(profile: PerfProfile) -> bool:
    """Report whether *profile* is expected to involve a handshake.

    True when the profile's family is exactly ``'TLS / PKI'`` or when its
    deployment profile name contains ``tls``, ``quic`` or ``http3``
    (case-insensitive).

    The former separate ``websocket_http3`` test was dropped: any name
    containing it also contains ``http3``, so the clause could never change
    the result.
    """
    # Lower-cased before the family test so an invalid deployment_profile
    # still raises, exactly as it did before.
    deployment = profile.deployment_profile.lower()
    if profile.family == 'TLS / PKI':
        return True
    return any(marker in deployment for marker in ('tls', 'quic', 'http3'))
706
+
707
+
708
def _default_platform_id() -> str:
    """Build the platform identifier for the running interpreter.

    The format is ``<system>-<machine>-<implementation><major>.<minor>`` with
    the three name components lower-cased.
    """
    system = platform.system().lower()
    machine = platform.machine().lower()
    interpreter = platform.python_implementation().lower()
    major, minor = sys.version_info.major, sys.version_info.minor
    return f"{system}-{machine}-{interpreter}{major}.{minor}"
711
+
712
+
713
# Public API of this module; everything else is an implementation detail.
__all__ = [
    # default repository-relative paths
    'DEFAULT_BASELINE_ARTIFACT_ROOT',
    'DEFAULT_CURRENT_ARTIFACT_ROOT',
    'DEFAULT_PERFORMANCE_MATRIX_PATH',
    # data models
    'PerfMatrix',
    'PerfProfile',
    'PerfProfileResult',
    'PerfRunSummary',
    # errors
    'PerfRunnerError',
    # entry points
    'load_performance_matrix',
    'run_performance_matrix',
    'validate_performance_artifacts',
]