tigrcorn-certification 0.3.16.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigrcorn_certification/__init__.py +55 -0
- tigrcorn_certification/aioquic_preflight.py +449 -0
- tigrcorn_certification/certification_env.py +419 -0
- tigrcorn_certification/conformance.py +42 -0
- tigrcorn_certification/explicit_surfaces.py +130 -0
- tigrcorn_certification/interop_runner.py +2017 -0
- tigrcorn_certification/perf_runner.py +725 -0
- tigrcorn_certification/py.typed +1 -0
- tigrcorn_certification/release_gates.py +1354 -0
- tigrcorn_certification-0.3.16.dev5.dist-info/METADATA +242 -0
- tigrcorn_certification-0.3.16.dev5.dist-info/RECORD +14 -0
- tigrcorn_certification-0.3.16.dev5.dist-info/WHEEL +5 -0
- tigrcorn_certification-0.3.16.dev5.dist-info/licenses/LICENSE +163 -0
- tigrcorn_certification-0.3.16.dev5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import platform
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Mapping
|
|
12
|
+
|
|
13
|
+
# Repository-relative default locations. They are joined onto the caller's
# ``source_root`` whenever an explicit path is not supplied.
_PERFORMANCE_REVIEW_ROOT = Path('docs', 'review', 'performance')
_PERFORMANCE_ARTIFACT_ROOT = _PERFORMANCE_REVIEW_ROOT / 'artifacts'

# JSON document that declares the performance profiles.
DEFAULT_PERFORMANCE_MATRIX_PATH = _PERFORMANCE_REVIEW_ROOT / 'performance_matrix.json'
# Artifact tree holding the reference baseline results.
DEFAULT_BASELINE_ARTIFACT_ROOT = _PERFORMANCE_ARTIFACT_ROOT / 'phase6_reference_baseline'
# Artifact tree holding the results of the current release run.
DEFAULT_CURRENT_ARTIFACT_ROOT = _PERFORMANCE_ARTIFACT_ROOT / 'phase6_current_release'
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(slots=True)
|
|
19
|
+
class PerfProfile:
|
|
20
|
+
profile_id: str
|
|
21
|
+
family: str
|
|
22
|
+
description: str
|
|
23
|
+
driver: str
|
|
24
|
+
deployment_profile: str
|
|
25
|
+
lane: str = 'component_regression'
|
|
26
|
+
certification_platforms: list[str] = field(default_factory=list)
|
|
27
|
+
live_listener_required: bool = False
|
|
28
|
+
rfc_targets: list[str] = field(default_factory=list)
|
|
29
|
+
correctness_required: bool = False
|
|
30
|
+
hot_path: bool = False
|
|
31
|
+
iterations: int = 10
|
|
32
|
+
warmups: int = 1
|
|
33
|
+
units_per_iteration: int = 1
|
|
34
|
+
thresholds: dict[str, Any] = field(default_factory=dict)
|
|
35
|
+
relative_regression_budget: dict[str, Any] = field(default_factory=dict)
|
|
36
|
+
driver_config: dict[str, Any] = field(default_factory=dict)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(slots=True)
|
|
40
|
+
class PerfMatrix:
|
|
41
|
+
matrix_name: str
|
|
42
|
+
baseline_artifact_root: str
|
|
43
|
+
current_artifact_root: str
|
|
44
|
+
profiles: list[PerfProfile]
|
|
45
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class PerfProfileResult:
|
|
50
|
+
profile_id: str
|
|
51
|
+
passed: bool
|
|
52
|
+
artifact_dir: str
|
|
53
|
+
failure_reasons: list[str] = field(default_factory=list)
|
|
54
|
+
metrics: dict[str, Any] = field(default_factory=dict)
|
|
55
|
+
correctness: dict[str, Any] = field(default_factory=dict)
|
|
56
|
+
threshold_evaluation: dict[str, Any] = field(default_factory=dict)
|
|
57
|
+
relative_regression: dict[str, Any] = field(default_factory=dict)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(slots=True)
|
|
61
|
+
class PerfRunSummary:
|
|
62
|
+
matrix_name: str
|
|
63
|
+
artifact_root: str
|
|
64
|
+
baseline_root: str | None
|
|
65
|
+
commit_hash: str
|
|
66
|
+
total: int
|
|
67
|
+
passed: int
|
|
68
|
+
failed: int
|
|
69
|
+
profiles: list[PerfProfileResult]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class PerfRunnerError(RuntimeError):
    """Raised when a performance run cannot proceed, e.g. no profiles are selected."""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def load_performance_matrix(path: str | Path) -> PerfMatrix:
    """Read a performance matrix JSON file and build a :class:`PerfMatrix`.

    Each entry of the top-level ``profiles`` list must provide ``profile_id``,
    ``family``, ``description``, ``driver`` and ``deployment_profile``; a
    missing one raises ``KeyError``. All other profile keys fall back to the
    :class:`PerfProfile` defaults, except ``certification_platforms``, which
    falls back to the list declared in the matrix ``metadata``.
    """
    document = json.loads(Path(path).read_text(encoding='utf-8'))
    metadata = document.get('metadata', {})
    inherited_platforms = [str(name) for name in metadata.get('certification_platforms', [])]

    parsed: list[PerfProfile] = []
    for entry in document.get('profiles', []):
        parsed.append(
            PerfProfile(
                profile_id=entry['profile_id'],
                family=entry['family'],
                description=entry['description'],
                driver=entry['driver'],
                deployment_profile=entry['deployment_profile'],
                lane=str(entry.get('lane', 'component_regression')),
                certification_platforms=[
                    str(name) for name in entry.get('certification_platforms', inherited_platforms)
                ],
                live_listener_required=bool(entry.get('live_listener_required', False)),
                rfc_targets=list(entry.get('rfc_targets', [])),
                correctness_required=bool(entry.get('correctness_required', False)),
                hot_path=bool(entry.get('hot_path', False)),
                iterations=int(entry.get('iterations', 10)),
                warmups=int(entry.get('warmups', 1)),
                units_per_iteration=int(entry.get('units_per_iteration', 1)),
                thresholds=dict(entry.get('thresholds', {})),
                relative_regression_budget=dict(entry.get('relative_regression_budget', {})),
                driver_config=dict(entry.get('driver_config', {})),
            )
        )

    baseline_default = DEFAULT_BASELINE_ARTIFACT_ROOT.as_posix()
    current_default = DEFAULT_CURRENT_ARTIFACT_ROOT.as_posix()
    return PerfMatrix(
        matrix_name=str(document.get('matrix_name', 'tigrcorn-performance-matrix')),
        baseline_artifact_root=str(document.get('baseline_artifact_root', baseline_default)),
        current_artifact_root=str(document.get('current_artifact_root', current_default)),
        profiles=parsed,
        metadata=dict(metadata),
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def run_performance_matrix(
    source_root: str | Path,
    *,
    matrix_path: str | Path | None = None,
    artifact_root: str | Path | None = None,
    baseline_root: str | Path | None = None,
    profile_ids: list[str] | None = None,
    establish_baseline: bool = False,
) -> PerfRunSummary:
    """Run the selected profiles of the performance matrix and write artifacts.

    Args:
        source_root: Directory every other path argument is joined onto.
        matrix_path: Matrix JSON file; defaults to ``DEFAULT_PERFORMANCE_MATRIX_PATH``.
        artifact_root: Output directory. When omitted, the matrix's baseline
            root is used if ``establish_baseline`` is set, otherwise its
            current root.
        baseline_root: Baseline to compare against. When omitted, the matrix's
            baseline root is used, unless ``establish_baseline`` is set, in
            which case no comparison is made.
        profile_ids: Profiles to run. ``None`` or an empty list selects all;
            ids that are not in the matrix are ignored.
        establish_baseline: Treat this run as producing the baseline.

    Returns:
        A :class:`PerfRunSummary` covering every profile that was run.

    Raises:
        PerfRunnerError: If the selection matches no profile in the matrix.
    """
    source_root = Path(source_root)
    relative_matrix = DEFAULT_PERFORMANCE_MATRIX_PATH if matrix_path is None else Path(matrix_path)
    matrix = load_performance_matrix(source_root / relative_matrix)

    wanted_ids = set(profile_ids or [candidate.profile_id for candidate in matrix.profiles])
    selected_profiles = [candidate for candidate in matrix.profiles if candidate.profile_id in wanted_ids]
    if not selected_profiles:
        raise PerfRunnerError('no performance profiles selected')

    # Where this run writes its artifacts.
    if artifact_root is not None:
        output_root = source_root / Path(artifact_root)
    elif establish_baseline:
        output_root = source_root / Path(matrix.baseline_artifact_root)
    else:
        output_root = source_root / Path(matrix.current_artifact_root)
    output_root.mkdir(parents=True, exist_ok=True)

    # Which baseline (if any) the run is compared against.
    if baseline_root is not None:
        baseline_path = source_root / Path(baseline_root)
    elif establish_baseline:
        baseline_path = None
    else:
        baseline_path = source_root / Path(matrix.baseline_artifact_root)

    commit_hash = _resolve_commit_hash(source_root)
    environment = _environment_snapshot(matrix=matrix, command=sys.argv)

    # Imported here rather than at module import time.
    from benchmarks.registry import get_driver

    results: list[PerfProfileResult] = []
    for profile in selected_profiles:
        run_driver = get_driver(profile.driver)
        measurement = run_driver(profile, source_root=source_root)

        profile_dir = output_root / profile.profile_id
        profile_dir.mkdir(parents=True, exist_ok=True)

        metrics = _summarize_measurement(measurement, profile=profile)
        threshold_eval, failures = _evaluate_thresholds(profile, metrics)

        reported_checks = measurement.get('correctness_checks', {})
        # NOTE(review): an empty checks mapping passes vacuously (all() of
        # nothing is True) even when correctness is required — confirm intended.
        if profile.correctness_required:
            correctness_passed = all(reported_checks.values())
        else:
            correctness_passed = True
        correctness = {
            'required': profile.correctness_required,
            'checks': reported_checks,
            'passed': correctness_passed,
            'note': measurement.get('correctness_note', 'same-stack correctness-under-load checks'),
            'lane': profile.lane,
            'live_listener_required': profile.live_listener_required,
        }
        if not correctness_passed:
            failures.append('correctness-under-load checks failed')

        relative_regression = _evaluate_relative_regression(profile, metrics, baseline_path)
        regression_failed = relative_regression.get('evaluated') and not relative_regression.get('passed', True)
        if regression_failed:
            failures.extend(relative_regression.get('failure_reasons', []))

        profile_passed = not failures
        _write_profile_artifacts(
            profile_dir,
            profile=profile,
            matrix=matrix,
            commit_hash=commit_hash,
            metrics=metrics,
            environment=environment,
            correctness=correctness,
            threshold_evaluation=threshold_eval,
            relative_regression=relative_regression,
            measurement=measurement,
            passed=profile_passed,
            failure_reasons=failures,
        )
        results.append(
            PerfProfileResult(
                profile_id=profile.profile_id,
                passed=profile_passed,
                artifact_dir=str(profile_dir),
                failure_reasons=failures,
                metrics=metrics,
                correctness=correctness,
                threshold_evaluation=threshold_eval,
                relative_regression=relative_regression,
            )
        )

    passed_count = sum(1 for outcome in results if outcome.passed)
    failed_count = sum(1 for outcome in results if not outcome.passed)
    summary = PerfRunSummary(
        matrix_name=matrix.matrix_name,
        artifact_root=str(output_root),
        baseline_root=None if baseline_path is None else str(baseline_path),
        commit_hash=commit_hash,
        total=len(results),
        passed=passed_count,
        failed=failed_count,
        profiles=results,
    )
    _write_run_summary(output_root, summary, environment, profiles=selected_profiles)
    return summary
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def validate_performance_artifacts(
    source_root: str | Path,
    *,
    matrix_path: str | Path | None = None,
    artifact_root: str | Path | None = None,
    baseline_root: str | Path | None = None,
    require_relative_regression: bool = False,
) -> list[str]:
    """Check that a written artifact tree is complete and passing.

    Args:
        source_root: Directory every other path argument is joined onto.
        matrix_path: Matrix JSON file; defaults to ``DEFAULT_PERFORMANCE_MATRIX_PATH``.
        artifact_root: Artifact tree to validate; defaults to the matrix's
            current artifact root.
        baseline_root: When given, each fully present profile must also have a
            ``result.json`` under this baseline tree.
        require_relative_regression: When set, each profile's result must
            record that a relative-regression evaluation took place.

    Returns:
        Human-readable failure descriptions; an empty list means the tree is
        valid. A ``result.json`` that cannot be read, is not valid JSON, or is
        not a JSON object is reported here rather than raised.
    """
    source_root = Path(source_root)
    matrix_file = source_root / (Path(matrix_path) if matrix_path is not None else DEFAULT_PERFORMANCE_MATRIX_PATH)
    matrix = load_performance_matrix(matrix_file)
    artifact_base = source_root / (Path(artifact_root) if artifact_root is not None else Path(matrix.current_artifact_root))
    baseline_path = source_root / Path(baseline_root) if baseline_root is not None else None

    if not artifact_base.exists():
        return [f'missing performance artifact root: {artifact_base}']

    failures: list[str] = []
    for filename in ('summary.json', 'index.json'):
        if not (artifact_base / filename).exists():
            failures.append(f'missing performance summary file: {artifact_base / filename}')

    # Files every profile directory must contain.
    required_files = (
        'result.json',
        'summary.json',
        'env.json',
        'percentile_histogram.json',
        'raw_samples.csv',
        'command.json',
        'correctness.json',
    )

    for profile in matrix.profiles:
        profile_dir = artifact_base / profile.profile_id
        if not profile_dir.exists():
            failures.append(f'missing profile artifact directory: {profile_dir}')
            continue

        missing_paths = [profile_dir / filename for filename in required_files if not (profile_dir / filename).exists()]
        if missing_paths:
            failures.extend(f'missing artifact file for {profile.profile_id}: {path}' for path in missing_paths)
            continue

        # A damaged result file is a validation failure, not a crash:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        result: Any = None
        try:
            result = json.loads((profile_dir / 'result.json').read_text(encoding='utf-8'))
        except (OSError, ValueError) as exc:
            failures.append(f'{profile.profile_id} result.json could not be read as JSON: {exc}')
        else:
            if not isinstance(result, dict):
                failures.append(f'{profile.profile_id} result.json is not a JSON object')
                result = None

        if result is not None:
            if result.get('profile_id') != profile.profile_id:
                failures.append(f'{profile.profile_id} result.json does not match profile id')
            if result.get('lane') != profile.lane:
                failures.append(f'{profile.profile_id} result.json does not match configured lane')
            if not result.get('passed', False):
                failures.append(f'{profile.profile_id} performance artifact is failing: {result.get("failure_reasons", [])}')

            # Nested sections may be absent or null; treat both as "no evidence".
            correctness = result.get('correctness')
            correctness_passed = isinstance(correctness, dict) and correctness.get('passed', False)
            if profile.correctness_required and not correctness_passed:
                failures.append(f'{profile.profile_id} is missing passing correctness-under-load evidence')

            regression = result.get('relative_regression')
            regression_evaluated = isinstance(regression, dict) and regression.get('evaluated', False)
            if require_relative_regression and not regression_evaluated:
                failures.append(f'{profile.profile_id} did not evaluate relative regression against a baseline')

        if baseline_path is not None and not (baseline_path / profile.profile_id / 'result.json').exists():
            failures.append(f'missing baseline artifact for {profile.profile_id}: {baseline_path / profile.profile_id / "result.json"}')
    return failures
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _resolve_commit_hash(source_root: Path) -> str:
    """Return the commit identifier for this run, or ``'unknown'``.

    The ``GIT_COMMIT`` and ``COMMIT_SHA`` environment variables are consulted
    in that order; the first non-empty one wins. Otherwise ``git rev-parse
    HEAD`` is run against *source_root*. Any failure of that command is
    treated as "no hash available" rather than an error.
    """
    for variable in ('GIT_COMMIT', 'COMMIT_SHA'):
        configured = os.environ.get(variable)
        if configured:
            return configured

    try:
        git_result = subprocess.run(
            ['git', '-C', str(source_root), 'rev-parse', 'HEAD'],
            capture_output=True,
            text=True,
            timeout=5.0,
            check=True,
        )
    except Exception:
        # Best effort: git missing, not a repository, timeout, ...
        return 'unknown'

    head = git_result.stdout.strip()
    if head:
        return head
    return 'unknown'
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _environment_snapshot(*, matrix: PerfMatrix, command: list[str]) -> dict[str, Any]:
    """Describe the interpreter, host and invocation a run executed under.

    The result is a plain dict: *command* is copied into ``argv`` and the
    matrix's declared certification platforms are copied from its metadata.
    """
    perf_clock = time.get_clock_info('perf_counter')
    certification_platform = _default_platform_id()

    interpreter_and_host = {
        'python_version': platform.python_version(),
        'python_implementation': platform.python_implementation(),
        'platform': platform.platform(),
        'machine': platform.machine(),
        'processor': platform.processor(),
        'cpu_count': os.cpu_count(),
    }
    timer = {
        'perf_counter_resolution': perf_clock.resolution,
        'perf_counter_monotonic': perf_clock.monotonic,
    }
    invocation = {
        'argv': list(command),
        'generated_at_epoch': time.time(),
        'certification_platform': certification_platform,
        'matrix_declared_platforms': list(matrix.metadata.get('certification_platforms', [])),
    }
    return {'matrix_name': matrix.matrix_name, **interpreter_and_host, **timer, **invocation}
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _summarize_measurement(measurement: Mapping[str, Any], *, profile: PerfProfile) -> dict[str, Any]:
    """Reduce a raw driver measurement to the metrics dict used for evaluation.

    Missing measurement keys fall back to neutral values: attempts default to
    the sample count, units to ``units_per_iteration * attempts``, everything
    else to zero or empty. Throughput is 0.0 when no positive duration was
    reported, and the error rate is 0.0 when there were no attempts.
    """
    latencies = [float(sample) for sample in measurement.get('samples_ms', [])]
    attempts = int(measurement.get('total_attempts', len(latencies)))
    units = int(measurement.get('total_units', profile.units_per_iteration * attempts))
    duration = float(measurement.get('total_duration_seconds', 0.0))
    if duration <= 0:
        throughput = 0.0
    else:
        throughput = float(units) / duration

    errors = int(measurement.get('error_count', 0))
    if attempts <= 0:
        error_rate = 0.0
    else:
        error_rate = errors / float(attempts)

    p50, p95, p99, p99_9 = _percentiles(latencies)

    stall_counts: dict[str, int] = {}
    for stall_kind, count in dict(measurement.get('protocol_stall_counts', {})).items():
        stall_counts[str(stall_kind)] = int(count)
    total_stalls = sum(stall_counts.values())

    first_byte_ms = _derive_time_to_first_byte(measurement, p50)
    handshake_ms = _derive_handshake_latency(measurement, p50, profile)

    return {
        # volume
        'sample_count': len(latencies),
        'total_attempts': attempts,
        'total_units': units,
        'total_duration_seconds': duration,
        'throughput_ops_per_sec': throughput,
        # latency
        'p50_ms': p50,
        'p95_ms': p95,
        'p99_ms': p99,
        'p99_9_ms': p99_9,
        'time_to_first_byte_ms': first_byte_ms,
        'handshake_latency_ms': handshake_ms,
        # errors and resources
        'error_count': errors,
        'error_rate': error_rate,
        'cpu_seconds': float(measurement.get('cpu_seconds', 0.0)),
        'rss_kib': float(measurement.get('rss_kib', 0.0)),
        'connections': int(measurement.get('connections', 0)),
        'streams': int(measurement.get('streams', 0)),
        'scheduler_rejections': int(measurement.get('scheduler_rejections', 0)),
        'protocol_stalls': total_stalls,
        'protocol_stall_counts': stall_counts,
        # context copied through from the driver and the profile
        'profile_metadata': dict(measurement.get('metadata', {})),
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
    }
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _evaluate_thresholds(profile: PerfProfile, metrics: Mapping[str, Any]) -> tuple[dict[str, Any], list[str]]:
    """Check *metrics* against the absolute limits configured on *profile*.

    Only recognised threshold keys that are present on the profile are
    evaluated; any other key is carried in the returned ``thresholds`` copy
    but not checked.

    Returns:
        ``(evaluation, failures)``, where ``evaluation`` holds the threshold
        copy, one entry per evaluated check and an overall ``passed`` flag,
        and ``failures`` holds one message per violated threshold.
    """
    limits = dict(profile.thresholds)
    checks: dict[str, Any] = {}
    failures: list[str] = []

    # (threshold key, metric key, numeric cast, True when the limit is a floor)
    rules = (
        ('min_throughput_ops_per_sec', 'throughput_ops_per_sec', float, True),
        ('max_p50_ms', 'p50_ms', float, False),
        ('max_p95_ms', 'p95_ms', float, False),
        ('max_p99_ms', 'p99_ms', float, False),
        ('max_p99_9_ms', 'p99_9_ms', float, False),
        ('max_time_to_first_byte_ms', 'time_to_first_byte_ms', float, False),
        ('max_handshake_latency_ms', 'handshake_latency_ms', float, False),
        ('max_error_rate', 'error_rate', float, False),
        ('max_scheduler_rejections', 'scheduler_rejections', int, False),
        ('max_protocol_stalls', 'protocol_stalls', int, False),
        ('max_rss_kib', 'rss_kib', float, False),
    )

    for threshold_key, metric_key, cast, is_floor in rules:
        if threshold_key not in limits:
            continue
        observed = metrics[metric_key]
        limit = limits[threshold_key]
        observed_number = cast(observed)
        limit_number = cast(limit)
        if is_floor:
            within_limit = observed_number >= limit_number
        else:
            within_limit = observed_number <= limit_number
        # The raw (uncast) values are what gets recorded and reported.
        checks[threshold_key] = {'observed': observed, 'threshold': limit, 'passed': within_limit}
        if not within_limit:
            failures.append(f'{profile.profile_id} failed threshold {threshold_key}: observed={observed!r} threshold={limit!r}')

    evaluation = {'thresholds': limits, 'checks': checks, 'passed': not failures}
    return evaluation, failures
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _evaluate_relative_regression(profile: PerfProfile, metrics: Mapping[str, Any], baseline_root: Path | None) -> dict[str, Any]:
    """Compare *metrics* with the stored baseline result of the same profile.

    When no baseline root is given, or the baseline ``result.json`` does not
    exist, the comparison is reported as not evaluated and passing. Otherwise
    each budget key present on the profile produces one entry in ``checks``:

    * throughput may drop by at most the budgeted fraction;
    * p99 / p99.9 latency may rise to the larger of the fractional limit and
      ``baseline + absolute slack``;
    * cpu time / rss may rise to the fractional limit plus absolute slack.

    Throughput and latency checks are skipped when the baseline value is not
    positive; cpu and rss checks are not.
    """
    if baseline_root is None:
        return {'evaluated': False, 'reason': 'no baseline root configured', 'passed': True}
    baseline_file = baseline_root / profile.profile_id / 'result.json'
    if not baseline_file.exists():
        return {'evaluated': False, 'reason': f'missing baseline artifact {baseline_file}', 'passed': True}

    stored_result = json.loads(baseline_file.read_text(encoding='utf-8'))
    budget = dict(profile.relative_regression_budget)
    reference = dict(stored_result.get('metrics', {}))

    # Every baseline value is parsed up front, whether or not a budget uses it.
    ref_throughput = float(reference.get('throughput_ops_per_sec', 0.0))
    ref_p99 = float(reference.get('p99_ms', 0.0))
    ref_p99_9 = float(reference.get('p99_9_ms', ref_p99))
    ref_cpu = float(reference.get('cpu_seconds', 0.0))
    ref_rss = float(reference.get('rss_kib', 0.0))

    checks: dict[str, Any] = {}
    failures: list[str] = []

    # Throughput: a floor relative to the baseline.
    drop_fraction = budget.get('max_throughput_drop_fraction')
    if drop_fraction is not None and ref_throughput > 0.0:
        floor = ref_throughput * (1.0 - float(drop_fraction))
        current = float(metrics['throughput_ops_per_sec'])
        within_budget = current >= floor
        checks['throughput_drop_fraction'] = {
            'baseline': ref_throughput,
            'observed': current,
            'minimum_allowed': floor,
            'passed': within_budget,
        }
        if not within_budget:
            failures.append(f'{profile.profile_id} throughput regressed below allowed budget')

    # Tail latency: ceiling is the more generous of fractional and absolute slack.
    # (budget key, slack key, default slack, baseline, metric key, check name, label)
    latency_rules = (
        ('max_p99_increase_fraction', 'absolute_p99_slack_ms', 0.25, ref_p99, 'p99_ms', 'p99_increase_fraction', 'p99'),
        ('max_p99_9_increase_fraction', 'absolute_p99_9_slack_ms', 0.5, ref_p99_9, 'p99_9_ms', 'p99_9_increase_fraction', 'p99.9'),
    )
    for budget_key, slack_key, default_slack, ref_value, metric_key, check_name, label in latency_rules:
        increase_fraction = budget.get(budget_key)
        if increase_fraction is None or not (ref_value > 0.0):
            continue
        slack = float(budget.get(slack_key, default_slack))
        ceiling = max(ref_value * (1.0 + float(increase_fraction)), ref_value + slack)
        current = float(metrics[metric_key])
        within_budget = current <= ceiling
        checks[check_name] = {
            'baseline': ref_value,
            'observed': current,
            'maximum_allowed': ceiling,
            'absolute_slack_ms': slack,
            'passed': within_budget,
        }
        if not within_budget:
            failures.append(f'{profile.profile_id} {label} latency regressed above allowed budget')

    # Resources: ceiling is the fractional limit plus absolute slack.
    # (budget key, slack key, default slack, baseline, metric key, check name,
    #  slack key in the recorded check, label)
    resource_rules = (
        ('max_cpu_increase_fraction', 'absolute_cpu_slack_seconds', 0.01, ref_cpu, 'cpu_seconds', 'cpu_increase_fraction', 'absolute_slack_seconds', 'cpu time'),
        ('max_rss_increase_fraction', 'absolute_rss_slack_kib', 1024.0, ref_rss, 'rss_kib', 'rss_increase_fraction', 'absolute_rss_slack_kib', 'rss'),
    )
    for budget_key, slack_key, default_slack, ref_value, metric_key, check_name, recorded_slack_key, label in resource_rules:
        increase_fraction = budget.get(budget_key)
        if increase_fraction is None:
            continue
        slack = float(budget.get(slack_key, default_slack))
        ceiling = ref_value * (1.0 + float(increase_fraction)) + slack
        current = float(metrics[metric_key])
        within_budget = current <= ceiling
        checks[check_name] = {
            'baseline': ref_value,
            'observed': current,
            'maximum_allowed': ceiling,
            recorded_slack_key: slack,
            'passed': within_budget,
        }
        if not within_budget:
            failures.append(f'{profile.profile_id} {label} regressed above allowed budget')

    return {
        'evaluated': True,
        'baseline_root': str(baseline_root),
        'baseline_profile': str(baseline_file),
        'checks': checks,
        'failure_reasons': failures,
        'passed': not failures,
    }
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _jsonable(value: Any) -> Any:
    """Convert *value* into something ``json.dumps`` accepts.

    Scalars and ``None`` pass through unchanged. Bytes become UTF-8 text, or
    a hex string when they are not valid UTF-8. Paths become strings.
    Mappings become dicts with string keys; lists, tuples and sets become
    lists. Containers are converted recursively. Anything else is replaced
    by its ``repr``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value

    if isinstance(value, bytes):
        try:
            text = value.decode('utf-8')
        except UnicodeDecodeError:
            text = value.hex()
        return text

    if isinstance(value, Path):
        return str(value)

    if isinstance(value, Mapping):
        converted: dict[str, Any] = {}
        for key, member in value.items():
            converted[str(key)] = _jsonable(member)
        return converted

    if isinstance(value, (list, tuple, set)):
        return list(map(_jsonable, value))

    return repr(value)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _write_profile_artifacts(
    profile_dir: Path,
    *,
    profile: PerfProfile,
    matrix: PerfMatrix,
    commit_hash: str,
    metrics: Mapping[str, Any],
    environment: Mapping[str, Any],
    correctness: Mapping[str, Any],
    threshold_evaluation: Mapping[str, Any],
    relative_regression: Mapping[str, Any],
    measurement: Mapping[str, Any],
    passed: bool,
    failure_reasons: list[str],
) -> None:
    """Write the artifact files for one profile into *profile_dir*.

    Six JSON documents are written (``result.json``, ``summary.json``,
    ``env.json``, ``percentile_histogram.json``, ``command.json``,
    ``correctness.json``), followed by ``raw_samples.csv``. *profile_dir*
    must already exist.
    """
    raw_samples = measurement.get('samples_ms', [])
    histogram = _build_histogram([float(sample) for sample in raw_samples])

    latency_keys = (
        'p50_ms',
        'p95_ms',
        'p99_ms',
        'p99_9_ms',
        'time_to_first_byte_ms',
        'handshake_latency_ms',
    )
    headline_keys = (
        'throughput_ops_per_sec',
        *latency_keys,
        'error_rate',
        'cpu_seconds',
        'rss_kib',
        'scheduler_rejections',
        'protocol_stalls',
    )

    percentile_payload = {
        'profile_id': profile.profile_id,
        **{key: metrics[key] for key in latency_keys},
        'histogram': histogram,
    }
    command_payload = {
        'argv': list(environment.get('argv', [])),
        'profile_id': profile.profile_id,
        'driver': profile.driver,
        'deployment_profile': profile.deployment_profile,
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
    }
    # Full record of the run, including every evaluation section.
    result_payload = {
        'profile_id': profile.profile_id,
        'family': profile.family,
        'description': profile.description,
        'driver': profile.driver,
        'deployment_profile': profile.deployment_profile,
        'lane': profile.lane,
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
        'rfc_targets': list(profile.rfc_targets),
        'commit_hash': commit_hash,
        'passed': passed,
        'metrics': dict(metrics),
        'correctness': dict(correctness),
        'threshold_evaluation': dict(threshold_evaluation),
        'relative_regression': dict(relative_regression),
        'failure_reasons': list(failure_reasons),
        'matrix_name': matrix.matrix_name,
    }
    # Condensed view carrying only the headline metrics.
    summary_payload = {
        'profile_id': profile.profile_id,
        'lane': profile.lane,
        'deployment_profile': profile.deployment_profile,
        'passed': passed,
        'metrics': {key: metrics[key] for key in headline_keys},
        'certification_platforms': list(profile.certification_platforms),
        'live_listener_required': profile.live_listener_required,
        'failure_reasons': list(failure_reasons),
    }

    documents = (
        ('result.json', result_payload),
        ('summary.json', summary_payload),
        ('env.json', dict(environment)),
        ('percentile_histogram.json', percentile_payload),
        ('command.json', command_payload),
        ('correctness.json', dict(correctness)),
    )
    for filename, document in documents:
        rendered = json.dumps(_jsonable(document), indent=2, sort_keys=True) + '\n'
        (profile_dir / filename).write_text(rendered, encoding='utf-8')

    _write_samples_csv(profile_dir / 'raw_samples.csv', raw_samples)
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def _write_samples_csv(path: Path, samples: list[Any]) -> None:
    """Write *samples* to *path* as a two-column CSV.

    The header is ``index,latency_ms``; rows are numbered from 1 and each
    value is formatted as a float with nine decimal places.
    """
    rows = [f'{position},{float(sample):.9f}' for position, sample in enumerate(samples, start=1)]
    content = '\n'.join(['index,latency_ms', *rows]) + '\n'
    path.write_text(content, encoding='utf-8')
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile]) -> None:
    """Write the run-level summary document to ``summary.json`` and ``index.json``.

    Both files receive the same JSON text (two-space indent, sorted keys,
    trailing newline, UTF-8). The payload is passed through the module's
    ``_jsonable`` helper before serialization.

    Args:
        artifact_root: Directory that receives both files. This function does
            not create it.
        summary: Source of the matrix name, artifact/baseline roots, commit
            hash, the total/passed/failed counters and the per-profile results.
        environment: Mapping read for ``certification_platform`` and
            ``generated_at_epoch``; a missing key contributes ``None``.
        profiles: Profiles whose ``lane`` attributes are tallied into the
            ``lane_counts`` entry of the payload.
    """
    # Tally how many of the supplied profiles belong to each lane.
    lane_counts: dict[str, int] = {}
    for profile in profiles:
        lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1

    payload = {
        'matrix_name': summary.matrix_name,
        'artifact_root': summary.artifact_root,
        'baseline_root': summary.baseline_root,
        'commit_hash': summary.commit_hash,
        'total': summary.total,
        'passed': summary.passed,
        'failed': summary.failed,
        'lane_counts': lane_counts,
        'certification_platform': environment.get('certification_platform'),
        'profiles': [
            {
                'profile_id': result.profile_id,
                'passed': result.passed,
                'artifact_dir': result.artifact_dir,
                'failure_reasons': result.failure_reasons,
            }
            for result in summary.profiles
        ],
        'generated_at_epoch': environment.get('generated_at_epoch'),
    }

    # The two files are byte-identical, so convert and serialize the payload
    # once instead of repeating the work for each file.
    document = json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n'
    for filename in ('summary.json', 'index.json'):
        (artifact_root / filename).write_text(document, encoding='utf-8')
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def _percentiles(samples: list[float]) -> tuple[float, float, float, float]:
    """Return the 50th, 95th, 99th and 99.9th percentiles of *samples*.

    An empty input yields four zeros. Otherwise the samples are sorted once
    and each percentile is computed by ``_percentile`` on the sorted copy; the
    caller's list is left untouched.
    """
    if samples:
        ranked = sorted(samples)
        p50, p95, p99, p99_9 = (
            _percentile(ranked, level) for level in (50.0, 95.0, 99.0, 99.9)
        )
        return p50, p95, p99, p99_9
    return 0.0, 0.0, 0.0, 0.0
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def _percentile(sorted_samples: list[float], pct: float) -> float:
    """Return the *pct*-th percentile of an already sorted sample list.

    The percentile position is ``pct / 100`` of the way along the index range
    ``0 .. len - 1``; when it falls between two indices the result is the
    linear interpolation of the two neighbouring samples. An empty list yields
    ``0.0`` and a single-element list yields that element as a ``float``.

    *pct* is not validated here; callers in this module pass values between
    0 and 100.
    """
    if not sorted_samples:
        return 0.0
    last_index = len(sorted_samples) - 1
    if last_index == 0:
        return float(sorted_samples[0])

    position = (pct / 100.0) * last_index
    lower_index = int(position)
    # Clamp so that a position on the final element interpolates with itself.
    upper_index = min(lower_index + 1, last_index)
    weight = position - lower_index

    lower_value = sorted_samples[lower_index]
    upper_value = sorted_samples[upper_index]
    return float(lower_value + ((upper_value - lower_value) * weight))
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _build_histogram(samples: list[float], *, bucket_count: int = 8) -> list[dict[str, Any]]:
    """Group *samples* into equal-width buckets spanning their min..max range.

    Args:
        samples: Latency values; the list is not modified.
        bucket_count: Number of buckets to produce when the samples are not
            all equal. Must be at least 1 in that case.

    Returns:
        A list of ``{'lower_ms', 'upper_ms', 'count'}`` dictionaries:
        an empty list for empty input, a single bucket holding every sample
        when all values are equal, and otherwise *bucket_count* buckets in
        ascending order. The maximum sample is counted in the last bucket.

    Raises:
        ValueError: If the samples differ and *bucket_count* is below 1.
    """
    if not samples:
        return []
    values = sorted(samples)
    minimum = values[0]
    maximum = values[-1]
    if minimum == maximum:
        return [{'lower_ms': minimum, 'upper_ms': maximum, 'count': len(values)}]

    # Checked only once buckets are really needed, so the early returns above
    # keep working for any bucket_count. Without this guard a zero count ends
    # in ZeroDivisionError and a negative one in IndexError.
    if bucket_count < 1:
        raise ValueError(f'bucket_count must be at least 1, got {bucket_count!r}')

    span = maximum - minimum
    bucket_size = span / float(bucket_count)
    buckets = [
        {
            'lower_ms': minimum + (bucket_size * index),
            'upper_ms': minimum + (bucket_size * (index + 1)),
            'count': 0,
        }
        for index in range(bucket_count)
    ]
    last_bucket = bucket_count - 1
    for value in values:
        # The maximum maps to index bucket_count; clamp it into the last bucket.
        offset = int(min(last_bucket, (value - minimum) / bucket_size))
        buckets[offset]['count'] += 1
    return buckets
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _derive_time_to_first_byte(measurement: Mapping[str, Any], default: float) -> float:
    """Resolve the time-to-first-byte value for a measurement.

    Resolution order:

    1. ``measurement['time_to_first_byte_ms']`` when present and not ``None``.
    2. The 50th percentile (via ``_percentile``) of
       ``measurement['time_to_first_byte_samples_ms']`` when that entry is a
       non-empty ``list``; each item is converted to ``float`` first.
    3. *default*.

    Values from steps 1 and 3 are converted to ``float`` before being returned.
    """
    reported = measurement.get('time_to_first_byte_ms')
    if reported is not None:
        return float(reported)

    raw_samples = measurement.get('time_to_first_byte_samples_ms')
    if not isinstance(raw_samples, list) or not raw_samples:
        return float(default)
    return _percentile(sorted(map(float, raw_samples)), 50.0)
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def _derive_handshake_latency(measurement: Mapping[str, Any], default: float, profile: PerfProfile) -> float:
    """Resolve the handshake-latency value for a measurement.

    Resolution order:

    1. ``measurement['handshake_latency_ms']`` when present and not ``None``.
    2. The 50th percentile (via ``_percentile``) of
       ``measurement['handshake_latency_samples_ms']`` when that entry is a
       non-empty ``list``; each item is converted to ``float`` first.
    3. *default* when ``_profile_expects_handshake(profile)`` is true.
    4. ``0.0`` otherwise.
    """
    reported = measurement.get('handshake_latency_ms')
    if reported is not None:
        return float(reported)

    raw_samples = measurement.get('handshake_latency_samples_ms')
    if isinstance(raw_samples, list) and len(raw_samples) > 0:
        return _percentile(sorted(map(float, raw_samples)), 50.0)

    # Without any measured value, only handshake-bearing profiles inherit the
    # caller's default; every other profile reports zero.
    return float(default) if _profile_expects_handshake(profile) else 0.0
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _profile_expects_handshake(profile: PerfProfile) -> bool:
    """Return whether *profile* is treated as performing a handshake.

    A profile qualifies when its ``family`` is exactly ``'TLS / PKI'`` or when
    its lower-cased ``deployment_profile`` contains ``'tls'``, ``'quic'`` or
    ``'http3'``.
    """
    deployment = profile.deployment_profile.lower()
    if profile.family == 'TLS / PKI':
        return True
    # 'websocket_http3' needs no marker of its own: every deployment name that
    # contains it also contains 'http3', so a separate check could never be
    # the deciding one.
    return any(marker in deployment for marker in ('tls', 'quic', 'http3'))
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def _default_platform_id() -> str:
    """Build an identifier for the platform running this interpreter.

    The result has the form ``<system>-<machine>-<implementation><major>.<minor>``
    with the three textual parts lower-cased, taken from ``platform.system()``,
    ``platform.machine()``, ``platform.python_implementation()`` and
    ``sys.version_info``.
    """
    interpreter = platform.python_implementation().lower()
    os_name = platform.system().lower()
    architecture = platform.machine().lower()
    major, minor = sys.version_info[:2]
    return '-'.join((os_name, architecture, f'{interpreter}{major}.{minor}'))
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
# Public names of this module: the names exported by a star-import.
# Underscore-prefixed helpers defined in this file are deliberately not listed.
__all__ = [
    'DEFAULT_BASELINE_ARTIFACT_ROOT',
    'DEFAULT_CURRENT_ARTIFACT_ROOT',
    'DEFAULT_PERFORMANCE_MATRIX_PATH',
    'PerfMatrix',
    'PerfProfile',
    'PerfProfileResult',
    'PerfRunSummary',
    'PerfRunnerError',
    'load_performance_matrix',
    'run_performance_matrix',
    'validate_performance_artifacts',
]
|