ssrjson-benchmark 0.0.5__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ssrjson-benchmark has been flagged as potentially problematic. See the package's registry page for more details.
- ssrjson_benchmark/__init__.py +19 -0
- ssrjson_benchmark/__main__.py +97 -0
- ssrjson_benchmark/_files/MotionsQuestionsAnswersQuestions2016.json +1 -0
- ssrjson_benchmark/_files/apache.json +3532 -0
- ssrjson_benchmark/_files/canada.json +56532 -0
- ssrjson_benchmark/_files/ctm.json +48951 -0
- ssrjson_benchmark/_files/github.json +1320 -0
- ssrjson_benchmark/_files/instruments.json +7395 -0
- ssrjson_benchmark/_files/mesh.json +3602 -0
- ssrjson_benchmark/_files/simple_object.json +11 -0
- ssrjson_benchmark/_files/simple_object_zh.json +11 -0
- ssrjson_benchmark/_files/truenull.json +1 -0
- ssrjson_benchmark/_files/tweet.json +135 -0
- ssrjson_benchmark/_files/twitter.json +15195 -0
- ssrjson_benchmark/_ssrjson_benchmark.pyd +0 -0
- ssrjson_benchmark/benchmark_impl.py +826 -0
- ssrjson_benchmark/result_types.py +88 -0
- ssrjson_benchmark/template.md +11 -0
- ssrjson_benchmark-0.0.5.dist-info/METADATA +70 -0
- ssrjson_benchmark-0.0.5.dist-info/RECORD +23 -0
- ssrjson_benchmark-0.0.5.dist-info/WHEEL +5 -0
- ssrjson_benchmark-0.0.5.dist-info/licenses/LICENSE +21 -0
- ssrjson_benchmark-0.0.5.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,826 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
import multiprocessing
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
import platform
|
|
9
|
+
import re
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
from importlib.util import find_spec
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Callable, List
|
|
14
|
+
|
|
15
|
+
import matplotlib as mpl
|
|
16
|
+
import matplotlib.pyplot as plt
|
|
17
|
+
import orjson
|
|
18
|
+
import ssrjson
|
|
19
|
+
import ujson
|
|
20
|
+
|
|
21
|
+
from . import _ssrjson_benchmark
|
|
22
|
+
from .result_types import BenchmarkFinalResult, BenchmarkResultPerFile
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from reportlab.pdfgen import canvas
|
|
26
|
+
|
|
27
|
+
# Headless matplotlib backend: figures are rendered to in-memory buffers,
# never displayed interactively.
mpl.use("Agg")
# Keep text as real <text> elements in SVG output (not vectorized paths),
# so fonts can be remapped when the SVG is embedded into the PDF.
mpl.rcParams["svg.fonttype"] = "none"


# Absolute location of this module and of the process working directory;
# used to locate bundled data files and to place report output.
CUR_FILE = os.path.abspath(__file__)
CUR_DIR = os.path.dirname(CUR_FILE)
CWD = os.getcwd()
# Nanoseconds per second; benchmark timings are reported in ns.
_NS_IN_ONE_S = 1000000000

# Fonts used by the PDF report generator.
PDF_HEADING_FONT = "Helvetica-Bold"
PDF_TEXT_FONT = "Courier"

# baseline is the first one.
LIBRARIES_COLORS = {
    "json": "#74c476",
    "ujson": "#c994c7",
    "orjson": "#2c7fb8",
    "ssrjson": "#fd8d3c",
}

MAX_BIN_BYTES_SIZE = 512 * 1024 * 1024  # 512MiB
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class BenchmarkFunction:
    """One benchmark candidate: a callable paired with the library it represents."""

    def __init__(self, func: Callable, library_name: str) -> None:
        # Name shown in reports and used as the per-library result key.
        self.library_name = library_name
        # The callable invoked by the benchmarker driver.
        self.func = func
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class BenchmarkGroup:
    """A named group of benchmark candidates that share one benchmarker driver."""

    def __init__(
        self,
        benchmarker: Callable,
        functions: list[BenchmarkFunction],
        group_name: str,
        input_preprocessor: Callable[[Any], Any] = lambda x: x,
    ) -> None:
        # Name of this group, e.g. "dumps" or "loads(str)".
        self.group_name = group_name
        # Driver that measures one candidate function.
        self.benchmarker = benchmarker
        # Candidate functions; the first one is treated as the baseline.
        self.functions = functions
        # Transform applied to the raw input before benchmarking
        # (defaults to identity).
        self.input_preprocessor = input_preprocessor
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# benchmarkers
|
|
71
|
+
def _benchmark(repeat_time: int, times_per_bin: int, func, *args):
    """Run the repeated benchmark with the utf-8 cache left intact.

    Returns the total time used in nanoseconds. ``times_per_bin`` is
    accepted only for signature compatibility with the other
    benchmarkers and is unused here.
    """
    was_enabled = _gc_prepare()  # collect once, then suspend automatic GC
    try:
        # Short warm-up pass to prime caches and code paths.
        _ssrjson_benchmark.run_object_accumulate_benchmark(func, 100, args)
        return _ssrjson_benchmark.run_object_accumulate_benchmark(
            func, repeat_time, args
        )
    finally:
        if was_enabled:
            gc.enable()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _benchmark_unicode_arg(repeat_time: int, times_per_bin: int, func, unicode: str):
    """Run the repeated benchmark on str input with the utf-8 cache disabled.

    Works in bins of at most ``times_per_bin`` calls so the duplicated
    input copies stay bounded in memory. Returns the total time used (ns).
    """
    was_enabled = _gc_prepare()  # collect once, then suspend automatic GC
    try:
        remaining = repeat_time
        elapsed_ns = 0
        while remaining != 0:
            bin_size = min(remaining, times_per_bin)
            remaining -= bin_size
            # Fresh identical copies with caches invalidated and no object
            # sharing; the extra copy feeds the warm-up call below.
            copies = _ssrjson_benchmark.copy_unicode_list_invalidate_cache(
                unicode, bin_size + 1
            )
            _ssrjson_benchmark.run_object_benchmark(func, (copies[0],))  # warm up
            for idx in range(1, bin_size + 1):
                elapsed_ns += _ssrjson_benchmark.run_object_benchmark(
                    func, (copies[idx],)
                )
            del copies
        return elapsed_ns
    finally:
        if was_enabled:
            gc.enable()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _benchmark_invalidate_dump_cache(
    repeat_time: int, times_per_bin: int, func, raw_bytes: bytes
):
    """Run the repeated dump benchmark, re-parsing the input for each call.

    Every call gets its own freshly-parsed object so any per-object utf-8
    cache cannot be reused across calls. Returns the total time used (ns).
    """
    was_enabled = _gc_prepare()  # collect once, then suspend automatic GC
    try:
        remaining = repeat_time
        elapsed_ns = 0
        while remaining != 0:
            bin_size = min(remaining, times_per_bin)
            remaining -= bin_size
            # Identical objects without structural sharing; the extra one
            # feeds the warm-up call below.
            parsed = [json.loads(raw_bytes) for _ in range(bin_size + 1)]
            _ssrjson_benchmark.run_object_benchmark(func, (parsed[0],))  # warm up
            for idx in range(1, bin_size + 1):
                elapsed_ns += _ssrjson_benchmark.run_object_benchmark(
                    func, (parsed[idx],)
                )
            del parsed
        return elapsed_ns
    finally:
        if was_enabled:
            gc.enable()
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _get_benchmark_defs() -> tuple[BenchmarkGroup, ...]:
    """Build all benchmark groups (dumps/loads variants across libraries).

    In every group the first function ("json") is the baseline that the
    other libraries' speed ratios are computed against.
    """
    return (
        # Compact dumps to str; orjson only emits bytes, so it decodes.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(lambda x: json.dumps(x, ensure_ascii=False), "json"),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, ensure_ascii=False), "ujson"
                ),
                BenchmarkFunction(lambda x: orjson.dumps(x).decode("utf-8"), "orjson"),
                BenchmarkFunction(ssrjson.dumps, "ssrjson"),
            ],
            "dumps",
        ),
        # 2-space-indented dumps to str.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, indent=2, ensure_ascii=False), "json"
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, indent=2, ensure_ascii=False), "ujson"
                ),
                BenchmarkFunction(
                    lambda x: orjson.dumps(x, option=orjson.OPT_INDENT_2).decode(
                        "utf-8"
                    ),
                    "orjson",
                ),
                BenchmarkFunction(lambda x: ssrjson.dumps(x, indent=2), "ssrjson"),
            ],
            "dumps(indented2)",
        ),
        # Compact dumps to bytes; json/ujson must encode their str output.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, ensure_ascii=False).encode("utf-8"), "json"
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, ensure_ascii=False).encode("utf-8"),
                    "ujson",
                ),
                BenchmarkFunction(orjson.dumps, "orjson"),
                BenchmarkFunction(ssrjson.dumps_to_bytes, "ssrjson"),
            ],
            "dumps_to_bytes",
        ),
        # 2-space-indented dumps to bytes.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, indent=2, ensure_ascii=False).encode(
                        "utf-8"
                    ),
                    "json",
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, indent=2, ensure_ascii=False).encode(
                        "utf-8"
                    ),
                    "ujson",
                ),
                BenchmarkFunction(
                    lambda x: orjson.dumps(x, option=orjson.OPT_INDENT_2), "orjson"
                ),
                BenchmarkFunction(
                    lambda x: ssrjson.dumps_to_bytes(x, indent=2), "ssrjson"
                ),
            ],
            "dumps_to_bytes(indented2)",
        ),
        # loads from str: the raw bytes input is decoded once up front,
        # and the str-specific benchmarker invalidates the utf-8 cache.
        BenchmarkGroup(
            _benchmark_unicode_arg,
            [
                BenchmarkFunction(json.loads, "json"),
                BenchmarkFunction(ujson.loads, "ujson"),
                BenchmarkFunction(orjson.loads, "orjson"),
                BenchmarkFunction(ssrjson.loads, "ssrjson"),
            ],
            "loads(str)",
            input_preprocessor=lambda x: x.decode("utf-8"),
        ),
        # loads from bytes, using the plain accumulate benchmarker.
        BenchmarkGroup(
            _benchmark,
            [
                BenchmarkFunction(json.loads, "json"),
                BenchmarkFunction(ujson.loads, "ujson"),
                BenchmarkFunction(orjson.loads, "orjson"),
                BenchmarkFunction(ssrjson.loads, "ssrjson"),
            ],
            "loads(bytes)",
        ),
    )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _get_benchmark_libraries() -> dict[str, BenchmarkGroup]:
    """Return the benchmark groups keyed by their group name."""
    groups = _get_benchmark_defs()
    return {group.group_name: group for group in groups}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _gc_prepare():
|
|
253
|
+
"""
|
|
254
|
+
Call collect once, and then disable automatic GC.
|
|
255
|
+
Return True if automatic GC was enabled.
|
|
256
|
+
"""
|
|
257
|
+
gc.collect()
|
|
258
|
+
gc_was_enabled = gc.isenabled()
|
|
259
|
+
if gc_was_enabled:
|
|
260
|
+
gc.disable()
|
|
261
|
+
return gc_was_enabled
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _get_processed_size(func: Callable, input_data, is_dumps):
|
|
265
|
+
if is_dumps:
|
|
266
|
+
# get output size of dumps
|
|
267
|
+
data_obj = json.loads(input_data)
|
|
268
|
+
output = func(data_obj)
|
|
269
|
+
if isinstance(output, bytes):
|
|
270
|
+
size = len(output)
|
|
271
|
+
else:
|
|
272
|
+
size = _ssrjson_benchmark.inspect_pyunicode(output)[1]
|
|
273
|
+
else:
|
|
274
|
+
# get loads input size
|
|
275
|
+
size = (
|
|
276
|
+
len(input_data)
|
|
277
|
+
if isinstance(input_data, bytes)
|
|
278
|
+
else _ssrjson_benchmark.inspect_pyunicode(input_data)[1]
|
|
279
|
+
)
|
|
280
|
+
return size
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def benchmark_multiprocess_wrapper(
    benchmarker, args, result_multiprocess_queue: multiprocessing.Queue
):
    """Run ``benchmarker(*args)`` and push its result onto the queue.

    Executed in a child process so that each measurement starts from a
    fresh interpreter state; the queue carries the result back to the
    parent.
    """
    result_multiprocess_queue.put(benchmarker(*args))
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _run_benchmark(
    cur_result_file: BenchmarkResultPerFile,
    repeat_times: int,
    times_per_bin: int,
    input_data: str | bytes,
    benchmark_group: BenchmarkGroup,
):
    """Run one benchmark group for one file, writing results in place.

    Each candidate is measured in a dedicated child process; the measured
    time (ns) comes back via a multiprocessing queue. Afterwards every
    library's ratio against the "json" baseline is computed, and ssrJSON's
    throughput in bytes/sec is recorded on the group result.
    """
    group_name = benchmark_group.group_name
    cur_target = cur_result_file[group_name]

    # E.g. decode bytes to str for the "loads(str)" group.
    input_data = benchmark_group.input_preprocessor(input_data)

    result_multiprocess_queue = multiprocessing.Queue()  # type: ignore

    for benchmark_target in benchmark_group.functions:
        prefix = f"[{benchmark_target.library_name}][{benchmark_group.group_name}]"
        # Pad the prefix to 40 columns so the parameters line up.
        print(
            prefix
            + (" " * max(0, 40 - len(prefix)))
            + f"repeat_times={repeat_times} times_per_bin={times_per_bin}"
        )
        # One child process per measurement for isolation between libraries.
        p = multiprocessing.Process(
            target=benchmark_multiprocess_wrapper,
            args=(
                benchmark_group.benchmarker,
                (repeat_times, times_per_bin, benchmark_target.func, input_data),
                result_multiprocess_queue,
            ),
        )
        p.start()
        p.join()
        # NOTE(review): if the child dies without putting a result, this
        # get() blocks forever — confirm whether a timeout is wanted.
        speed = result_multiprocess_queue.get()
        cur_lib = cur_target[benchmark_target.library_name]
        cur_lib.speed = speed

    # "json" is the baseline every ratio is computed against.
    baseline_name = "json"
    baseline_data = cur_target[baseline_name]
    for benchmark_target in benchmark_group.functions:
        cur_lib = cur_target[benchmark_target.library_name]
        if benchmark_target.library_name == "ssrjson":
            # calculate bytes per sec for ssrJSON
            size = _get_processed_size(
                benchmark_target.func, input_data, "dumps" in group_name
            )
            cur_target.ssrjson_bytes_per_sec = (
                size * repeat_times / (cur_lib.speed / _NS_IN_ONE_S)
            )

        # Ratio > 1 means faster than the baseline (times are in ns,
        # lower is better, hence baseline / candidate).
        cur_lib.ratio = (
            math.inf
            if baseline_data.speed == 0
            else (baseline_data.speed / cur_lib.speed)
        )
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _run_file_benchmark(
    benchmark_libraries: dict[str, BenchmarkGroup],
    file: pathlib.Path,
    process_bytes: int,
    bin_process_bytes: int,
):
    """Benchmark every group against one JSON file.

    Returns ``(base_file_name, per_file_result)``.
    Raises RuntimeError when the file is empty.
    """
    print(f"Running benchmark for {file.name}")
    with open(file, "rb") as fp:
        content = fp.read()
    text = content.decode("utf-8")
    result = BenchmarkResultPerFile()
    result.byte_size = size_in_bytes = len(content)
    if size_in_bytes == 0:
        raise RuntimeError(f"File {file} is empty.")
    # Record the CPython str representation details of the decoded input.
    kind, str_size, is_ascii, _ = _ssrjson_benchmark.inspect_pyunicode(text)
    result.pyunicode_size = str_size
    result.pyunicode_kind = kind
    result.pyunicode_is_ascii = is_ascii
    # Ceil-divide so at least process_bytes worth of data is processed.
    repeats = int((process_bytes + size_in_bytes - 1) // size_in_bytes)
    per_bin = max(1, bin_process_bytes // size_in_bytes)

    for group in benchmark_libraries.values():
        _run_benchmark(result, repeats, per_bin, content, group)
    return os.path.basename(file), result
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _get_head_rev_name():
    """Return the ssrjson version string used to tag output files.

    Falls back to the nested ``ssrjson.ssrjson`` module's version when the
    top-level attribute is missing or falsy.
    """
    version = getattr(ssrjson, "__version__", None)
    if version:
        return version
    return getattr(ssrjson, "ssrjson").__version__
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _get_real_output_file_name():
    """Return the benchmark output file name, tagged with the ssrjson revision."""
    rev = _get_head_rev_name()
    return f"benchmark_result_{rev}.json" if rev else "benchmark_result.json"
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _get_cpu_name() -> str:
|
|
390
|
+
cpuinfo_spec = find_spec("cpuinfo")
|
|
391
|
+
if cpuinfo_spec is not None:
|
|
392
|
+
import cpuinfo
|
|
393
|
+
|
|
394
|
+
cpu_name = cpuinfo.get_cpu_info().get("brand_raw", "UnknownCPU")
|
|
395
|
+
else:
|
|
396
|
+
# fallback
|
|
397
|
+
cpu_name: str = platform.processor()
|
|
398
|
+
if cpu_name.strip() == "":
|
|
399
|
+
# linux fallback
|
|
400
|
+
if os.path.exists("/proc/cpuinfo"):
|
|
401
|
+
with open(file="/proc/cpuinfo", mode="r") as file:
|
|
402
|
+
cpu_info_lines = file.readlines()
|
|
403
|
+
for line in cpu_info_lines:
|
|
404
|
+
if "model name" in line:
|
|
405
|
+
cpu_name = re.sub(
|
|
406
|
+
pattern=r"model name\s+:\s+", repl="", string=line
|
|
407
|
+
)
|
|
408
|
+
break
|
|
409
|
+
else:
|
|
410
|
+
cpu_name = "UnknownCPU"
|
|
411
|
+
# merge nearby spaces
|
|
412
|
+
return re.sub(pattern=r"\s+", repl=" ", string=cpu_name).strip()
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _get_mem_total() -> str:
|
|
416
|
+
mem_total: int = 0
|
|
417
|
+
if platform.system() == "Linux":
|
|
418
|
+
with open(file="/proc/meminfo", mode="r") as file:
|
|
419
|
+
mem_info_lines = file.readlines()
|
|
420
|
+
for line in mem_info_lines:
|
|
421
|
+
if "MemTotal" in line:
|
|
422
|
+
mem_total = int(re.sub(pattern=r"[^0-9]", repl="", string=line))
|
|
423
|
+
break
|
|
424
|
+
elif platform.system() == "Windows":
|
|
425
|
+
import psutil
|
|
426
|
+
|
|
427
|
+
mem_total = psutil.virtual_memory().total // (1024 * 1024)
|
|
428
|
+
return f"{mem_total / (1024**2):.3f}GiB"
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _get_ratio_color(ratio: float) -> str:
|
|
432
|
+
if ratio < 1:
|
|
433
|
+
return "#d63031" # red (worse than baseline)
|
|
434
|
+
elif ratio == 1:
|
|
435
|
+
return "black" # black (baseline)
|
|
436
|
+
elif ratio < 2:
|
|
437
|
+
return "#e67e22" # orange (similar/slightly better)
|
|
438
|
+
elif ratio < 4:
|
|
439
|
+
return "#f39c12" # amber (decent improvement)
|
|
440
|
+
elif ratio < 8:
|
|
441
|
+
return "#27ae60" # green (good)
|
|
442
|
+
elif ratio < 16:
|
|
443
|
+
return "#2980b9" # blue (great)
|
|
444
|
+
else:
|
|
445
|
+
return "#8e44ad" # purple (exceptional)
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _plot_relative_ops(
    catagories: list[str], data: dict, doc_name: str, index_s: str
) -> io.BytesIO:
    """Render one SVG figure of per-group speed ratios for a single file.

    One subplot per category (benchmark group); each subplot shows one bar
    per library, labeled with its ratio against the "json" baseline, plus
    ssrJSON's throughput in GB/s. Returns the SVG as an in-memory buffer.

    NOTE(review): ``index_s`` is unused here. Also, with a single category
    ``plt.subplots(1, 1)`` returns a lone Axes rather than an array, which
    would break the ``zip(axs, ...)`` below — confirm callers always pass
    more than one category.
    """
    libs = list(LIBRARIES_COLORS.keys())
    colors = [LIBRARIES_COLORS[n] for n in libs]
    n = len(catagories)
    bar_width = 0.2
    inner_pad = 0

    fig, axs = plt.subplots(
        1,
        n,
        figsize=(4 * n, 6),
        sharey=False,
        tight_layout=True,
        gridspec_kw={"wspace": 0},
    )

    x_positions = [i * (bar_width + inner_pad) for i in range(len(libs))]

    for ax, cat in zip(axs, catagories):
        # The baseline library's ratio is 1.0 by definition.
        vals = [1.0] + [data[cat][name]["ratio"] for name in libs[1:]]
        gbps = (data[cat]["ssrjson_bytes_per_sec"]) / (1024**3)

        for xi, val, col in zip(x_positions, vals, colors):
            ax.bar(xi, val, width=bar_width, color=col)
            # Ratio label just above each bar, color-coded by magnitude.
            ax.text(
                xi,
                val + 0.05,
                f"{val:.2f}x",
                ha="center",
                va="bottom",
                fontsize=9,
                color=_get_ratio_color(val),
            )

        # Throughput label centered inside the ssrjson bar.
        ssrjson_index = libs.index("ssrjson")
        ax.text(
            x_positions[ssrjson_index],
            vals[ssrjson_index] / 2,
            f"{gbps:.2f} GB/s",
            ha="center",
            va="center",
            fontsize=10,
            color="#2c3e50",
            fontweight="bold",
        )

        # baseline line
        ax.axhline(1.0, color="gray", linestyle="--", linewidth=1)
        # height = 1.1 * max bar height
        ax.set_ylim(0, max(vals + [1.0]) * 1.1)

        # hide all tick
        ax.tick_params(
            axis="both",
            which="both",
            left=False,
            bottom=False,
            labelleft=False,
            labelbottom=False,
        )

        # and spine
        for spine in ("left", "top", "right"):
            ax.spines[spine].set_visible(False)

        ax.set_xlabel(cat, fontsize=10, labelpad=6)

    fig.suptitle(
        doc_name,
        fontsize=20,
        fontweight="bold",
        y=0.98,
    )

    # color legend
    legend_elements = [
        plt.Line2D([0], [0], color=col, lw=4, label=name)
        for name, col in LIBRARIES_COLORS.items()
    ]
    fig.legend(
        handles=legend_elements,
        loc="upper right",
        bbox_to_anchor=(0.98, 0.95),
        ncol=len(libs),
        fontsize=14,
        frameon=False,
    )

    fig.text(
        0.5,
        0,
        "Higher is better",
        ha="center",
        va="bottom",
        fontsize=8,
        style="italic",
        color="#555555",
    )

    buf = io.BytesIO()
    plt.savefig(buf, format="svg", bbox_inches="tight")
    buf.seek(0)
    plt.close(fig)
    return buf
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def _draw_page_number(c: "canvas.Canvas", page_num: int):
    """Stamp the page number in small grey italics at the bottom-right corner."""
    from reportlab.lib.pagesizes import A4

    page_width = A4[0]
    c.setFont("Helvetica-Oblique", 8)  # italic
    c.setFillColorRGB(0.5, 0.5, 0.5)  # grey
    c.drawRightString(page_width - 40, 20, f"{page_num}")
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def _generate_pdf_report(
    figures: List[List[io.BytesIO]], header_text: str, output_pdf_path: str
) -> str:
    """Lay the SVG figures out into a paginated A4 PDF report.

    ``header_text`` is markdown-ish: its first line is a ``#`` title, the
    remaining lines are environment details rendered in a monospace font.
    Figures are scaled to the page width and flowed down the page, breaking
    to a new page when they no longer fit. Returns ``output_pdf_path``.
    """
    from reportlab.graphics import renderPDF
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    from svglib.svglib import svg2rlg

    try:
        from svglib.fonts import FontMap

        font_map = FontMap()
        font_map.register_default_fonts()
        # workaround for matplotlib using 700 to represent bold font, but svg2rlg using 700 as normal.
        font_map.register_font("Helvetica", weight="700", rlgFontName="Helvetica-Bold")
    except ImportError:
        font_map = None

    c = canvas.Canvas(output_pdf_path, pagesize=A4)
    width, height = A4

    # heading info
    heading = header_text.splitlines()
    # first line is # header
    header, heading_info = heading[0].removeprefix("#").strip(), heading[1:]
    c.setFont(PDF_HEADING_FONT, 20)
    text_obj = c.beginText(40, height - 50)
    text_obj.textLine(header)
    c.drawText(text_obj)

    # Wrap heading_info lines if overflow
    max_width = width - 80  # 40 margin on both sides
    wrapped_heading_info = []
    for line in heading_info:
        while c.stringWidth(line, PDF_TEXT_FONT, 10) > max_width:
            # Find a split point
            # NOTE(review): this estimates a character index from the width
            # of a single space — exact only for monospace fonts like
            # Courier used here; confirm if PDF_TEXT_FONT ever changes.
            split_idx = int(max_width // c.stringWidth(" ", PDF_TEXT_FONT, 10))
            # Try to split at nearest space before split_idx
            space_idx = line.rfind(" ", 0, split_idx)
            if space_idx == -1:
                space_idx = split_idx
            wrapped_heading_info.append(line[:space_idx])
            # TODO fixed indent
            line = " " + line[space_idx:].lstrip()
        wrapped_heading_info.append(line)
    heading_info = wrapped_heading_info

    c.setFont(PDF_TEXT_FONT, 10)
    text_obj = c.beginText(40, height - 70)
    for line in heading_info:
        text_obj.textLine(line)
    c.drawText(text_obj)

    # Footer attribution with a clickable link over the URL portion.
    c.setFont("Helvetica-Oblique", 8)
    text = "This report was generated by https://github.com/Nambers/ssrJSON-benchmark"
    c.drawString(40, 20, text)
    link_start = 40 + c.stringWidth("This report was generated by ")
    link_end = link_start + c.stringWidth(
        "https://github.com/Nambers/ssrJSON-benchmark"
    )
    text_height = 5  # Adjusted height to better fit the link area
    c.linkURL(
        "https://github.com/Nambers/ssrJSON-benchmark",
        (link_start, 20, link_end, 20 + text_height),
        relative=1,
    )

    # Start the figure flow below the rendered header block.
    header_lines = header_text.count("\n") + 1
    header_height = header_lines * 14 + 10
    # subheading spacing = 30
    y_pos = height - header_height - 30
    bottom_margin = 20
    vertical_gap = 20

    p = 0  # current page number (zero-based)

    # One named section per figure list; currently only "speed".
    for name, figs in zip(["speed"], figures):
        text_obj = c.beginText()
        text_obj.setTextOrigin(40, y_pos)
        text_obj.setFont(PDF_HEADING_FONT, 14)
        text_obj.textLine(f"{name}")
        c.drawText(text_obj)
        c.bookmarkHorizontal(name, 0, y_pos + 20)
        c.addOutlineEntry(name, name, level=0)
        y_pos -= 20
        for svg_io in figs:
            svg_io.seek(0)
            drawing = svg2rlg(svg_io, font_map=font_map)

            # Scale the drawing to the printable page width.
            avail_w = width - 80
            scale = avail_w / drawing.width
            drawing.width *= scale
            drawing.height *= scale
            drawing.scale(scale, scale)

            img_h = drawing.height
            # no enough space
            if y_pos - img_h - vertical_gap < bottom_margin:
                _draw_page_number(c, p)
                p += 1
                c.showPage()
                y_pos = height - bottom_margin

            # Light separator rule above each figure.
            c.setStrokeColorRGB(0.9, 0.9, 0.9)
            c.setLineWidth(0.4)
            c.line(40, y_pos, width - 40, y_pos)

            renderPDF.draw(drawing, c, 40, y_pos - img_h)
            y_pos -= img_h + vertical_gap

    _draw_page_number(c, p)
    c.save()
    return output_pdf_path
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _fetch_header(rev) -> str:
    """Render template.md with environment info for the report header.

    ``rev`` is the ssrjson revision/version string; the template is filled
    with the current time, OS, Python/library versions, SIMD feature
    flags, CPU name, and total memory.
    """
    with open(os.path.join(CUR_DIR, "template.md"), "r") as f:
        template = f.read()
    return template.format(
        REV=rev,
        TIME=time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()),
        OS=f"{platform.system()} {platform.machine()} {platform.release()} {platform.version()}",
        PYTHON=sys.version,
        ORJSON_VER=orjson.__version__,
        UJSON_VER=ujson.__version__,
        SIMD_FLAGS=ssrjson.get_current_features(),
        CHIPSET=_get_cpu_name(),
        MEM=_get_mem_total(),
    )
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def generate_report_pdf(result: BenchmarkFinalResult, file: str, out_dir: str = CWD):
    """
    Generate PDF report, using `result`.

    ``file`` is the benchmark result JSON file name; the report is written
    next to it as ``<file stem>.pdf`` inside ``out_dir``. Returns the path
    of the written PDF.
    """
    catagories = result.catagories
    file = file.removesuffix(".json")
    report_name = f"{file}.pdf"

    figures = []

    # Single figure section for now; mirrors the "speed" section name in
    # _generate_pdf_report.
    index_s = "speed"
    tmp = []
    for bench_filename in result.results:
        print(f"Processing {bench_filename} (PDF)")
        tmp.append(
            _plot_relative_ops(
                catagories,
                result.results[bench_filename],
                bench_filename,
                index_s,
            )
        )
    figures.append(tmp)

    # NOTE(review): the trailing removesuffix(".json") is redundant — the
    # suffix was already stripped above.
    template = _fetch_header(
        file.removeprefix("benchmark_result_").removesuffix(".json")
    )
    out_path = _generate_pdf_report(
        figures,
        header_text=template,
        output_pdf_path=os.path.join(out_dir, report_name),
    )
    print(f"Report saved to {out_path}")
    return out_path
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def generate_report_markdown(
    result: BenchmarkFinalResult, file: str, out_dir: str = CWD
):
    """
    Generate Markdown report, using `result`.

    Writes one SVG per benchmarked file plus a markdown file into
    ``<out_dir>/<file stem>_report/``. Returns the markdown file path.
    """
    file = file.removesuffix(".json")
    report_name = f"{file}.md"
    report_folder = os.path.join(out_dir, f"{file}_report")

    # mkdir
    if not os.path.exists(report_folder):
        os.makedirs(report_folder)

    # NOTE(review): the trailing removesuffix(".json") is redundant — the
    # suffix was already stripped above.
    template = _fetch_header(
        file.removeprefix("benchmark_result_").removesuffix(".json")
    )

    index_s = "speed"
    template += f"\n\n## {index_s}\n\n"
    for bench_filename in result.results:
        print(f"Processing {bench_filename} (Markdown)")
        with open(
            os.path.join(report_folder, bench_filename + ".svg"), "wb"
        ) as svg_file:
            svg_file.write(
                _plot_relative_ops(
                    result.catagories,
                    result.results[bench_filename],
                    bench_filename,
                    index_s,
                ).getvalue()
            )
        # add svg
        # NOTE(review): this f-string contains no image reference — the
        # markdown image markup (e.g. an ![name](./name.svg) link) appears
        # to be missing or lost; confirm against upstream.
        template += f"\n\n"

    ret = os.path.join(report_folder, report_name)
    with open(ret, "w") as f:
        f.write(template)
    print(f"Report saved to {ret}")
    return ret
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def parse_file_result(j):
    """Deserialize a benchmark-result JSON object into a BenchmarkFinalResult."""
    parsed = BenchmarkFinalResult.parse(j)
    return parsed
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
def is_unix_except_macos():
    """Return True on Unix-like systems other than macOS (Linux, AIX, FreeBSD)."""
    return platform.system() in {"Linux", "AIX", "FreeBSD"}
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def _set_multiprocessing_start_method():
|
|
785
|
+
try:
|
|
786
|
+
multiprocessing.set_start_method("fork")
|
|
787
|
+
except RuntimeError as e:
|
|
788
|
+
if "context has already been set" not in str(e):
|
|
789
|
+
raise
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def run_benchmark(
    files: list[pathlib.Path],
    process_bytes: int,
    bin_process_bytes: int,
):
    """
    Generate a JSON result of benchmark.
    Also returns a result object.

    ``files`` are the JSON inputs to benchmark; ``process_bytes`` sets how
    much data each measurement should process in total, ``bin_process_bytes``
    how much per bin. Returns ``(result, output_file_name)`` and writes the
    serialized result to the output file (overwriting any existing one).
    """
    # Set multiprocessing start method to fork, if Python version is 3.14+ on Unix
    if sys.version_info >= (3, 14) and is_unix_except_macos():
        _set_multiprocessing_start_method()

    file = _get_real_output_file_name()

    result = BenchmarkFinalResult()
    result.results = dict()

    benchmark_libraries = _get_benchmark_libraries()

    # Stable, sorted category order for reports.
    result.catagories = sorted(list(benchmark_libraries.keys()))

    for bench_file in files:
        k, v = _run_file_benchmark(
            benchmark_libraries, bench_file, process_bytes, bin_process_bytes
        )
        result.results[k] = v
    output_result = result.dumps()

    # Replace any stale result file from a previous run.
    if os.path.exists(file):
        os.remove(file)

    with open(f"{file}", "w", encoding="utf-8") as f:
        f.write(output_result)
    return result, file
|