ssrjson-benchmark 0.0.5__cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ssrjson-benchmark might be problematic. Click here for more details.

@@ -0,0 +1,826 @@
1
+ import gc
2
+ import io
3
+ import json
4
+ import math
5
+ import multiprocessing
6
+ import os
7
+ import pathlib
8
+ import platform
9
+ import re
10
+ import sys
11
+ import time
12
+ from importlib.util import find_spec
13
+ from typing import TYPE_CHECKING, Any, Callable, List
14
+
15
+ import matplotlib as mpl
16
+ import matplotlib.pyplot as plt
17
+ import orjson
18
+ import ssrjson
19
+ import ujson
20
+
21
+ from . import _ssrjson_benchmark
22
+ from .result_types import BenchmarkFinalResult, BenchmarkResultPerFile
23
+
24
+ if TYPE_CHECKING:
25
+ from reportlab.pdfgen import canvas
26
+
27
# Use a non-interactive backend: figures are rendered off-screen into SVG
# buffers, never shown in a window.
mpl.use("Agg")
# Keep text as <text> elements in the SVG (not outlined paths) so fonts can
# be remapped when the SVG is embedded into the PDF report.
mpl.rcParams["svg.fonttype"] = "none"


# Absolute path of this module and its directory (used to locate the bundled
# template.md); CWD is the default output directory for generated reports.
CUR_FILE = os.path.abspath(__file__)
CUR_DIR = os.path.dirname(CUR_FILE)
CWD = os.getcwd()
# Nanoseconds per second, for converting benchmark timings to seconds.
_NS_IN_ONE_S = 1000000000

# Fonts used for headings and monospace text in the generated PDF report.
PDF_HEADING_FONT = "Helvetica-Bold"
PDF_TEXT_FONT = "Courier"

# Bar color per benchmarked library.
# baseline is the first one.
LIBRARIES_COLORS = {
    "json": "#74c476",
    "ujson": "#c994c7",
    "orjson": "#2c7fb8",
    "ssrjson": "#fd8d3c",
}

MAX_BIN_BYTES_SIZE = 512 * 1024 * 1024  # 512MiB
48
+
49
+
50
class BenchmarkFunction:
    """One benchmark candidate: a callable paired with the library it represents."""

    def __init__(self, func: Callable, library_name: str) -> None:
        """Record *func* under the *library_name* label used in results/plots."""
        self.func = func
        self.library_name = library_name
54
+
55
+
56
class BenchmarkGroup:
    """A named family of benchmark candidates driven by one benchmarker.

    ``input_preprocessor`` converts the raw file bytes into whatever the
    group's functions expect (identity by default).
    """

    def __init__(
        self,
        benchmarker: Callable,
        functions: list[BenchmarkFunction],
        group_name: str,
        input_preprocessor: Callable[[Any], Any] = lambda x: x,
    ) -> None:
        """Store the driver, its candidate functions, and the group label."""
        self.benchmarker = benchmarker
        self.functions = functions
        self.group_name = group_name
        self.input_preprocessor = input_preprocessor
68
+
69
+
70
+ # benchmarkers
71
def _benchmark(repeat_time: int, times_per_bin: int, func, *args):
    """
    Repeatedly invoke ``func(*args)`` and accumulate elapsed time, keeping
    any utf-8 caches intact between calls.

    ``times_per_bin`` is accepted only for signature compatibility with the
    other benchmarkers and is unused here.
    Returns the total time used in nanoseconds.
    """
    # Collect once, then freeze automatic GC for the measured run.
    was_enabled = _gc_prepare()
    try:
        # Short warm-up round before the real measurement.
        _ssrjson_benchmark.run_object_accumulate_benchmark(func, 100, args)
        elapsed = _ssrjson_benchmark.run_object_accumulate_benchmark(
            func, repeat_time, args
        )
        return elapsed
    finally:
        if was_enabled:
            gc.enable()
88
+
89
+
90
def _benchmark_unicode_arg(repeat_time: int, times_per_bin: int, func, unicode: str):
    """
    Benchmark *func* on fresh copies of *unicode* whose utf-8 caches have
    been invalidated, so each call pays the full conversion cost.
    Returns the total time used in nanoseconds.
    """
    # Collect once, then freeze automatic GC for the measured run.
    was_enabled = _gc_prepare()
    try:
        total = 0
        remaining = repeat_time
        while remaining:
            batch = min(remaining, times_per_bin)
            remaining -= batch
            # Identical but non-shared string copies; index 0 is a spare
            # used only for warm-up.
            copies = _ssrjson_benchmark.copy_unicode_list_invalidate_cache(
                unicode, batch + 1
            )
            # warm up
            _ssrjson_benchmark.run_object_benchmark(func, (copies[0],))
            for idx in range(1, batch + 1):
                total += _ssrjson_benchmark.run_object_benchmark(func, (copies[idx],))
            # Drop the batch before preparing the next one to bound memory.
            del copies
        return total
    finally:
        if was_enabled:
            gc.enable()
119
+
120
+
121
def _benchmark_invalidate_dump_cache(
    repeat_time: int, times_per_bin: int, func, raw_bytes: bytes
):
    """
    Benchmark a dump-style *func* on freshly parsed copies of the same JSON
    document so no per-object utf-8 cache survives between calls.
    Returns the total time used in nanoseconds.
    """
    # Collect once, then freeze automatic GC for the measured run.
    was_enabled = _gc_prepare()
    try:
        total = 0
        remaining = repeat_time
        while remaining:
            batch = min(remaining, times_per_bin)
            remaining -= batch
            # Identical but distinct object trees; index 0 is warm-up only.
            objs = [json.loads(raw_bytes) for _ in range(batch + 1)]
            # warm up
            _ssrjson_benchmark.run_object_benchmark(func, (objs[0],))
            for idx in range(1, batch + 1):
                total += _ssrjson_benchmark.run_object_benchmark(func, (objs[idx],))
            # Drop the batch before preparing the next one to bound memory.
            del objs
        return total
    finally:
        if was_enabled:
            gc.enable()
150
+
151
+
152
def _get_benchmark_defs() -> tuple[BenchmarkGroup, ...]:
    """
    Build the full benchmark matrix: six groups (dumps/loads variants), each
    comparing json, ujson, orjson and ssrjson.

    The lambdas normalize outputs so every library does comparable work per
    group: str-producing groups decode orjson's bytes, bytes-producing
    groups encode json/ujson's str.
    """
    return (
        # dumps -> str (orjson natively returns bytes, so decode it).
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(lambda x: json.dumps(x, ensure_ascii=False), "json"),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, ensure_ascii=False), "ujson"
                ),
                BenchmarkFunction(lambda x: orjson.dumps(x).decode("utf-8"), "orjson"),
                BenchmarkFunction(ssrjson.dumps, "ssrjson"),
            ],
            "dumps",
        ),
        # dumps -> str with 2-space indentation.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, indent=2, ensure_ascii=False), "json"
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, indent=2, ensure_ascii=False), "ujson"
                ),
                BenchmarkFunction(
                    lambda x: orjson.dumps(x, option=orjson.OPT_INDENT_2).decode(
                        "utf-8"
                    ),
                    "orjson",
                ),
                BenchmarkFunction(lambda x: ssrjson.dumps(x, indent=2), "ssrjson"),
            ],
            "dumps(indented2)",
        ),
        # dumps -> bytes (json/ujson natively return str, so encode it).
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, ensure_ascii=False).encode("utf-8"), "json"
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, ensure_ascii=False).encode("utf-8"),
                    "ujson",
                ),
                BenchmarkFunction(orjson.dumps, "orjson"),
                BenchmarkFunction(ssrjson.dumps_to_bytes, "ssrjson"),
            ],
            "dumps_to_bytes",
        ),
        # dumps -> bytes with 2-space indentation.
        BenchmarkGroup(
            _benchmark_invalidate_dump_cache,
            [
                BenchmarkFunction(
                    lambda x: json.dumps(x, indent=2, ensure_ascii=False).encode(
                        "utf-8"
                    ),
                    "json",
                ),
                BenchmarkFunction(
                    lambda x: ujson.dumps(x, indent=2, ensure_ascii=False).encode(
                        "utf-8"
                    ),
                    "ujson",
                ),
                BenchmarkFunction(
                    lambda x: orjson.dumps(x, option=orjson.OPT_INDENT_2), "orjson"
                ),
                BenchmarkFunction(
                    lambda x: ssrjson.dumps_to_bytes(x, indent=2), "ssrjson"
                ),
            ],
            "dumps_to_bytes(indented2)",
        ),
        # loads from str: input bytes are decoded once up front, and the
        # str-specific benchmarker invalidates the utf-8 cache per call.
        BenchmarkGroup(
            _benchmark_unicode_arg,
            [
                BenchmarkFunction(json.loads, "json"),
                BenchmarkFunction(ujson.loads, "ujson"),
                BenchmarkFunction(orjson.loads, "orjson"),
                BenchmarkFunction(ssrjson.loads, "ssrjson"),
            ],
            "loads(str)",
            input_preprocessor=lambda x: x.decode("utf-8"),
        ),
        # loads from bytes: plain accumulate benchmarker, cache left intact.
        BenchmarkGroup(
            _benchmark,
            [
                BenchmarkFunction(json.loads, "json"),
                BenchmarkFunction(ujson.loads, "ujson"),
                BenchmarkFunction(orjson.loads, "orjson"),
                BenchmarkFunction(ssrjson.loads, "ssrjson"),
            ],
            "loads(bytes)",
        ),
    )
246
+
247
+
248
def _get_benchmark_libraries() -> dict[str, BenchmarkGroup]:
    """Index the benchmark group definitions by their group name."""
    groups: dict[str, BenchmarkGroup] = {}
    for group in _get_benchmark_defs():
        groups[group.group_name] = group
    return groups
250
+
251
+
252
+ def _gc_prepare():
253
+ """
254
+ Call collect once, and then disable automatic GC.
255
+ Return True if automatic GC was enabled.
256
+ """
257
+ gc.collect()
258
+ gc_was_enabled = gc.isenabled()
259
+ if gc_was_enabled:
260
+ gc.disable()
261
+ return gc_was_enabled
262
+
263
+
264
+ def _get_processed_size(func: Callable, input_data, is_dumps):
265
+ if is_dumps:
266
+ # get output size of dumps
267
+ data_obj = json.loads(input_data)
268
+ output = func(data_obj)
269
+ if isinstance(output, bytes):
270
+ size = len(output)
271
+ else:
272
+ size = _ssrjson_benchmark.inspect_pyunicode(output)[1]
273
+ else:
274
+ # get loads input size
275
+ size = (
276
+ len(input_data)
277
+ if isinstance(input_data, bytes)
278
+ else _ssrjson_benchmark.inspect_pyunicode(input_data)[1]
279
+ )
280
+ return size
281
+
282
+
283
def benchmark_multiprocess_wrapper(
    benchmarker, args, result_multiprocess_queue: multiprocessing.Queue
):
    """Run *benchmarker* with *args* in this process and ship the result back
    to the parent over *result_multiprocess_queue*."""
    result_multiprocess_queue.put(benchmarker(*args))
288
+
289
+
290
def _run_benchmark(
    cur_result_file: BenchmarkResultPerFile,
    repeat_times: int,
    times_per_bin: int,
    input_data: str | bytes,
    benchmark_group: BenchmarkGroup,
):
    """
    Run every library of *benchmark_group* against *input_data* and record
    raw timings plus baseline-relative ratios into *cur_result_file*.

    Each library is timed inside a fresh child process so interpreter state
    cannot leak between candidates; the child returns a single nanosecond
    total via a multiprocessing queue. Afterwards each library's ratio
    against the "json" baseline is computed, and ssrjson additionally gets
    an absolute bytes-per-second figure.
    """
    group_name = benchmark_group.group_name
    cur_target = cur_result_file[group_name]

    # e.g. decode bytes -> str for the loads(str) group; identity otherwise.
    input_data = benchmark_group.input_preprocessor(input_data)

    result_multiprocess_queue = multiprocessing.Queue()  # type: ignore

    for benchmark_target in benchmark_group.functions:
        prefix = f"[{benchmark_target.library_name}][{benchmark_group.group_name}]"
        # Pad the prefix to a 40-char column so the parameters line up.
        print(
            prefix
            + (" " * max(0, 40 - len(prefix)))
            + f"repeat_times={repeat_times} times_per_bin={times_per_bin}"
        )
        p = multiprocessing.Process(
            target=benchmark_multiprocess_wrapper,
            args=(
                benchmark_group.benchmarker,
                (repeat_times, times_per_bin, benchmark_target.func, input_data),
                result_multiprocess_queue,
            ),
        )
        p.start()
        p.join()
        # Total elapsed nanoseconds measured in the child process.
        speed = result_multiprocess_queue.get()
        cur_lib = cur_target[benchmark_target.library_name]
        cur_lib.speed = speed

    baseline_name = "json"
    baseline_data = cur_target[baseline_name]
    for benchmark_target in benchmark_group.functions:
        cur_lib = cur_target[benchmark_target.library_name]
        if benchmark_target.library_name == "ssrjson":
            # calculate bytes per sec for ssrJSON
            size = _get_processed_size(
                benchmark_target.func, input_data, "dumps" in group_name
            )
            cur_target.ssrjson_bytes_per_sec = (
                size * repeat_times / (cur_lib.speed / _NS_IN_ONE_S)
            )

        # ratio > 1 means faster than the json baseline (time is inverted).
        cur_lib.ratio = (
            math.inf
            if baseline_data.speed == 0
            else (baseline_data.speed / cur_lib.speed)
        )
343
+
344
+
345
def _run_file_benchmark(
    benchmark_libraries: dict[str, BenchmarkGroup],
    file: pathlib.Path,
    process_bytes: int,
    bin_process_bytes: int,
):
    """
    Benchmark every group against one input file.

    *process_bytes* sets the total payload volume each benchmark should
    process; *bin_process_bytes* bounds how much is processed per batch.
    Returns ``(base_file_name, BenchmarkResultPerFile)``.
    Raises RuntimeError when the file is empty.
    """
    print(f"Running benchmark for {file.name}")
    with open(file, "rb") as fp:
        raw_bytes = fp.read()
    raw = raw_bytes.decode("utf-8")
    base_file_name = os.path.basename(file)

    result = BenchmarkResultPerFile()
    result.byte_size = total_bytes = len(raw_bytes)
    if not total_bytes:
        raise RuntimeError(f"File {file} is empty.")
    kind, str_size, is_ascii, _ = _ssrjson_benchmark.inspect_pyunicode(raw)
    result.pyunicode_size = str_size
    result.pyunicode_kind = kind
    result.pyunicode_is_ascii = is_ascii

    # Ceil-divide so at least process_bytes worth of data is handled.
    repeat_times = int((process_bytes + total_bytes - 1) // total_bytes)
    times_per_bin = max(1, bin_process_bytes // total_bytes)

    for group in benchmark_libraries.values():
        _run_benchmark(result, repeat_times, times_per_bin, raw_bytes, group)
    return base_file_name, result
372
+
373
+
374
def _get_head_rev_name():
    """Version string of the benchmarked ssrjson build, falling back to the
    nested ``ssrjson.ssrjson`` module when the top-level attribute is absent."""
    version = getattr(ssrjson, "__version__", None)
    if version:
        return version
    return getattr(ssrjson, "ssrjson").__version__
378
+
379
+
380
def _get_real_output_file_name():
    """Result-file name, suffixed with the ssrjson revision when one is known."""
    rev = _get_head_rev_name()
    return f"benchmark_result_{rev}.json" if rev else "benchmark_result.json"
387
+
388
+
389
+ def _get_cpu_name() -> str:
390
+ cpuinfo_spec = find_spec("cpuinfo")
391
+ if cpuinfo_spec is not None:
392
+ import cpuinfo
393
+
394
+ cpu_name = cpuinfo.get_cpu_info().get("brand_raw", "UnknownCPU")
395
+ else:
396
+ # fallback
397
+ cpu_name: str = platform.processor()
398
+ if cpu_name.strip() == "":
399
+ # linux fallback
400
+ if os.path.exists("/proc/cpuinfo"):
401
+ with open(file="/proc/cpuinfo", mode="r") as file:
402
+ cpu_info_lines = file.readlines()
403
+ for line in cpu_info_lines:
404
+ if "model name" in line:
405
+ cpu_name = re.sub(
406
+ pattern=r"model name\s+:\s+", repl="", string=line
407
+ )
408
+ break
409
+ else:
410
+ cpu_name = "UnknownCPU"
411
+ # merge nearby spaces
412
+ return re.sub(pattern=r"\s+", repl=" ", string=cpu_name).strip()
413
+
414
+
415
+ def _get_mem_total() -> str:
416
+ mem_total: int = 0
417
+ if platform.system() == "Linux":
418
+ with open(file="/proc/meminfo", mode="r") as file:
419
+ mem_info_lines = file.readlines()
420
+ for line in mem_info_lines:
421
+ if "MemTotal" in line:
422
+ mem_total = int(re.sub(pattern=r"[^0-9]", repl="", string=line))
423
+ break
424
+ elif platform.system() == "Windows":
425
+ import psutil
426
+
427
+ mem_total = psutil.virtual_memory().total // (1024 * 1024)
428
+ return f"{mem_total / (1024**2):.3f}GiB"
429
+
430
+
431
+ def _get_ratio_color(ratio: float) -> str:
432
+ if ratio < 1:
433
+ return "#d63031" # red (worse than baseline)
434
+ elif ratio == 1:
435
+ return "black" # black (baseline)
436
+ elif ratio < 2:
437
+ return "#e67e22" # orange (similar/slightly better)
438
+ elif ratio < 4:
439
+ return "#f39c12" # amber (decent improvement)
440
+ elif ratio < 8:
441
+ return "#27ae60" # green (good)
442
+ elif ratio < 16:
443
+ return "#2980b9" # blue (great)
444
+ else:
445
+ return "#8e44ad" # purple (exceptional)
446
+
447
+
448
def _plot_relative_ops(
    catagories: list[str], data: dict, doc_name: str, index_s: str
) -> io.BytesIO:
    """
    Render one figure of speed-ratio bars (one subplot per category) and
    return it as an in-memory SVG buffer.

    ``data[cat][lib]["ratio"]`` supplies each non-baseline library's speed
    relative to "json" (the baseline bar is fixed at 1.0), and
    ``data[cat]["ssrjson_bytes_per_sec"]`` the absolute ssrjson throughput
    printed inside its bar. ``index_s`` is currently unused by this function.

    NOTE(review): with a single category ``plt.subplots(1, 1)`` returns a
    bare Axes rather than an array, so ``zip(axs, catagories)`` would fail —
    confirm callers always pass two or more categories.
    """
    libs = list(LIBRARIES_COLORS.keys())
    colors = [LIBRARIES_COLORS[n] for n in libs]
    n = len(catagories)
    bar_width = 0.2
    inner_pad = 0

    fig, axs = plt.subplots(
        1,
        n,
        figsize=(4 * n, 6),
        sharey=False,
        tight_layout=True,
        gridspec_kw={"wspace": 0},
    )

    # One bar slot per library, shared across all subplots.
    x_positions = [i * (bar_width + inner_pad) for i in range(len(libs))]

    for ax, cat in zip(axs, catagories):
        # Baseline (first library) is 1.0 by definition.
        vals = [1.0] + [data[cat][name]["ratio"] for name in libs[1:]]
        gbps = (data[cat]["ssrjson_bytes_per_sec"]) / (1024**3)

        for xi, val, col in zip(x_positions, vals, colors):
            ax.bar(xi, val, width=bar_width, color=col)
            # Ratio label just above each bar, colored by quality.
            ax.text(
                xi,
                val + 0.05,
                f"{val:.2f}x",
                ha="center",
                va="bottom",
                fontsize=9,
                color=_get_ratio_color(val),
            )

        # Absolute throughput printed at mid-height of the ssrjson bar.
        ssrjson_index = libs.index("ssrjson")
        ax.text(
            x_positions[ssrjson_index],
            vals[ssrjson_index] / 2,
            f"{gbps:.2f} GB/s",
            ha="center",
            va="center",
            fontsize=10,
            color="#2c3e50",
            fontweight="bold",
        )

        # baseline line
        ax.axhline(1.0, color="gray", linestyle="--", linewidth=1)
        # height = 1.1 * max bar height
        ax.set_ylim(0, max(vals + [1.0]) * 1.1)

        # hide all tick
        ax.tick_params(
            axis="both",
            which="both",
            left=False,
            bottom=False,
            labelleft=False,
            labelbottom=False,
        )

        # and spine
        for spine in ("left", "top", "right"):
            ax.spines[spine].set_visible(False)

        ax.set_xlabel(cat, fontsize=10, labelpad=6)

    fig.suptitle(
        doc_name,
        fontsize=20,
        fontweight="bold",
        y=0.98,
    )

    # color legend
    legend_elements = [
        plt.Line2D([0], [0], color=col, lw=4, label=name)
        for name, col in LIBRARIES_COLORS.items()
    ]
    fig.legend(
        handles=legend_elements,
        loc="upper right",
        bbox_to_anchor=(0.98, 0.95),
        ncol=len(libs),
        fontsize=14,
        frameon=False,
    )

    fig.text(
        0.5,
        0,
        "Higher is better",
        ha="center",
        va="bottom",
        fontsize=8,
        style="italic",
        color="#555555",
    )

    # Serialize to SVG in memory and release the figure.
    buf = io.BytesIO()
    plt.savefig(buf, format="svg", bbox_inches="tight")
    buf.seek(0)
    plt.close(fig)
    return buf
554
+
555
+
556
def _draw_page_number(c: "canvas.Canvas", page_num: int):
    """Stamp *page_num* in small grey italics at the bottom-right of the page."""
    from reportlab.lib.pagesizes import A4

    page_width = A4[0]
    c.setFont("Helvetica-Oblique", 8)  # italic
    c.setFillColorRGB(0.5, 0.5, 0.5)  # grey
    c.drawRightString(page_width - 40, 20, f"{page_num}")
563
+
564
+
565
def _generate_pdf_report(
    figures: List[List[io.BytesIO]], header_text: str, output_pdf_path: str
) -> str:
    """
    Lay out the markdown-style header plus all SVG figures into an A4 PDF.

    *figures* is one list of SVG buffers per section (currently only
    "speed"). *header_text*'s first line is treated as a "# " title; the
    remaining lines are rendered in monospace, wrapped to the page width.
    Returns *output_pdf_path*.
    """
    from reportlab.graphics import renderPDF
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    from svglib.svglib import svg2rlg

    try:
        from svglib.fonts import FontMap

        font_map = FontMap()
        font_map.register_default_fonts()
        # workaround for matplotlib using 700 to represent bold font, but svg2rlg using 700 as normal.
        font_map.register_font("Helvetica", weight="700", rlgFontName="Helvetica-Bold")
    except ImportError:
        # Older svglib without FontMap: fall back to default font handling.
        font_map = None

    c = canvas.Canvas(output_pdf_path, pagesize=A4)
    width, height = A4

    # heading info
    heading = header_text.splitlines()
    # first line is # header
    header, heading_info = heading[0].removeprefix("#").strip(), heading[1:]
    c.setFont(PDF_HEADING_FONT, 20)
    text_obj = c.beginText(40, height - 50)
    text_obj.textLine(header)
    c.drawText(text_obj)

    # Wrap heading_info lines if overflow
    max_width = width - 80  # 40 margin on both sides
    wrapped_heading_info = []
    for line in heading_info:
        while c.stringWidth(line, PDF_TEXT_FONT, 10) > max_width:
            # Find a split point
            # NOTE(review): estimates characters-per-line from the width of a
            # single space — exact for this monospace font.
            split_idx = int(max_width // c.stringWidth(" ", PDF_TEXT_FONT, 10))
            # Try to split at nearest space before split_idx
            space_idx = line.rfind(" ", 0, split_idx)
            if space_idx == -1:
                space_idx = split_idx
            wrapped_heading_info.append(line[:space_idx])
            # TODO fixed indent
            line = " " + line[space_idx:].lstrip()
        wrapped_heading_info.append(line)
    heading_info = wrapped_heading_info

    c.setFont(PDF_TEXT_FONT, 10)
    text_obj = c.beginText(40, height - 70)
    for line in heading_info:
        text_obj.textLine(line)
    c.drawText(text_obj)

    # Footer attribution with a clickable link over the URL portion.
    c.setFont("Helvetica-Oblique", 8)
    text = "This report was generated by https://github.com/Nambers/ssrJSON-benchmark"
    c.drawString(40, 20, text)
    link_start = 40 + c.stringWidth("This report was generated by ")
    link_end = link_start + c.stringWidth(
        "https://github.com/Nambers/ssrJSON-benchmark"
    )
    text_height = 5  # Adjusted height to better fit the link area
    c.linkURL(
        "https://github.com/Nambers/ssrJSON-benchmark",
        (link_start, 20, link_end, 20 + text_height),
        relative=1,
    )

    # Start figure layout below the (estimated) header block.
    header_lines = header_text.count("\n") + 1
    header_height = header_lines * 14 + 10
    # subheading spacing = 30
    y_pos = height - header_height - 30
    bottom_margin = 20
    vertical_gap = 20

    # Current page index (zero-based), stamped on each completed page.
    p = 0

    for name, figs in zip(["speed"], figures):
        # Section heading plus a PDF bookmark/outline entry.
        text_obj = c.beginText()
        text_obj.setTextOrigin(40, y_pos)
        text_obj.setFont(PDF_HEADING_FONT, 14)
        text_obj.textLine(f"{name}")
        c.drawText(text_obj)
        c.bookmarkHorizontal(name, 0, y_pos + 20)
        c.addOutlineEntry(name, name, level=0)
        y_pos -= 20
        for svg_io in figs:
            svg_io.seek(0)
            drawing = svg2rlg(svg_io, font_map=font_map)

            # Scale the drawing to the printable width, preserving aspect.
            avail_w = width - 80
            scale = avail_w / drawing.width
            drawing.width *= scale
            drawing.height *= scale
            drawing.scale(scale, scale)

            img_h = drawing.height
            # no enough space
            if y_pos - img_h - vertical_gap < bottom_margin:
                _draw_page_number(c, p)
                p += 1
                c.showPage()
                y_pos = height - bottom_margin

            # Thin separator rule above each figure.
            c.setStrokeColorRGB(0.9, 0.9, 0.9)
            c.setLineWidth(0.4)
            c.line(40, y_pos, width - 40, y_pos)

            renderPDF.draw(drawing, c, 40, y_pos - img_h)
            y_pos -= img_h + vertical_gap

    _draw_page_number(c, p)
    c.save()
    return output_pdf_path
678
+
679
+
680
def _fetch_header(rev) -> str:
    """Fill the bundled ``template.md`` with *rev* plus environment details
    (OS, Python, library versions, SIMD features, CPU, memory)."""
    template_path = os.path.join(CUR_DIR, "template.md")
    with open(template_path, "r") as f:
        template = f.read()
    return template.format(
        REV=rev,
        TIME=time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()),
        OS=f"{platform.system()} {platform.machine()} {platform.release()} {platform.version()}",
        PYTHON=sys.version,
        ORJSON_VER=orjson.__version__,
        UJSON_VER=ujson.__version__,
        SIMD_FLAGS=ssrjson.get_current_features(),
        CHIPSET=_get_cpu_name(),
        MEM=_get_mem_total(),
    )
694
+
695
+
696
def generate_report_pdf(result: BenchmarkFinalResult, file: str, out_dir: str = CWD):
    """
    Generate a PDF report from *result* and return the output path.

    *file* is the benchmark JSON result name; the report is written next to
    it (in *out_dir*) with a .pdf suffix.
    """
    catagories = result.catagories
    stem = file.removesuffix(".json")
    report_name = f"{stem}.pdf"

    index_s = "speed"
    # One figure per benchmarked input file, all under the "speed" section.
    speed_figures = []
    for bench_filename, per_file_result in result.results.items():
        print(f"Processing {bench_filename} (PDF)")
        speed_figures.append(
            _plot_relative_ops(catagories, per_file_result, bench_filename, index_s)
        )
    figures = [speed_figures]

    header = _fetch_header(
        stem.removeprefix("benchmark_result_").removesuffix(".json")
    )
    out_path = _generate_pdf_report(
        figures,
        header_text=header,
        output_pdf_path=os.path.join(out_dir, report_name),
    )
    print(f"Report saved to {out_path}")
    return out_path
730
+
731
+
732
def generate_report_markdown(
    result: BenchmarkFinalResult, file: str, out_dir: str = CWD
):
    """
    Generate a Markdown report (plus one SVG per benchmarked input file)
    from *result* and return the path of the written markdown file.

    Output goes into a ``<name>_report`` folder under *out_dir*.
    """
    stem = file.removesuffix(".json")
    report_name = f"{stem}.md"
    report_folder = os.path.join(out_dir, f"{stem}_report")

    # Create the report folder on first use.
    os.makedirs(report_folder, exist_ok=True)

    body = _fetch_header(
        stem.removeprefix("benchmark_result_").removesuffix(".json")
    )

    index_s = "speed"
    body += f"\n\n## {index_s}\n\n"
    for bench_filename, per_file_result in result.results.items():
        print(f"Processing {bench_filename} (Markdown)")
        figure = _plot_relative_ops(
            result.catagories,
            per_file_result,
            bench_filename,
            index_s,
        )
        svg_path = os.path.join(report_folder, bench_filename + ".svg")
        with open(svg_path, "wb") as svg_file:
            svg_file.write(figure.getvalue())
        # Embed the figure by relative path.
        body += f"![{bench_filename}](./{bench_filename}.svg)\n\n"

    ret = os.path.join(report_folder, report_name)
    with open(ret, "w") as f:
        f.write(body)
    print(f"Report saved to {ret}")
    return ret
773
+
774
+
775
def parse_file_result(j):
    """Deserialize a previously dumped benchmark result object."""
    parsed = BenchmarkFinalResult.parse(j)
    return parsed
777
+
778
+
779
def is_unix_except_macos():
    """True on the Unix-like systems this tool forces fork() on (not macOS)."""
    return platform.system() in {"Linux", "AIX", "FreeBSD"}
782
+
783
+
784
+ def _set_multiprocessing_start_method():
785
+ try:
786
+ multiprocessing.set_start_method("fork")
787
+ except RuntimeError as e:
788
+ if "context has already been set" not in str(e):
789
+ raise
790
+
791
+
792
def run_benchmark(
    files: list[pathlib.Path],
    process_bytes: int,
    bin_process_bytes: int,
):
    """
    Generate a JSON result of benchmark.
    Also returns a result object.

    Runs every benchmark group over each file in *files*, collects the
    per-file results into a BenchmarkFinalResult, and writes its JSON dump
    to a revision-named file in the current directory.

    Returns ``(result, file)`` where *file* is the written JSON file name.
    """
    # Set multiprocessing start method to fork, if Python version is 3.14+ on Unix
    if sys.version_info >= (3, 14) and is_unix_except_macos():
        _set_multiprocessing_start_method()

    file = _get_real_output_file_name()

    result = BenchmarkFinalResult()
    result.results = dict()

    benchmark_libraries = _get_benchmark_libraries()

    # Stable, sorted category order for plotting/reporting.
    result.catagories = sorted(list(benchmark_libraries.keys()))

    for bench_file in files:
        k, v = _run_file_benchmark(
            benchmark_libraries, bench_file, process_bytes, bin_process_bytes
        )
        result.results[k] = v
    output_result = result.dumps()

    # Overwrite any stale result file from a previous run.
    if os.path.exists(file):
        os.remove(file)

    with open(f"{file}", "w", encoding="utf-8") as f:
        f.write(output_result)
    return result, file
+ return result, file