transcrypto 1.8.0__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
transcrypto/base.py DELETED
@@ -1,1637 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright 2026 Daniel Balparda <balparda@github.com>
2
- # SPDX-License-Identifier: Apache-2.0
3
- """Balparda's TransCrypto base library."""
4
-
5
- from __future__ import annotations
6
-
7
- import abc as abstract
8
- import base64
9
- import codecs
10
- import dataclasses
11
- import datetime
12
- import enum
13
- import functools
14
- import hashlib
15
- import json
16
- import logging
17
- import math
18
- import pathlib
19
- import pickle # noqa: S403
20
- import secrets
21
- import sys
22
- import time
23
- from collections import abc
24
- from types import TracebackType
25
- from typing import (
26
- Any,
27
- Protocol,
28
- Self,
29
- cast,
30
- final,
31
- runtime_checkable,
32
- )
33
-
34
- import numpy as np
35
- import zstandard
36
- from scipy import stats
37
-
38
# Data conversion utils

# JSON types: recursive alias for anything json.dumps()/json.loads() handles natively
type JSONValue = bool | int | float | str | list[JSONValue] | dict[str, JSONValue] | None
type JSONDict = dict[str, JSONValue]

# Crypto types: add bytes for cryptographic data; has to be encoded for JSON serialization
type CryptValue = bool | int | float | str | bytes | list[CryptValue] | dict[str, CryptValue] | None
type CryptDict = dict[str, CryptValue]
# type names (as strings) accepted for dataclass fields
# NOTE(review): presumably compared against stringified field annotations; confirm at the use site
_JSON_DATACLASS_TYPES: set[str] = {
  # native support
  'int',
  'float',
  'str',
  'bool',
  # support for lists for now, but no nested lists or dicts yet
  'list[int]',
  'list[float]',
  'list[str]',
  'list[bool]',
  # need conversion/encoding: see CryptValue/CryptDict
  'bytes',
}

# bytes -> lowercase hexadecimal string
BytesToHex: abc.Callable[[bytes], str] = lambda b: b.hex()
# bytes -> unsigned big-endian integer (b'' gives 0)
BytesToInt: abc.Callable[[bytes], int] = lambda b: int.from_bytes(b, 'big', signed=False)
# bytes -> URL-safe base64 ASCII string (padding is kept)
BytesToEncoded: abc.Callable[[bytes], str] = lambda b: base64.urlsafe_b64encode(b).decode('ascii')

# hexadecimal string -> bytes
HexToBytes: abc.Callable[[str], bytes] = bytes.fromhex
# unsigned int -> exactly n big-endian bytes; to_bytes() raises OverflowError if i is negative
# or does not fit in n bytes
IntToFixedBytes: abc.Callable[[int, int], bytes] = lambda i, n: i.to_bytes(n, 'big', signed=False)
# unsigned int -> minimal-length big-endian bytes; note that 0 becomes b'' (bit_length() is 0)
IntToBytes: abc.Callable[[int], bytes] = lambda i: IntToFixedBytes(i, (i.bit_length() + 7) // 8)
# unsigned int -> URL-safe base64 of its minimal big-endian bytes
IntToEncoded: abc.Callable[[int], str] = lambda i: BytesToEncoded(IntToBytes(i))
# URL-safe base64 ASCII string -> bytes
EncodedToBytes: abc.Callable[[str], bytes] = lambda e: base64.urlsafe_b64decode(e.encode('ascii'))

# left-pad `b` with zero bytes up to ceil(i / 8) bytes (never truncates);
# `i` is presumably a bit length, given the (i + 7) // 8 rounding
PadBytesTo: abc.Callable[[bytes, int], bytes] = lambda b, i: b.rjust((i + 7) // 8, b'\x00')
73
-
74
# Time utils

# Unix timestamp (seconds) of 2000-01-01 00:00:00 UTC
MIN_TM = int(datetime.datetime(2000, 1, 1, 0, 0, 0, tzinfo=datetime.UTC).timestamp())
# strftime format used by TimeStr(); the '-UTC' suffix is literal text
TIME_FORMAT = '%Y/%b/%d-%H:%M:%S-UTC'
# timestamp -> formatted UTC string (via time.gmtime); any falsy value (None, 0, 0.0) gives '-'
TimeStr: abc.Callable[[int | float | None], str] = lambda tm: (
  time.strftime(TIME_FORMAT, time.gmtime(tm)) if tm else '-'
)
# current Unix time, truncated to whole seconds
Now: abc.Callable[[], int] = lambda: int(time.time())
# current time formatted with TimeStr()
StrNow: abc.Callable[[], str] = lambda: TimeStr(Now())
83
-
84
# SI prefix table, powers of 1000: key is the exponent `e` in 1000**e, value is the prefix symbol;
# HumanizedDecimal() clamps its computed exponent to the min/max keys of this table
_SI_PREFIXES: dict[int, str] = {
  -6: 'a',  # atto
  -5: 'f',  # femto
  -4: 'p',  # pico
  -3: 'n',  # nano
  -2: 'µ',  # micro (unicode U+00B5)  # noqa: RUF001
  -1: 'm',  # milli
  0: '',  # base
  1: 'k',  # kilo
  2: 'M',  # mega
  3: 'G',  # giga
  4: 'T',  # tera
  5: 'P',  # peta
  6: 'E',  # exa
}
100
-
101
# these control the pickling of data, do NOT ever change, or you will break all databases
# <https://docs.python.org/3/library/pickle.html#pickle.DEFAULT_PROTOCOL>
_PICKLE_PROTOCOL = 4  # protocol 4 available since python v3.8  # do NOT ever change!
# object -> pickle bytes, always with the pinned protocol above
PickleGeneric: abc.Callable[[Any], bytes] = lambda o: pickle.dumps(o, protocol=_PICKLE_PROTOCOL)
# pickle bytes -> object; SECURITY: pickle.loads() can execute arbitrary code, so this must
# only ever be fed trusted data
UnpickleGeneric: abc.Callable[[bytes], Any] = pickle.loads  # noqa: S301
# JSON dict -> compact (no whitespace after separators) UTF-8 encoded JSON bytes
PickleJSON: abc.Callable[[JSONDict], bytes] = lambda d: json.dumps(d, separators=(',', ':')).encode(
  'utf-8'
)
# UTF-8 encoded JSON bytes -> JSON dict (the result is not validated to actually be a dict)
UnpickleJSON: abc.Callable[[bytes], JSONDict] = lambda b: json.loads(b.decode('utf-8'))
# NOTE(review): presumably the associated data (AAD) bound to serialized blobs; confirm at use site
_PICKLE_AAD = b'transcrypto.base.Serialize.1.0'  # do NOT ever change!
# these help find compressed files, do NOT change unless zstandard changes
_ZSTD_MAGIC_FRAME = 0xFD2FB528  # magic number of a regular zstd frame
_ZSTD_MAGIC_SKIPPABLE_MIN = 0x184D2A50  # lower bound of the skippable-frame magic range
_ZSTD_MAGIC_SKIPPABLE_MAX = 0x184D2A5F  # upper bound of the skippable-frame magic range
115
-
116
-
117
class Error(Exception):
  """TransCrypto exception: root of the library's exception hierarchy."""


class InputError(Error):
  """Input exception (TransCrypto): raised when a caller-supplied value fails validation."""


class CryptoError(Error):
  """Cryptographic exception (TransCrypto)."""


class ImplementationError(Error, NotImplementedError):
  """Feature is not implemented yet (TransCrypto).

  Also derives from the builtin NotImplementedError, so it is caught by handlers for either.
  """
131
-
132
-
133
def HumanizedBytes(inp_sz: float, /) -> str:
  """Convert a byte count into a human-readable string using binary prefixes (powers of 1024).

  Scales the input size by powers of 1024, returning a value with the
  appropriate IEC binary unit suffix: `B`, `KiB`, `MiB`, `GiB`, `TiB`, `PiB`, `EiB`.

  Args:
    inp_sz (int | float): Size in bytes. Must be finite and non-negative.

  Returns:
    str: Formatted size string. Integer inputs below 1024 are rendered verbatim ('512 B');
        everything else is rendered with exactly three decimal places.

  Raises:
    InputError: If `inp_sz` is negative or not finite (NaN / infinity).

  Notes:
    - Units follow the IEC binary standard where each unit is 1024 of the previous one
      (1 KiB = 1024 bytes, 1 MiB = 1024 KiB, ... 1 EiB = 1024 PiB).
    - `EiB` is the largest unit: bigger values are still expressed in EiB.

  Examples:
    >>> HumanizedBytes(512)
    '512 B'
    >>> HumanizedBytes(512.5)
    '512.500 B'
    >>> HumanizedBytes(2048)
    '2.000 KiB'
    >>> HumanizedBytes(5 * 1024**3)
    '5.000 GiB'

  """
  # NaN compares False against everything, so it would slip past a plain `< 0` test; reject
  # non-finite values explicitly (ints are skipped: they are always finite and math.isfinite()
  # would overflow on ints too large for a float)
  if (not isinstance(inp_sz, int) and not math.isfinite(inp_sz)) or inp_sz < 0:
    raise InputError(f'input should be >=0 and got {inp_sz}')
  if inp_sz < 1024:  # noqa: PLR2004
    return f'{inp_sz} B' if isinstance(inp_sz, int) else f'{inp_sz:0.3f} B'
  units: tuple[str, ...] = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')
  # find the largest unit that keeps the scaled value below 1024, capping at the last unit
  exp: int = 1
  while exp < len(units) and inp_sz >= 1024 ** (exp + 1):
    exp += 1
  return f'{(inp_sz / 1024**exp):0.3f} {units[exp - 1]}'
182
-
183
-
184
def HumanizedDecimal(inp_sz: float, /, *, unit: str = '') -> str:
  """Render a number with an SI metric prefix (powers of 1000).

  The magnitude is scaled by the power of 1000 that brings it into [1, 1000), limited to
  the prefixes available in `_SI_PREFIXES` (atto ... exa); values beyond that range keep the
  nearest supported prefix.

  Notes:
    • Uses decimal multiples: 1 k = 1000 units, 1 m = 1/1000 units.
    • Supported large prefixes: k, M, G, T, P, E.
    • Supported small prefixes: m, µ, n, p, f, a.
    • Unit string is stripped of surrounding whitespace before use.
    • Zero is returned as '0' plus unit (no prefix).
    • Unscaled whole values are rendered as integers; all others use three decimal places.

  Examples:
    >>> HumanizedDecimal(950)
    '950'
    >>> HumanizedDecimal(1500)
    '1.500 k'
    >>> HumanizedDecimal(0.123456, unit='V')
    '123.456 mV'
    >>> HumanizedDecimal(3.2e-7, unit='F')
    '320.000 nF'
    >>> HumanizedDecimal(9.14e18, unit='Hz')
    '9.140 EHz'

  Args:
    inp_sz (int | float): Quantity to convert. Must be finite.
    unit (str, optional): Base unit to append to the result (e.g., 'Hz', 'm').
        If given, it will be separated by a space for unscaled values and
        concatenated to the prefix for scaled values.

  Returns:
    str: Formatted string, e.g. '1.500 k' or '123.456 mV'

  Raises:
    InputError: If `inp_sz` is not finite.

  """  # noqa: RUF002
  if not math.isfinite(inp_sz):
    raise InputError(f'input should finite; got {inp_sz!r}')
  suffix: str = unit.strip()
  spaced_suffix: str = f' {suffix}' if suffix else ''
  if inp_sz == 0:
    return f'0{spaced_suffix}'
  sign: str = '-' if inp_sz < 0 else ''
  magnitude = abs(inp_sz)
  # exponent of 1000 that puts the magnitude in [1, 1000), clamped to the prefix table
  lowest, highest = min(_SI_PREFIXES), max(_SI_PREFIXES)
  power: int = max(lowest, min(math.floor(math.log10(magnitude) / 3), highest))
  if power == 0:
    # no prefix needed: whole numbers print as ints, the rest with 3 decimals
    is_whole: bool = isinstance(magnitude, int) or magnitude.is_integer()
    if is_whole:
      return f'{sign}{int(magnitude)}{spaced_suffix}'
    return f'{sign}{magnitude:0.3f}{spaced_suffix}'
  return f'{sign}{magnitude / (1000**power):0.3f} {_SI_PREFIXES[power]}{suffix}'
243
-
244
-
245
def HumanizedSeconds(inp_secs: float, /) -> str:
  """Render a duration given in seconds using the most suitable time unit.

  The unit is picked from the magnitude of the duration:
    - microseconds (`µs`)
    - milliseconds (`ms`)
    - seconds (`s`)
    - minutes (`min`)
    - hours (`h`)
    - days (`d`)

  Args:
    inp_secs (int | float): Time interval in seconds. Must be finite and non-negative.

  Returns:
    str: Duration with three decimal places followed by a space and the unit

  Raises:
    InputError: If `inp_secs` is negative or not finite.

  Notes:
    - Uses the micro sign (`µ`, U+00B5) for microseconds.
    - Thresholds:
        < 0.001 s → µs
        < 1 s → ms
        < 60 s → seconds
        < 3600 s → minutes
        < 86400 s → hours
        ≥ 86400 s → days

  Examples:
    >>> HumanizedSeconds(0)
    '0.000 s'
    >>> HumanizedSeconds(0.000004)
    '4.000 µs'
    >>> HumanizedSeconds(0.25)
    '250.000 ms'
    >>> HumanizedSeconds(42)
    '42.000 s'
    >>> HumanizedSeconds(3661)
    '1.017 h'

  """  # noqa: RUF002
  if not math.isfinite(inp_secs) or inp_secs < 0:
    raise InputError(f'input should be >=0 and got {inp_secs}')
  if inp_secs == 0:
    return '0.000 s'
  secs: float = float(inp_secs)
  amount: float
  label: str
  # pick the scaled amount and its unit label, smallest unit first
  if secs < 0.001:  # noqa: PLR2004
    amount, label = secs * 1000 * 1000, 'µs'  # noqa: RUF001
  elif secs < 1:
    amount, label = secs * 1000, 'ms'
  elif secs < 60:  # noqa: PLR2004
    amount, label = secs, 's'
  elif secs < 60 * 60:
    amount, label = secs / 60, 'min'
  elif secs < 24 * 60 * 60:
    amount, label = secs / (60 * 60), 'h'
  else:
    amount, label = secs / (24 * 60 * 60), 'd'
  return f'{amount:0.3f} {label}'
304
-
305
-
306
- def MeasurementStats(
307
- data: list[int | float], /, *, confidence: float = 0.95
308
- ) -> tuple[int, float, float, float, tuple[float, float], float]:
309
- """Compute descriptive statistics for repeated measurements.
310
-
311
- Given N ≥ 1 measurements, this function computes the sample mean, the
312
- standard error of the mean (SEM), and the symmetric error estimate for
313
- the chosen confidence interval using Student's t distribution.
314
-
315
- Notes:
316
- • If only one measurement is given, SEM and error are reported as +∞ and
317
- the confidence interval is (-∞, +∞).
318
- • This function assumes the underlying distribution is approximately
319
- normal, or n is large enough for the Central Limit Theorem to apply.
320
-
321
- Args:
322
- data (list[int | float]): Sequence of numeric measurements.
323
- confidence (float, optional): Confidence level for the interval, 0.5 <= confidence < 1;
324
- defaults to 0.95 (95% confidence interval).
325
-
326
- Returns:
327
- tuple:
328
- - n (int): number of measurements.
329
- - mean (float): arithmetic mean of the data
330
- - sem (float): standard error of the mean, sigma / √n
331
- - error (float): half-width of the confidence interval (mean ± error)
332
- - ci (tuple[float, float]): lower and upper confidence interval bounds
333
- - confidence (float): the confidence level used
334
-
335
- Raises:
336
- InputError: if the input list is empty.
337
-
338
- """
339
- # test inputs
340
- n: int = len(data)
341
- if not n:
342
- raise InputError('no data')
343
- if not 0.5 <= confidence < 1.0: # noqa: PLR2004
344
- raise InputError(f'invalid confidence: {confidence=}')
345
- # solve trivial case
346
- if n == 1:
347
- return (n, float(data[0]), math.inf, math.inf, (-math.inf, math.inf), confidence)
348
- # call scipy for the science data
349
- np_data = np.array(data)
350
- mean = np.mean(np_data)
351
- sem = stats.sem(np_data)
352
- ci = stats.t.interval(confidence, n - 1, loc=mean, scale=sem)
353
- t_crit = stats.t.ppf((1.0 + confidence) / 2.0, n - 1)
354
- error = t_crit * sem # half-width of the CI
355
- return (n, float(mean), float(sem), float(error), (float(ci[0]), float(ci[1])), confidence)
356
-
357
-
358
def HumanizedMeasurements(
  data: list[int | float],
  /,
  *,
  unit: str = '',
  parser: abc.Callable[[float], str] | None = None,
  clip_negative: bool = True,
  confidence: float = 0.95,
) -> str:
  """Summarize repeated measurements as one human-readable string.

  Delegates the statistics to `MeasurementStats()` and renders mean, error and
  confidence interval, each immediately followed by `unit` (no separating space).
  Negative values can optionally be clipped to zero and marked with a leading “*”.

  Notes:
    • For a single measurement the error is unknown and the output is '<mean><unit> ±? @1'.
    • Otherwise the output ends with the rounded confidence percentage and the
      number of samples ('…]95%CI@5').

  Args:
    data (list[int | float]): Sequence of numeric measurements.
    unit (str, optional): Unit of measurement to append, e.g. "ms" or "s"; surrounding
        whitespace is stripped. Defaults to '' (no unit).
    parser (Callable[[float], str] | None, optional): Custom float-to-string
        formatter. If None, values are rendered with `str()`.
    clip_negative (bool, optional): If True (default), negative values are shown as
        zero prefixed with '*' ('*0', or '*' + parser(0.0) when a parser is given).
    confidence (float, optional): Confidence level for the interval, 0.5 <= confidence < 1;
        defaults to 0.95 (95% confidence interval).

  Returns:
    str: A formatted summary string, e.g. (with a 3-decimal parser and unit 'ms'):
        '9.720ms ± 1.831ms [5.253ms … 14.187ms]95%CI@5'

  """
  suffix: str = unit.strip()
  count, mean, _, error, interval, level = MeasurementStats(data, confidence=confidence)

  def _Render(value: float) -> str:
    """Format one value, applying the negative-clipping marker when requested."""
    clipped: bool = clip_negative and value < 0.0
    if parser is None:
      return '*0' if clipped else str(value)
    return f'*{parser(0.0)}' if clipped else parser(value)

  if count == 1:
    return f'{_Render(mean)}{suffix} ±? @1'
  percent: int = round(level * 100)
  low, high = interval[0], interval[1]
  return (
    f'{_Render(mean)}{suffix} ± {_Render(error)}{suffix} '
    f'[{_Render(low)}{suffix} … {_Render(high)}{suffix}]{percent}%CI@{count}'
  )
409
-
410
-
411
- class Timer:
412
- """An execution timing class that can be used as both a context manager and a decorator.
413
-
414
- Examples:
415
- # As a context manager
416
- with Timer('Block timing'):
417
- time.sleep(1.2)
418
-
419
- # As a decorator
420
- @Timer('Function timing')
421
- def slow_function():
422
- time.sleep(0.8)
423
-
424
- # As a regular object
425
- tm = Timer('Inline timing')
426
- tm.Start()
427
- time.sleep(0.1)
428
- tm.Stop()
429
- print(tm)
430
-
431
- Attributes:
432
- label (str, optional): Timer label
433
- emit_log (bool, optional): If True (default) will logging.info() the timer, else will not
434
- emit_print (bool, optional): If True will print() the timer, else (default) will not
435
-
436
- """
437
-
438
- def __init__(
439
- self,
440
- label: str = '',
441
- /,
442
- *,
443
- emit_log: bool = True,
444
- emit_print: abc.Callable[[str], None] | None = None,
445
- ) -> None:
446
- """Initialize the Timer.
447
-
448
- Args:
449
- label (str, optional): A description or name for the timed block or function
450
- emit_log (bool, optional): Emit a log message when finished; default is True
451
- emit_print (Callable[[str], None] | None, optional): Emit a print() message when
452
- finished using the provided callable; default is None
453
-
454
- """
455
- self.emit_log: bool = emit_log
456
- self.emit_print: abc.Callable[[str], None] | None = emit_print
457
- self.label: str = label.strip()
458
- self.start: float | None = None
459
- self.end: float | None = None
460
-
461
- @property
462
- def elapsed(self) -> float:
463
- """Elapsed time. Will be zero until a measurement is available with start/end.
464
-
465
- Raises:
466
- Error: negative elapsed time
467
-
468
- Returns:
469
- float: elapsed time, in seconds
470
-
471
- """
472
- if self.start is None or self.end is None:
473
- return 0.0
474
- delta: float = self.end - self.start
475
- if delta <= 0.0:
476
- raise Error(f'negative/zero delta: {delta}')
477
- return delta
478
-
479
- def __str__(self) -> str:
480
- """Get current timer value.
481
-
482
- Returns:
483
- str: human-readable representation of current time value
484
-
485
- """
486
- if self.start is None:
487
- return f'{self.label}: <UNSTARTED>' if self.label else '<UNSTARTED>'
488
- if self.end is None:
489
- return (
490
- f'{self.label}: ' if self.label else ''
491
- ) + f'<PARTIAL> {HumanizedSeconds(time.perf_counter() - self.start)}'
492
- return (f'{self.label}: ' if self.label else '') + f'{HumanizedSeconds(self.elapsed)}'
493
-
494
- def Start(self) -> None:
495
- """Start the timer.
496
-
497
- Raises:
498
- Error: if you try to re-start the timer
499
-
500
- """
501
- if self.start is not None:
502
- raise Error('Re-starting timer is forbidden')
503
- self.start = time.perf_counter()
504
-
505
- def __enter__(self) -> Self:
506
- """Start the timer when entering the context.
507
-
508
- Returns:
509
- Timer: context object (self)
510
-
511
- """
512
- self.Start()
513
- return self
514
-
515
- def Stop(self) -> None:
516
- """Stop the timer and emit logging.info with timer message.
517
-
518
- Raises:
519
- Error: trying to re-start timer or stop unstarted timer
520
-
521
- """
522
- if self.start is None:
523
- raise Error('Stopping an unstarted timer')
524
- if self.end is not None:
525
- raise Error('Re-stopping timer is forbidden')
526
- self.end = time.perf_counter()
527
- message: str = str(self)
528
- if self.emit_log:
529
- logging.info(message)
530
- if self.emit_print is not None:
531
- self.emit_print(message)
532
-
533
- def __exit__(
534
- self,
535
- unused_exc_type: type[BaseException] | None,
536
- unused_exc_val: BaseException | None,
537
- exc_tb: TracebackType | None,
538
- ) -> None:
539
- """Stop the timer when exiting the context."""
540
- self.Stop()
541
-
542
- def __call__[**F, R](self, func: abc.Callable[F, R]) -> abc.Callable[F, R]:
543
- """Allow the Timer to be used as a decorator.
544
-
545
- Args:
546
- func: The function to time.
547
-
548
- Returns:
549
- The wrapped function with timing behavior.
550
-
551
- """
552
-
553
- @functools.wraps(func)
554
- def _Wrapper(*args: F.args, **kwargs: F.kwargs) -> R:
555
- with self.__class__(self.label, emit_log=self.emit_log, emit_print=self.emit_print):
556
- return func(*args, **kwargs)
557
-
558
- return _Wrapper
559
-
560
-
561
def RandBits(n_bits: int, /) -> int:
  """Crypto-random integer that is exactly `n_bits` long (i.e., top bit is always 1).

  Forcing the top bit means the entropy is ~ (n_bits-1), which is why fewer than 8 bits
  are refused. For the large integers a crypto library works with this is negligible.

  Args:
    n_bits (int): number of bits to produce, ≥ 8

  Returns:
    int with n_bits size

  Raises:
    InputError: invalid n_bits

  """
  if n_bits < 8:  # noqa: PLR2004
    raise InputError(f'n_bits must be ≥ 8: {n_bits}')
  # rejection sampling: draw until the top bit happens to be set, instead of forcing it
  while True:
    candidate: int = secrets.randbits(n_bits)
    if candidate.bit_length() == n_bits:
      return candidate
586
-
587
-
588
def RandInt(min_int: int, max_int: int, /) -> int:
  """Crypto-random integer drawn uniformly from [min_int, max_int].

  Args:
    min_int (int): minimum integer, inclusive, ≥ 0
    max_int (int): maximum integer, inclusive, > min_int

  Returns:
    int between [min_int, max_int] inclusive

  Raises:
    InputError: invalid min/max

  """
  if not 0 <= min_int < max_int:
    raise InputError(f'min_int must be ≥ 0, and < max_int: {min_int} / {max_int}')
  # secrets.randbelow(k) is uniform over [0, k), so shift it by min_int
  width: int = max_int - min_int + 1
  picked: int = secrets.randbelow(width) + min_int
  assert min_int <= picked <= max_int, 'should never happen: generated number out of range'  # noqa: S101
  return picked
610
-
611
-
612
- def RandShuffle[T](seq: abc.MutableSequence[T], /) -> None:
613
- """In-place Crypto-random shuffle order for `seq` mutable sequence.
614
-
615
- Args:
616
- seq (MutableSequence[T]): any mutable sequence with 2 or more elements
617
-
618
- Raises:
619
- InputError: not enough elements
620
-
621
- """
622
- # test inputs
623
- if (n_seq := len(seq)) < 2: # noqa: PLR2004
624
- raise InputError(f'seq must have 2 or more elements: {n_seq}')
625
- # cryptographically sound Fisher-Yates using secrets.randbelow
626
- for i in range(n_seq - 1, 0, -1):
627
- j: int = secrets.randbelow(i + 1)
628
- seq[i], seq[j] = seq[j], seq[i]
629
-
630
-
631
def RandBytes(n_bytes: int, /) -> bytes:
  """Return `n_bytes` cryptographically secure random bytes.

  Args:
    n_bytes (int): number of bytes to produce, > 0

  Returns:
    bytes: random with len()==n_bytes

  Raises:
    InputError: invalid n_bytes

  """
  if n_bytes < 1:
    raise InputError(f'n_bytes must be ≥ 1: {n_bytes}')
  # straight from the OS-backed CSPRNG
  blob: bytes = secrets.token_bytes(n_bytes)
  assert len(blob) == n_bytes, 'should never happen: generated bytes incorrect size'  # noqa: S101
  return blob
651
-
652
-
653
def GCD(a: int, b: int, /) -> int:
  """Greatest Common Divisor of non-negative integers `a` and `b` (Euclid's algorithm).

  O(log(min(a, b)))

  Args:
    a (int): integer a ≥ 0
    b (int): integer b ≥ 0 (can't be both zero)

  Returns:
    gcd(a, b)

  Raises:
    InputError: invalid inputs

  """
  if min(a, b) < 0 or (a == 0 and b == 0):
    raise InputError(f'negative input or undefined gcd(0, 0): {a=} , {b=}')
  # keep the larger value first, then repeatedly replace the pair by (smaller, remainder)
  larger, smaller = (b, a) if a < b else (a, b)
  while smaller:
    larger, smaller = smaller, larger % smaller
  return larger
680
-
681
-
682
def ExtendedGCD(a: int, b: int, /) -> tuple[int, int, int]:
  """Extended Euclid for non-negative integers `a` and `b`: gcd plus Bézout coefficients.

  O(log(min(a, b)))

  Args:
    a (int): integer a ≥ 0
    b (int): integer b ≥ 0 (can't be both zero)

  Returns:
    (gcd, x, y) so that a * x + b * y = gcd
    x and y may be negative integers or zero but won't be both zero.

  Raises:
    InputError: invalid inputs

  """
  if min(a, b) < 0 or (a == 0 and b == 0):
    raise InputError(f'negative input or undefined gcd(0, 0): {a=} , {b=}')
  # the loop works on (larger, smaller); remember a swap so coefficients can be swapped back
  swapped: bool = a < b
  if swapped:
    a, b = b, a
  # trivial case: gcd(a, 0) = a = a * 1 + 0 * 0
  if not b:
    return (a, 0, 1) if swapped else (a, 1, 0)
  # (x_cur, y_cur) are the coefficients for the current `a`, (x_next, y_next) for current `b`
  x_cur, x_next = 1, 0
  y_cur, y_next = 0, 1
  while b:
    quotient, remainder = divmod(a, b)
    a, b = b, remainder
    x_cur, x_next = x_next, x_cur - quotient * x_next
    y_cur, y_next = y_next, y_cur - quotient * y_next
  return (a, y_cur, x_cur) if swapped else (a, x_cur, y_cur)
717
-
718
-
719
def Hash256(data: bytes, /) -> bytes:
  """Compute the SHA-256 digest of `data`; the result is always 32 bytes long.

  Args:
    data (bytes): Data to compute hash for

  Returns:
    32 bytes (256 bits) of SHA-256 hash;
    if converted to hexadecimal (with BytesToHex() or hex()) will be 64 chars of string;
    if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**256

  """
  hasher = hashlib.sha256()
  hasher.update(data)
  return hasher.digest()
732
-
733
-
734
def Hash512(data: bytes, /) -> bytes:
  """Compute the SHA-512 digest of `data`; the result is always 64 bytes long.

  Args:
    data (bytes): Data to compute hash for

  Returns:
    64 bytes (512 bits) of SHA-512 hash;
    if converted to hexadecimal (with BytesToHex() or hex()) will be 128 chars of string;
    if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**512

  """
  hasher = hashlib.sha512()
  hasher.update(data)
  return hasher.digest()
747
-
748
-
749
- def FileHash(full_path: str, /, *, digest: str = 'sha256') -> bytes:
750
- """SHA-256 hex hash of file on disk. Always a length of 32 bytes (if default digest=='sha256').
751
-
752
- Args:
753
- full_path (str): Path to existing file on disk
754
- digest (str, optional): Hash method to use, accepts 'sha256' (default) or 'sha512'
755
-
756
- Returns:
757
- 32 bytes (256 bits) of SHA-256 hash (if default digest=='sha256');
758
- if converted to hexadecimal (with BytesToHex() or hex()) will be 64 chars of string;
759
- if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**256
760
-
761
- Raises:
762
- InputError: file could not be found
763
-
764
- """
765
- # test inputs
766
- digest = digest.lower().strip().replace('-', '') # normalize so we can accept e.g. "SHA-256"
767
- if digest not in {'sha256', 'sha512'}:
768
- raise InputError(f'unrecognized digest: {digest!r}')
769
- full_path = full_path.strip()
770
- if not full_path or not pathlib.Path(full_path).exists():
771
- raise InputError(f'file {full_path!r} not found for hashing')
772
- # compute hash
773
- logging.info(f'Hashing file {full_path!r}')
774
- with pathlib.Path(full_path).open('rb') as file_obj:
775
- return hashlib.file_digest(file_obj, digest).digest()
776
-
777
-
778
def ObfuscateSecret(data: str | bytes | int, /) -> str:
  """Obfuscate a secret by showing only the first 4 bytes of its SHA-512 hash, in hex.

  Always a length of 9 chars, e.g. "aabbccdd…" (always adds '…' at the end).
  Known vulnerability: If the secret is small, can be brute-forced!
  Use only on large (~>64bits) secrets.

  Args:
    data (str | bytes | int): Data to obfuscate; str is UTF-8 encoded and int is converted
        with IntToBytes() before hashing

  Raises:
    InputError: `data` is not a str, bytes or int

  Returns:
    str: obfuscated string, e.g. "aabbccdd…"

  """
  payload: Any
  if isinstance(data, str):
    payload = data.encode('utf-8')
  elif isinstance(data, int):
    payload = IntToBytes(data)
  else:
    payload = data
  if not isinstance(payload, bytes):
    raise InputError(f'invalid type for data: {type(data)}')
  # 8 hex chars == first 4 bytes of the digest
  fingerprint: str = BytesToHex(Hash512(payload))
  return fingerprint[:8] + '…'
802
-
803
-
804
class CryptoInputType(enum.StrEnum):
  """Types of inputs that can represent arbitrary bytes.

  Each member's value is the literal prefix that marks the input type. Being a StrEnum,
  members compare equal to (and can be used as) plain strings.
  """

  # prefixes; format prefixes are all 4 bytes
  # note that STDIN ('@-') itself starts with the PATH prefix ('@'), which is why
  # DetectInputType() tests for an exact STDIN match before trying any prefix
  PATH = '@'  # @path on disk → read bytes from a file
  STDIN = '@-'  # stdin
  HEX = 'hex:'  # hex:deadbeef → decode hex
  BASE64 = 'b64:'  # b64:... → decode base64
  STR = 'str:'  # str:hello → UTF-8 encode the literal
  RAW = 'raw:'  # raw:... → byte literals via \\xNN escapes (rare but handy)
814
-
815
-
816
def BytesToRaw(b: bytes, /) -> str:
  r"""Render bytes as a double-quoted string, using \\xNN-style escapes where needed.

  1. map bytes 0..255 to same code points (latin1)
  2. escape non-printables/backslash via unicode_escape
  3. escape double quotes, then wrap the result in double quotes

  Args:
    b (bytes): input

  Returns:
    str: double-quoted string with \\xNN escapes where needed

  """
  as_text: str = b.decode('latin1')
  escaped: str = as_text.encode('unicode_escape').decode('ascii')
  # unicode_escape leaves '"' untouched, so escape it by hand
  escaped = escaped.replace('"', '\\"')
  return '"' + escaped + '"'
831
-
832
-
833
def RawToBytes(s: str, /) -> bytes:
  r"""Parse a (optionally double-quoted) string with backslash escapes into bytes.

  Args:
    s (str): input (expects a double-quoted string; parses \\xNN, \n, \\ etc);
        the surrounding quotes are removed only when both are present

  Returns:
    bytes: data

  """
  is_quoted: bool = len(s) >= 2 and s.startswith('"') and s.endswith('"')  # noqa: PLR2004
  body: str = s[1:-1] if is_quoted else s
  # decode backslash escapes to code points, then map 0..255 -> bytes
  decoded: str = codecs.decode(body, 'unicode_escape')
  return decoded.encode('latin1')
847
-
848
-
849
def DetectInputType(data_str: str, /) -> CryptoInputType | None:
  """Guess the input type of `data_str` from its prefix, if it has a known one.

  Args:
    data_str (str): data to process, putatively a bytes blob

  Returns:
    CryptoInputType | None: type if has a known prefix, None otherwise

  """
  candidate: str = data_str.strip()
  # stdin must be an exact match, and must be tested before PATH because '@-' starts with '@'
  if candidate == CryptoInputType.STDIN:
    return CryptoInputType.STDIN
  by_prefix: tuple[CryptoInputType, ...] = (
    CryptoInputType.PATH,
    CryptoInputType.STR,
    CryptoInputType.HEX,
    CryptoInputType.BASE64,
    CryptoInputType.RAW,
  )
  return next((kind for kind in by_prefix if candidate.startswith(kind)), None)
872
-
873
-
874
def BytesFromInput(data_str: str, /, *, expect: CryptoInputType | None = None) -> bytes:  # noqa: C901, PLR0911, PLR0912
  """Parse input `data_str` into `bytes`. May auto-detect or enforce a type of input.

  Can load from disk ('@'). Can load from stdin ('@-').

  Args:
    data_str (str): data to process, putatively a bytes blob; surrounding whitespace
        is stripped before anything else happens
    expect (CryptoInputType | None, optional): If not given (None) will try to auto-detect the
        input type by looking at the prefix on `data_str` and if none is found will suppose
        a 'str:' was given; if one of the supported CryptoInputType is given then will enforce
        that specific type prefix or no prefix

  Returns:
    bytes: data

  Raises:
    InputError: unexpected type or conversion error; note that any exception raised while
        converting (including an InputError raised inside the conversion step) is re-raised
        as InputError('invalid input: ...') chained to the original

  """
  data_str = data_str.strip()
  # auto-detect
  detected_type: CryptoInputType | None = DetectInputType(data_str)
  # nothing requested and nothing detected: treat the input as a plain string
  expect = CryptoInputType.STR if expect is None and detected_type is None else expect
  if detected_type is not None and expect is not None and detected_type != expect:
    raise InputError(f'Expected type {expect=} is different from detected type {detected_type=}')
  # now we know they don't conflict, so unify them; remove prefix if we have it
  expect = detected_type if expect is None else expect
  assert expect is not None, 'should never happen: type should be known here'  # noqa: S101
  # removeprefix() is a no-op when the (optional) prefix is absent
  data_str = data_str.removeprefix(expect)
  # for every type something different will happen now
  try:
    match expect:
      case CryptoInputType.STDIN:
        # read raw bytes from stdin: prefer the binary buffer; if unavailable,
        # fall back to text stream encoded as UTF-8 (consistent with str: policy).
        stream = getattr(sys.stdin, 'buffer', None)
        if stream is None:
          text: str = sys.stdin.read()
          if not isinstance(text, str):  # pyright: ignore[reportUnnecessaryIsInstance]
            raise InputError('sys.stdin.read() produced non-text data')  # noqa: TRY301
          return text.encode('utf-8')
        data: bytes = stream.read()
        if not isinstance(data, bytes):  # pyright: ignore[reportUnnecessaryIsInstance]
          raise InputError('sys.stdin.buffer.read() produced non-binary data')  # noqa: TRY301
        return data
      case CryptoInputType.PATH:
        # whole file is read into memory at once
        if not pathlib.Path(data_str).exists():
          raise InputError(f'cannot find file {data_str!r}')  # noqa: TRY301
        return pathlib.Path(data_str).read_bytes()
      case CryptoInputType.STR:
        return data_str.encode('utf-8')
      case CryptoInputType.HEX:
        return HexToBytes(data_str)
      case CryptoInputType.BASE64:
        # EncodedToBytes() decodes the URL-safe base64 alphabet
        return EncodedToBytes(data_str)
      case CryptoInputType.RAW:
        return RawToBytes(data_str)
      case _:
        raise InputError(f'invalid type {expect!r}')  # noqa: TRY301
  except Exception as err:
    # single exit point for failures: everything becomes an InputError chained to the cause
    raise InputError(f'invalid input: {err}') from err
935
-
936
-
937
- @dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
938
- class CryptoKey(abstract.ABC):
939
- """A cryptographic key."""
940
-
941
- @abstract.abstractmethod
942
- def __post_init__(self) -> None:
943
- """Check data."""
944
- # every sub-class of CryptoKey has to implement its own version of __post_init__()
945
-
946
- @abstract.abstractmethod
947
- def __str__(self) -> str:
948
- """Safe (no secrets) string representation of the key.
949
-
950
- Returns:
951
- string representation of the key without leaking secrets
952
-
953
- """
954
- # every sub-class of CryptoKey has to implement its own version of __str__()
955
-
956
- @final
957
- def __repr__(self) -> str:
958
- """Safe (no secrets) string representation of the key. Same as __str__().
959
-
960
- Returns:
961
- string representation of the key without leaking secrets
962
-
963
- """
964
- # concrete __repr__() delegates to the (abstract) __str__():
965
- # this avoids marking __repr__() abstract while still unifying behavior
966
- return self.__str__()
967
-
968
- @final
969
- def _DebugDump(self) -> str:
970
- """Debug dump of the key object. NOT for logging, NOT for regular use, EXPOSES secrets.
971
-
972
- We disable default __repr__() for the CryptoKey classes for security reasons, so we won't
973
- leak private key values into logs, but this method allows for explicit access to the
974
- class fields for debugging purposes by mimicking the usual dataclass __repr__().
975
-
976
- Returns:
977
- string with all the object's fields explicit values
978
-
979
- """
980
- cls: str = type(self).__name__
981
- parts: list[str] = []
982
- for field in dataclasses.fields(self):
983
- val: Any = getattr(self, field.name) # getattr is fine with frozen/slots
984
- parts.append(f'{field.name}={val!r}')
985
- return f'{cls}({", ".join(parts)})'
986
-
987
- @final
988
- @property
989
- def _json_dict(self) -> JSONDict:
990
- """Dictionary representation of the object suitable for JSON conversion.
991
-
992
- Returns:
993
- JSONDict: representation of the object suitable for JSON conversion
994
-
995
- Raises:
996
- ImplementationError: object has types that are not supported in JSON
997
-
998
- """
999
- self_dict: CryptDict = dataclasses.asdict(self)
1000
- for field in dataclasses.fields(self):
1001
- # check the type is OK
1002
- if field.type not in _JSON_DATACLASS_TYPES:
1003
- raise ImplementationError(
1004
- f'Unsupported JSON field {field.name!r}/{field.type} not in {_JSON_DATACLASS_TYPES}'
1005
- )
1006
- # convert types that we accept but JSON does not
1007
- if field.type == 'bytes':
1008
- self_dict[field.name] = BytesToEncoded(cast('bytes', self_dict[field.name]))
1009
- return cast('JSONDict', self_dict)
1010
-
1011
- @final
1012
- @property
1013
- def json(self) -> str:
1014
- """JSON representation of the object, tightly packed, not for humans.
1015
-
1016
- Returns:
1017
- str: JSON representation of the object, tightly packed
1018
-
1019
- """
1020
- return json.dumps(self._json_dict, separators=(',', ':'))
1021
-
1022
- @final
1023
- @property
1024
- def formatted_json(self) -> str:
1025
- """JSON representation of the object formatted for humans.
1026
-
1027
- Returns:
1028
- str: JSON representation of the object formatted for humans
1029
-
1030
- """
1031
- return json.dumps(self._json_dict, indent=4, sort_keys=True)
1032
-
1033
- @final
1034
- @classmethod
1035
- def _FromJSONDict(cls, json_dict: JSONDict, /) -> Self:
1036
- """Create object from JSON representation.
1037
-
1038
- Args:
1039
- json_dict (JSONDict): JSON dict
1040
-
1041
- Returns:
1042
- a CryptoKey object ready for use
1043
-
1044
- Raises:
1045
- InputError: unexpected type/fields
1046
- ImplementationError: unsupported JSON field
1047
-
1048
- """
1049
- # check we got exactly the fields we needed
1050
- cls_fields: set[str] = {f.name for f in dataclasses.fields(cls)}
1051
- json_fields: set[str] = set(json_dict)
1052
- if cls_fields != json_fields:
1053
- raise InputError(f'JSON data decoded to unexpected fields: {cls_fields=} / {json_fields=}')
1054
- # reconstruct the types we meddled with inside self._json_dict
1055
- for field in dataclasses.fields(cls):
1056
- if field.type not in _JSON_DATACLASS_TYPES:
1057
- raise ImplementationError(
1058
- f'Unsupported JSON field {field.name!r}/{field.type} not in {_JSON_DATACLASS_TYPES}'
1059
- )
1060
- if field.type == 'bytes':
1061
- json_dict[field.name] = EncodedToBytes(json_dict[field.name]) # type: ignore[assignment, arg-type]
1062
- # build the object
1063
- return cls(**json_dict)
1064
-
1065
- @final
1066
- @classmethod
1067
- def FromJSON(cls, json_data: str, /) -> Self:
1068
- """Create object from JSON representation.
1069
-
1070
- Args:
1071
- json_data (str): JSON string
1072
-
1073
- Returns:
1074
- a CryptoKey object ready for use
1075
-
1076
- Raises:
1077
- InputError: unexpected type/fields
1078
-
1079
- """
1080
- # get the dict back
1081
- json_dict: JSONDict = json.loads(json_data)
1082
- if not isinstance(json_dict, dict): # pyright: ignore[reportUnnecessaryIsInstance]
1083
- raise InputError(f'JSON data decoded to unexpected type: {type(json_dict)}')
1084
- return cls._FromJSONDict(json_dict)
1085
-
1086
- @final
1087
- @property
1088
- def blob(self) -> bytes:
1089
- """Serial (bytes) representation of the object.
1090
-
1091
- Returns:
1092
- bytes, pickled, representation of the object
1093
-
1094
- """
1095
- return self.Blob()
1096
-
1097
- @final
1098
- def Blob(self, /, *, key: Encryptor | None = None, silent: bool = True) -> bytes:
1099
- """Get serial (bytes) representation of the object with more options, including encryption.
1100
-
1101
- Args:
1102
- key (Encryptor, optional): if given will key.Encrypt() data before saving
1103
- silent (bool, optional): if True (default) will not log
1104
-
1105
- Returns:
1106
- bytes, pickled, representation of the object
1107
-
1108
- """
1109
- return Serialize(self._json_dict, compress=-2, key=key, silent=silent, pickler=PickleJSON)
1110
-
1111
- @final
1112
- @property
1113
- def encoded(self) -> str:
1114
- """Base-64 representation of the object.
1115
-
1116
- Returns:
1117
- str, pickled, base64, representation of the object
1118
-
1119
- """
1120
- return self.Encoded()
1121
-
1122
- @final
1123
- def Encoded(self, /, *, key: Encryptor | None = None, silent: bool = True) -> str:
1124
- """Base-64 representation of the object with more options, including encryption.
1125
-
1126
- Args:
1127
- key (Encryptor, optional): if given will key.Encrypt() data before saving
1128
- silent (bool, optional): if True (default) will not log
1129
-
1130
- Returns:
1131
- str, pickled, base64, representation of the object
1132
-
1133
- """
1134
- return CryptoInputType.BASE64 + BytesToEncoded(self.Blob(key=key, silent=silent))
1135
-
1136
- @final
1137
- @property
1138
- def hex(self) -> str:
1139
- """Hexadecimal representation of the object.
1140
-
1141
- Returns:
1142
- str, pickled, hexadecimal, representation of the object
1143
-
1144
- """
1145
- return self.Hex()
1146
-
1147
- @final
1148
- def Hex(self, /, *, key: Encryptor | None = None, silent: bool = True) -> str:
1149
- """Hexadecimal representation of the object with more options, including encryption.
1150
-
1151
- Args:
1152
- key (Encryptor, optional): if given will key.Encrypt() data before saving
1153
- silent (bool, optional): if True (default) will not log
1154
-
1155
- Returns:
1156
- str, pickled, hexadecimal, representation of the object
1157
-
1158
- """
1159
- return CryptoInputType.HEX + BytesToHex(self.Blob(key=key, silent=silent))
1160
-
1161
- @final
1162
- @property
1163
- def raw(self) -> str:
1164
- """Raw escaped binary representation of the object.
1165
-
1166
- Returns:
1167
- str, pickled, raw escaped binary, representation of the object
1168
-
1169
- """
1170
- return self.Raw()
1171
-
1172
- @final
1173
- def Raw(self, /, *, key: Encryptor | None = None, silent: bool = True) -> str:
1174
- """Raw escaped binary representation of the object with more options, including encryption.
1175
-
1176
- Args:
1177
- key (Encryptor, optional): if given will key.Encrypt() data before saving
1178
- silent (bool, optional): if True (default) will not log
1179
-
1180
- Returns:
1181
- str, pickled, raw escaped binary, representation of the object
1182
-
1183
- """
1184
- return CryptoInputType.RAW + BytesToRaw(self.Blob(key=key, silent=silent))
1185
-
1186
- @final
1187
- @classmethod
1188
- def Load(cls, data: str | bytes, /, *, key: Decryptor | None = None, silent: bool = True) -> Self:
1189
- """Load (create) object from serialized bytes or string.
1190
-
1191
- Args:
1192
- data (str | bytes): if bytes is assumed from CryptoKey.blob/Blob(), and
1193
- if string is assumed from CryptoKey.encoded/Encoded()
1194
- key (Decryptor, optional): if given will key.Encrypt() data before saving
1195
- silent (bool, optional): if True (default) will not log
1196
-
1197
- Returns:
1198
- a CryptoKey object ready for use
1199
-
1200
- Raises:
1201
- InputError: decode error
1202
-
1203
- """
1204
- # if this is a string, then we suppose it is base64
1205
- if isinstance(data, str):
1206
- data = BytesFromInput(data)
1207
- # we now have bytes and we suppose it came from CryptoKey.blob()/CryptoKey.CryptoBlob()
1208
- try:
1209
- json_dict: JSONDict = DeSerialize(data=data, key=key, silent=silent, unpickler=UnpickleJSON)
1210
- return cls._FromJSONDict(json_dict)
1211
- except Exception as err:
1212
- raise InputError(f'input decode error: {err}') from err
1213
-
1214
-
1215
- @runtime_checkable
1216
- class Encryptor(Protocol):
1217
- """Abstract interface for a class that has encryption.
1218
-
1219
- Contract:
1220
- - If algorithm accepts a `nonce` or `tag` these have to be handled internally by the
1221
- implementation and appended to the `ciphertext`/`signature`.
1222
- - If AEAD is supported, `associated_data` (AAD) must be authenticated. If not supported
1223
- then `associated_data` different from None must raise InputError.
1224
-
1225
- Notes:
1226
- The interface is deliberately minimal: byte-in / byte-out.
1227
- Metadata like nonce/tag may be:
1228
- - returned alongside `ciphertext`/`signature`, or
1229
- - bundled/serialized into `ciphertext`/`signature` by the implementation.
1230
-
1231
- """
1232
-
1233
- @abstract.abstractmethod
1234
- def Encrypt(self, plaintext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
1235
- """Encrypt `plaintext` and return `ciphertext`.
1236
-
1237
- Args:
1238
- plaintext (bytes): Data to encrypt.
1239
- associated_data (bytes, optional): Optional AAD for AEAD modes; must be
1240
- provided again on decrypt
1241
-
1242
- Returns:
1243
- bytes: Ciphertext; if a nonce/tag is needed for decryption, the implementation
1244
- must encode it within the returned bytes (or document how to retrieve it)
1245
-
1246
- Raises:
1247
- InputError: invalid inputs
1248
- CryptoError: internal crypto failures
1249
-
1250
- """
1251
-
1252
-
1253
- @runtime_checkable
1254
- class Decryptor(Protocol):
1255
- """Abstract interface for a class that has decryption (see contract/notes in Encryptor)."""
1256
-
1257
- @abstract.abstractmethod
1258
- def Decrypt(self, ciphertext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
1259
- """Decrypt `ciphertext` and return the original `plaintext`.
1260
-
1261
- Args:
1262
- ciphertext (bytes): Data to decrypt (including any embedded nonce/tag if applicable)
1263
- associated_data (bytes, optional): Optional AAD (must match what was used during encrypt)
1264
-
1265
- Returns:
1266
- bytes: Decrypted plaintext bytes
1267
-
1268
- Raises:
1269
- InputError: invalid inputs
1270
- CryptoError: internal crypto failures, authentication failure, key mismatch, etc
1271
-
1272
- """
1273
-
1274
-
1275
- @runtime_checkable
1276
- class Verifier(Protocol):
1277
- """Abstract interface for asymmetric signature verify. (see contract/notes in Encryptor)."""
1278
-
1279
- @abstract.abstractmethod
1280
- def Verify(
1281
- self, message: bytes, signature: bytes, /, *, associated_data: bytes | None = None
1282
- ) -> bool:
1283
- """Verify a `signature` for `message`. True if OK; False if failed verification.
1284
-
1285
- Args:
1286
- message (bytes): Data that was signed (including any embedded nonce/tag if applicable)
1287
- signature (bytes): Signature data to verify (including any embedded nonce/tag if applicable)
1288
- associated_data (bytes, optional): Optional AAD (must match what was used during signing)
1289
-
1290
- Returns:
1291
- True if signature is valid, False otherwise
1292
-
1293
- Raises:
1294
- InputError: invalid inputs
1295
- CryptoError: internal crypto failures, authentication failure, key mismatch, etc
1296
-
1297
- """
1298
-
1299
-
1300
- @runtime_checkable
1301
- class Signer(Protocol):
1302
- """Abstract interface for asymmetric signing. (see contract/notes in Encryptor)."""
1303
-
1304
- @abstract.abstractmethod
1305
- def Sign(self, message: bytes, /, *, associated_data: bytes | None = None) -> bytes:
1306
- """Sign `message` and return the `signature`.
1307
-
1308
- Args:
1309
- message (bytes): Data to sign.
1310
- associated_data (bytes, optional): Optional AAD for AEAD modes; must be
1311
- provided again on decrypt
1312
-
1313
- Returns:
1314
- bytes: Signature; if a nonce/tag is needed for decryption, the implementation
1315
- must encode it within the returned bytes (or document how to retrieve it)
1316
-
1317
- Raises:
1318
- InputError: invalid inputs
1319
- CryptoError: internal crypto failures
1320
-
1321
- """
1322
-
1323
-
1324
- def Serialize[T](
1325
- python_obj: T,
1326
- /,
1327
- *,
1328
- file_path: str | None = None,
1329
- compress: int | None = 3,
1330
- key: Encryptor | None = None,
1331
- silent: bool = False,
1332
- pickler: abc.Callable[[T], bytes] = PickleGeneric,
1333
- ) -> bytes:
1334
- """Serialize a Python object into a BLOB, optionally compress / encrypt / save to disk.
1335
-
1336
- Data path is:
1337
-
1338
- `obj` => [pickler] => (compress) => (encrypt) => (save to `file_path`) => return
1339
-
1340
- At every step of the data path the data will be measured, in bytes.
1341
- Every data conversion will be timed. The measurements/times will be logged (once).
1342
-
1343
- Compression levels / speed can be controlled by `compress`. Use this as reference:
1344
-
1345
- | Level | Speed | Compression ratio | Typical use case |
1346
- | -------- | ------------| ------------------------| --------------------------------------- |
1347
- | -5 to -1 | Fastest | Poor (better than none) | Real-time / very latency-sensitive |
1348
- | 0…3 | Very fast | Good ratio | Default CLI choice, safe baseline |
1349
- | 4…6 | Moderate | Better ratio | Good compromise for general persistence |
1350
- | 7…10 | Slower | Marginally better ratio | Only if storage space is precious |
1351
- | 11…15 | Much slower | Slight gains | Large archives, not for runtime use |
1352
- | 16…22 | Very slow | Tiny gains | Archival-only, multi-GB datasets |
1353
-
1354
- Args:
1355
- python_obj (Any): serializable Python object
1356
- file_path (str, optional): full path to optionally save the data to
1357
- compress (int | None, optional): Compress level before encrypting/saving; -22 ≤ compress ≤ 22;
1358
- None is no compression; default is 3, which is fast, see table above for other values
1359
- key (Encryptor, optional): if given will key.Encrypt() data before saving
1360
- silent (bool, optional): if True will not log; default is False (will log)
1361
- pickler (Callable[[Any], bytes], optional): if not given, will just be the `pickle` module;
1362
- if given will be a method to convert any Python object to its `bytes` representation;
1363
- PickleGeneric is the default, but another useful value is PickleJSON
1364
-
1365
- Returns:
1366
- bytes: serialized binary data corresponding to obj + (compression) + (encryption)
1367
-
1368
- """
1369
- messages: list[str] = []
1370
- with Timer('Serialization complete', emit_log=False) as tm_all:
1371
- # pickle
1372
- with Timer('PICKLE', emit_log=False) as tm_pickle:
1373
- obj: bytes = pickler(python_obj)
1374
- if not silent:
1375
- messages.append(f' {tm_pickle}, {HumanizedBytes(len(obj))}')
1376
- # compress, if needed
1377
- if compress is not None:
1378
- compress = max(compress, -22)
1379
- compress = min(compress, 22)
1380
- with Timer(f'COMPRESS@{compress}', emit_log=False) as tm_compress:
1381
- obj = zstandard.ZstdCompressor(level=compress).compress(obj)
1382
- if not silent:
1383
- messages.append(f' {tm_compress}, {HumanizedBytes(len(obj))}')
1384
- # encrypt, if needed
1385
- if key is not None:
1386
- with Timer('ENCRYPT', emit_log=False) as tm_crypto:
1387
- obj = key.Encrypt(obj, associated_data=_PICKLE_AAD)
1388
- if not silent:
1389
- messages.append(f' {tm_crypto}, {HumanizedBytes(len(obj))}')
1390
- # optionally save to disk
1391
- if file_path is not None:
1392
- with Timer('SAVE', emit_log=False) as tm_save:
1393
- pathlib.Path(file_path).write_bytes(obj)
1394
- if not silent:
1395
- messages.append(f' {tm_save}, to {file_path!r}')
1396
- # log and return
1397
- if not silent:
1398
- logging.info(f'{tm_all}; parts:\n{"\n".join(messages)}')
1399
- return obj
1400
-
1401
-
1402
- def DeSerialize[T]( # noqa: C901
1403
- *,
1404
- data: bytes | None = None,
1405
- file_path: str | None = None,
1406
- key: Decryptor | None = None,
1407
- silent: bool = False,
1408
- unpickler: abc.Callable[[bytes], T] = UnpickleGeneric,
1409
- ) -> T:
1410
- """Load (de-serializes) a BLOB back to a Python object, optionally decrypting / decompressing.
1411
-
1412
- Data path is:
1413
-
1414
- `data` or `file_path` => (decrypt) => (decompress) => [unpickler] => return object
1415
-
1416
- At every step of the data path the data will be measured, in bytes.
1417
- Every data conversion will be timed. The measurements/times will be logged (once).
1418
- Compression versus no compression will be automatically detected.
1419
-
1420
- Args:
1421
- data (bytes | None, optional): if given, use this as binary data string (input);
1422
- if you use this option, `file_path` will be ignored
1423
- file_path (str | None, optional): if given, use this as file path to load binary data
1424
- string (input); if you use this option, `data` will be ignored. Defaults to None.
1425
- key (Decryptor | None, optional): if given will key.Decrypt() data before decompressing/loading.
1426
- Defaults to None.
1427
- silent (bool, optional): if True will not log; default is False (will log). Defaults to False.
1428
- unpickler (Callable[[bytes], Any], optional): if not given, will just be the `pickle` module;
1429
- if given will be a method to convert a `bytes` representation back to a Python object;
1430
- UnpickleGeneric is the default, but another useful value is UnpickleJSON.
1431
- Defaults to UnpickleGeneric.
1432
-
1433
- Returns:
1434
- De-Serialized Python object corresponding to data
1435
-
1436
- Raises:
1437
- InputError: invalid inputs
1438
- CryptoError: internal crypto failures, authentication failure, key mismatch, etc
1439
-
1440
- """ # noqa: DOC502
1441
- # test inputs
1442
- if (data is None and file_path is None) or (data is not None and file_path is not None):
1443
- raise InputError('you must provide only one of either `data` or `file_path`')
1444
- if file_path and not pathlib.Path(file_path).exists():
1445
- raise InputError(f'invalid file_path: {file_path!r}')
1446
- if data and len(data) < 4: # noqa: PLR2004
1447
- raise InputError('invalid data: too small')
1448
- # start the pipeline
1449
- obj: bytes = data or b''
1450
- messages: list[str] = [f'DATA: {HumanizedBytes(len(obj))}'] if data and not silent else []
1451
- with Timer('De-Serialization complete', emit_log=False) as tm_all:
1452
- # optionally load from disk
1453
- if file_path:
1454
- assert not obj, 'should never happen: if we have a file obj should be empty' # noqa: S101
1455
- with Timer('LOAD', emit_log=False) as tm_load:
1456
- obj = pathlib.Path(file_path).read_bytes()
1457
- if not silent:
1458
- messages.append(f' {tm_load}, {HumanizedBytes(len(obj))}, from {file_path!r}')
1459
- # decrypt, if needed
1460
- if key is not None:
1461
- with Timer('DECRYPT', emit_log=False) as tm_crypto:
1462
- obj = key.Decrypt(obj, associated_data=_PICKLE_AAD)
1463
- if not silent:
1464
- messages.append(f' {tm_crypto}, {HumanizedBytes(len(obj))}')
1465
- # decompress: we try to detect compression to determine if we must call zstandard
1466
- if (
1467
- len(obj) >= 4 # noqa: PLR2004
1468
- and (
1469
- ((magic := int.from_bytes(obj[:4], 'little')) == _ZSTD_MAGIC_FRAME)
1470
- or (_ZSTD_MAGIC_SKIPPABLE_MIN <= magic <= _ZSTD_MAGIC_SKIPPABLE_MAX)
1471
- )
1472
- ):
1473
- with Timer('DECOMPRESS', emit_log=False) as tm_decompress:
1474
- obj = zstandard.ZstdDecompressor().decompress(obj)
1475
- if not silent:
1476
- messages.append(f' {tm_decompress}, {HumanizedBytes(len(obj))}')
1477
- elif not silent:
1478
- messages.append(' (no compression detected)')
1479
- # create the actual object = unpickle
1480
- with Timer('UNPICKLE', emit_log=False) as tm_unpickle:
1481
- python_obj: T = unpickler(obj)
1482
- if not silent:
1483
- messages.append(f' {tm_unpickle}')
1484
- # log and return
1485
- if not silent:
1486
- logging.info(f'{tm_all}; parts:\n{"\n".join(messages)}')
1487
- return python_obj
1488
-
1489
-
1490
@dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
class PublicBid512(CryptoKey):
  """Public commitment to a (cryptographically secure) bid, to be revealed/validated later.

  The commitment is: public_hash = Hash512(public_key || private_key || secret_bid)

  All values are bytes. (public_key, public_hash) form the public part, while
  (private_key, secret_bid) form the private part. The computation can be re-checked later.

  No measures are taken here to prevent timing attacks (probably not a concern).

  Attributes:
    public_key (bytes): 512-bits random value
    public_hash (bytes): SHA-512 hash of (public_key || private_key || secret_bid)

  """

  public_key: bytes
  public_hash: bytes

  def __post_init__(self) -> None:
    """Check data: both public values must be exactly 64 bytes long.

    Raises:
      InputError: invalid inputs

    """
    sizes_ok: bool = len(self.public_key) == 64 and len(self.public_hash) == 64  # noqa: PLR2004
    if not sizes_ok:
      raise InputError(f'invalid public_key or public_hash: {self}')

  def __str__(self) -> str:
    """Safe string representation of the PublicBid.

    Returns:
      string representation of PublicBid

    """
    key_text: str = BytesToEncoded(self.public_key)
    hash_text: str = BytesToHex(self.public_hash)
    return f'PublicBid512(public_key={key_text}, public_hash={hash_text})'

  def VerifyBid(self, private_key: bytes, secret: bytes, /) -> bool:
    """Verify a bid. True if OK; False if failed verification.

    Args:
      private_key (bytes): 512-bits private key
      secret (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)

    Returns:
      True if bid is valid, False otherwise

    """
    # building the PrivateBid512 runs all the validations; only CryptoError is turned into
    # a False result, so an InputError raised by the construction propagates to the caller
    try:
      PrivateBid512(
        public_key=self.public_key,
        public_hash=self.public_hash,
        private_key=private_key,
        secret_bid=secret,
      )
    except CryptoError:
      return False  # bid does not match the public commitment
    return True  # construction succeeded, so the bid checks out

  @classmethod
  def Copy(cls, other: PublicBid512, /) -> Self:
    """Initialize a public bid by taking the public parts of a public/private bid.

    Args:
      other (PublicBid512): the bid to copy from

    Returns:
      Self: an initialized PublicBid512

    """
    public_parts: dict[str, bytes] = {
      'public_key': other.public_key,
      'public_hash': other.public_hash,
    }
    return cls(**public_parts)
1568
-
1569
-
1570
@dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
class PrivateBid512(PublicBid512):
  """Private bid, revealable and checkable against a public commitment (see PublicBid).

  Attributes:
    private_key (bytes): 512-bits random value
    secret_bid (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)

  """

  private_key: bytes
  secret_bid: bytes

  def __post_init__(self) -> None:
    """Check data: public part first, then the private sizes, then the commitment itself.

    Raises:
      InputError: invalid inputs
      CryptoError: bid does not match the public commitment

    """
    # explicit two-argument super() is kept on purpose (as in the rest of this class)
    super(PrivateBid512, self).__post_init__()
    private_ok: bool = len(self.private_key) == 64 and len(self.secret_bid) >= 1  # noqa: PLR2004
    if not private_ok:
      raise InputError(f'invalid private_key or secret_bid: {self}')
    expected_hash: bytes = Hash512(self.public_key + self.private_key + self.secret_bid)
    if self.public_hash != expected_hash:
      raise CryptoError(f'inconsistent bid: {self}')

  def __str__(self) -> str:
    """Safe (no secrets) string representation of the PrivateBid.

    Returns:
      string representation of PrivateBid without leaking secrets

    """
    public_text: str = super(PrivateBid512, self).__str__()
    key_text: str = ObfuscateSecret(self.private_key)
    bid_text: str = ObfuscateSecret(self.secret_bid)
    return f'PrivateBid512({public_text}, private_key={key_text}, secret_bid={bid_text})'

  @classmethod
  def New(cls, secret: bytes, /) -> Self:
    """Make the `secret` into a new bid, with freshly generated public and private keys.

    Args:
      secret (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)

    Returns:
      PrivateBid object ready for use (use PublicBid.Copy() to get the public part)

    Raises:
      InputError: invalid inputs

    """
    # an empty secret cannot be bid on
    if len(secret) == 0:
      raise InputError(f'invalid secret length: {len(secret)}')
    # two independent 64-byte (512 bits) random values
    public_key: bytes = RandBytes(64)
    private_key: bytes = RandBytes(64)
    # the commitment hashes the three parts concatenated in this exact order
    commitment: bytes = Hash512(public_key + private_key + secret)
    return cls(
      public_key=public_key,
      public_hash=commitment,
      private_key=private_key,
      secret_bid=secret,
    )