transcrypto 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
transcrypto/base.py ADDED
@@ -0,0 +1,1018 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright 2025 Daniel Balparda (balparda@github.com) - Apache-2.0 license
4
+ #
5
+ """Balparda's TransCrypto base library."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import abc
10
+ import base64
11
+ import dataclasses
12
+ # import datetime
13
+ import functools
14
+ import hashlib
15
+ import logging
16
+ import math
17
+ import os.path
18
+ import pickle
19
+ # import pdb
20
+ import secrets
21
+ import time
22
+ from typing import Any, Callable, final, MutableSequence, Self, TypeVar
23
+
24
+ import zstandard
25
+
26
# package metadata
__author__ = 'balparda@github.com'
__version__ = '1.1.1'  # v1.1.1, 2025-08-29
# numeric form of __version__, e.g. (1, 1, 1); every dot-separated part must parse with int()
__version_tuple__: tuple[int, ...] = tuple(int(v) for v in __version__.split('.'))
29
+
30
+ # MIN_TM = int( # minimum allowed timestamp
31
+ # datetime.datetime(2000, 1, 1, 0, 0, 0).replace(tzinfo=datetime.timezone.utc).timestamp())
32
+
33
def BytesToHex(b: bytes) -> str:
  """Hexadecimal text for bytes `b` (two lower-case chars per byte)."""
  return b.hex()


def BytesToInt(b: bytes) -> int:
  """Unsigned big-endian integer for bytes `b`; empty bytes give 0."""
  return int.from_bytes(b, 'big', signed=False)


def BytesToEncoded(b: bytes) -> str:
  """URL-safe base64 text (ASCII, with padding) for bytes `b`."""
  return base64.urlsafe_b64encode(b).decode('ascii')


# hexadecimal text back to bytes; the inverse of BytesToHex()
HexToBytes: Callable[[str], bytes] = bytes.fromhex


def IntToBytes(i: int) -> bytes:
  """Shortest unsigned big-endian bytes for integer `i` ≥ 0; note that 0 gives b''."""
  n_bytes: int = (i.bit_length() + 7) // 8  # bits rounded up to whole bytes
  return i.to_bytes(n_bytes, 'big', signed=False)


def IntToEncoded(i: int) -> str:
  """URL-safe base64 text of IntToBytes(i)."""
  return BytesToEncoded(IntToBytes(i))


def EncodedToBytes(e: str) -> bytes:
  """Bytes for URL-safe base64 (ASCII) text `e`; the inverse of BytesToEncoded()."""
  return base64.urlsafe_b64decode(e.encode('ascii'))


def PadBytesTo(b: bytes, i: int) -> bytes:
  """Left-pad `b` with zero bytes up to `i` BITS (rounded up to whole bytes); never truncates."""
  return b.rjust((i + 7) // 8, b'\x00')
44
+
45
+
46
# these control the pickling of data, do NOT ever change, or you will break all databases
# <https://docs.python.org/3/library/pickle.html#pickle.DEFAULT_PROTOCOL>
# (pickle protocol 4 was added in python v3.4 and became the default protocol in python v3.8)
_PICKLE_PROTOCOL = 4  # do NOT ever change!
# associated data (AAD) given to key.Encrypt()/key.Decrypt() by Serialize()/DeSerialize()
_PICKLE_AAD = b'transcrypto.base.Serialize'  # do NOT ever change!
# these help find compressed files, do NOT change unless zstandard changes;
# DeSerialize() compares them against the first 4 bytes of the data read as little-endian
_ZSTD_MAGIC_FRAME = 0xFD2FB528  # start of a regular zstandard frame
_ZSTD_MAGIC_SKIPPABLE_MIN = 0x184D2A50  # skippable frames use magic values in [MIN, MAX]
_ZSTD_MAGIC_SKIPPABLE_MAX = 0x184D2A5F
54
+
55
+
56
class Error(Exception):
  """Base class of all TransCrypto exceptions."""


class InputError(Error):
  """TransCrypto exception raised for invalid inputs."""


class CryptoError(Error):
  """TransCrypto exception raised for cryptographic failures."""
66
+
67
+
68
def HumanizedBytes(inp_sz: int, /) -> str:
  """Format a byte count as a human-readable string with IEC binary units (powers of 1024).

  Units used are `B`, `KiB`, `MiB`, `GiB`, `TiB`, `PiB` and `EiB`, each 1024 times the
  previous one. Anything of 1 EiB or more is expressed in `EiB`, however large.

  Args:
    inp_sz (int): size in bytes, ≥ 0

  Returns:
    str: the plain integer and `B` for sizes under 1024 bytes; otherwise the scaled
        value with two decimal places followed by a space and the unit

  Raises:
    InputError: negative `inp_sz`

  Examples:
    >>> HumanizedBytes(512)
    '512 B'
    >>> HumanizedBytes(2048)
    '2.00 KiB'
    >>> HumanizedBytes(5 * 1024**3)
    '5.00 GiB'
  """
  if inp_sz < 0:
    raise InputError(f'input should be >=0 and got {inp_sz}')
  if inp_sz < 1024:
    return f'{inp_sz} B'  # plain bytes: no scaling and no decimals
  # pick the first unit whose next multiple is still above the input
  for power, suffix in enumerate(('KiB', 'MiB', 'GiB', 'TiB', 'PiB'), start=1):
    if inp_sz < 1024 ** (power + 1):
      return f'{(inp_sz / 1024 ** power):0.2f} {suffix}'
  # everything else is expressed in the largest supported unit
  return f'{(inp_sz / 1024 ** 6):0.2f} EiB'
116
+
117
+
118
def HumanizedDecimal(inp_sz: int | float, unit: str = '', /) -> str:
  """Format a number as a human-readable string with SI metric prefixes (powers of 1000).

  Prefixes used are `k`, `M`, `G`, `T`, `P` and `E`, each 1000 times the previous one.
  Anything of 1000**6 or more is expressed with `E`, however large.

  Args:
    inp_sz (int | float): quantity to format, finite and ≥ 0
    unit (str, optional): base unit (e.g. `'Hz'`); surrounding whitespace is stripped;
        for values under 1000 it follows the number after a space, for scaled values
        it is glued to the prefix (e.g. `'1.50 kHz'`)

  Returns:
    str: for values under 1000 the integer as-is or the float with four decimal places;
        otherwise the scaled value with two decimal places, a space, and prefix + unit

  Raises:
    InputError: `inp_sz` is negative or not finite

  Examples:
    >>> HumanizedDecimal(950)
    '950'
    >>> HumanizedDecimal(1500)
    '1.50 k'
    >>> HumanizedDecimal(1500, ' Hz ')
    '1.50 kHz'
    >>> HumanizedDecimal(0.123456, 'V')
    '0.1235 V'
  """
  if not math.isfinite(inp_sz) or inp_sz < 0:
    raise InputError(f'input should be >=0 and got {inp_sz} / {unit!r}')
  unit = unit.strip()
  if inp_sz < 1000:
    # unscaled: only floats get decimals, and the unit (if any) is separated by a space
    number: str = f'{inp_sz:0.4f}' if isinstance(inp_sz, float) else f'{inp_sz}'
    return number + (f' {unit}' if unit else '')
  # pick the first prefix whose next multiple is still above the input
  for power, prefix in enumerate(('k', 'M', 'G', 'T', 'P'), start=1):
    if inp_sz < 1000 ** (power + 1):
      return f'{(inp_sz / 1000 ** power):0.2f} {prefix}{unit}'
  # everything else is expressed with the largest supported prefix
  return f'{(inp_sz / 1000 ** 6):0.2f} E{unit}'
170
+
171
+
172
def HumanizedSeconds(inp_secs: int | float, /) -> str:  # pylint: disable=too-many-return-statements
  """Format a duration, given in seconds, as a human-readable string.

  The unit is chosen from the magnitude of the duration:

      exactly 0   → '0.00 s'
      < 0.001 s   → microseconds (`µs`, micro sign U+00B5), 3 decimal places
      < 1 s       → milliseconds (`ms`), 3 decimal places
      < 60 s      → seconds (`s`), 2 decimal places
      < 3600 s    → minutes (`min`), 2 decimal places
      < 86400 s   → hours (`h`), 2 decimal places
      ≥ 86400 s   → days (`d`), 2 decimal places

  Args:
    inp_secs (int | float): duration in seconds, finite and ≥ 0

  Returns:
    str: the scaled value, a space, and the unit

  Raises:
    InputError: `inp_secs` is negative or not finite

  Examples:
    >>> HumanizedSeconds(0)
    '0.00 s'
    >>> HumanizedSeconds(0.000004)
    '4.000 µs'
    >>> HumanizedSeconds(0.25)
    '250.000 ms'
    >>> HumanizedSeconds(42)
    '42.00 s'
    >>> HumanizedSeconds(3661)
    '1.02 h'
  """
  if not math.isfinite(inp_secs) or inp_secs < 0:
    raise InputError(f'input should be >=0 and got {inp_secs}')
  if inp_secs == 0:
    return '0.00 s'
  secs: float = float(inp_secs)
  # below one second: scale UP to micro/milliseconds
  if secs < 1:
    if secs < 0.001:
      return f'{secs * 1000 * 1000:0.3f} µs'
    return f'{secs * 1000:0.3f} ms'
  # one second and above: scale DOWN to the largest unit that fits
  minute, hour, day = 60, 60 * 60, 24 * 60 * 60
  if secs < minute:
    return f'{secs:0.2f} s'
  if secs < hour:
    return f'{(secs / minute):0.2f} min'
  if secs < day:
    return f'{(secs / hour):0.2f} h'
  return f'{(secs / day):0.2f} d'
234
+
235
+
236
class Timer:
  """Execution timer usable as a context manager, as a decorator, or as a plain object.

  A timer can be started once and stopped once; when stopped it reports its message
  through logging.info() and/or print(), depending on the flags given at construction.

  Examples:

    # As a context manager
    with Timer('Block timing'):
      time.sleep(1.2)

    # As a decorator
    @Timer('Function timing')
    def slow_function():
      time.sleep(0.8)

    # As a regular object
    tm = Timer('Inline timing')
    tm.Start()
    time.sleep(0.1)
    tm.Stop()
    print(tm)

  Attributes:
    label (str): Timer label, stripped of surrounding whitespace
    emit_log (bool): If True will logging.info() the timer when stopped
    emit_print (bool): If True will print() the timer when stopped
    start (float | None): time.perf_counter() at Start(), None if not started
    end (float | None): time.perf_counter() at Stop(), None if not stopped
    elapsed (float | None): `end - start` in seconds, None if not stopped
  """

  # type of the callable being decorated, so __call__() keeps the callable's signature
  _F = TypeVar('_F', bound=Callable[..., Any])

  def __init__(
      self, label: str = 'Elapsed time', /, *,
      emit_log: bool = True, emit_print: bool = False) -> None:
    """Initialize the Timer (does NOT start it).

    Args:
      label (str, optional): A description or name for the timed block or function
      emit_log (bool, optional): Emit a log message when finished; default is True
      emit_print (bool, optional): Emit a print() message when finished; default is False

    Raises:
      InputError: empty label
    """
    clean_label: str = label.strip()
    if not clean_label:
      raise InputError('Empty label')
    self.label: str = clean_label
    self.emit_log: bool = emit_log
    self.emit_print: bool = emit_print
    self.start: float | None = None
    self.end: float | None = None
    self.elapsed: float | None = None

  def __str__(self) -> str:
    """Current timer value: unstarted marker, partial (still running) time, or final time."""
    if self.start is None:
      return f'{self.label}: <UNSTARTED>'
    if self.end is not None and self.elapsed is not None:
      return f'{self.label}: {HumanizedSeconds(self.elapsed)}'
    # started but not stopped yet: report the time elapsed so far
    return f'{self.label}: <PARTIAL> {HumanizedSeconds(time.perf_counter() - self.start)}'

  def Start(self) -> None:
    """Start the timer.

    Raises:
      Error: timer was already started
    """
    if self.start is not None:
      raise Error('Re-starting timer is forbidden')
    self.start = time.perf_counter()

  def __enter__(self) -> Timer:
    """Start the timer when entering the context."""
    self.Start()
    return self

  def Stop(self) -> None:
    """Stop the timer and emit the timer message via logging.info() and/or print().

    Raises:
      Error: timer was never started, or was already stopped
    """
    if self.start is None:
      raise Error('Stopping an unstarted timer')
    if self.end is not None or self.elapsed is not None:
      raise Error('Re-stopping timer is forbidden')
    self.end = time.perf_counter()
    self.elapsed = self.end - self.start
    report: str = str(self)
    if self.emit_log:
      logging.info(report)
    if self.emit_print:
      print(report)

  def __exit__(
      self, unused_exc_type: type[BaseException] | None,
      unused_exc_val: BaseException | None, exc_tb: Any) -> None:
    """Stop the timer when exiting the context; exceptions are not suppressed.

    Args:
      unused_exc_type (type | None): Exception type, if any (not used)
      unused_exc_val (BaseException | None): Exception value, if any (not used)
      exc_tb (Any): Traceback object, if any (not used)
    """
    self.Stop()

  def __call__(self, func: Timer._F) -> Timer._F:
    """Allow the Timer to be used as a decorator.

    Args:
      func: The function to time.

    Returns:
      The wrapped function with timing behavior.
    """

    @functools.wraps(func)
    def _Wrapper(*args: Any, **kwargs: Any) -> Any:
      # a timer cannot be restarted, so every call is timed by a fresh copy of this one
      fresh_timer = self.__class__(
          self.label, emit_log=self.emit_log, emit_print=self.emit_print)
      with fresh_timer:
        return func(*args, **kwargs)

    return _Wrapper  # type:ignore
350
+
351
+
352
def RandBits(n_bits: int, /) -> int:
  """Crypto-random integer of exactly `n_bits` bits (i.e., the top bit is always 1).

  Because the top bit is fixed the entropy is ~ (n_bits-1), which is why less than one
  byte is not allowed. For the large integers a crypto library works with this loss
  is negligible in practice.

  Args:
    n_bits (int): number of bits to produce, ≥ 8

  Returns:
    int: random integer `n` with n.bit_length() == n_bits

  Raises:
    InputError: invalid n_bits
  """
  if n_bits < 8:
    raise InputError(f'n_bits must be ≥ 8: {n_bits}')
  # rejection sampling: instead of forcing the top bit we draw again until it comes set
  while True:
    candidate: int = secrets.randbits(n_bits)
    if candidate.bit_length() == n_bits:
      return candidate
376
+
377
+
378
def RandInt(min_int: int, max_int: int, /) -> int:
  """Crypto-random integer, uniform over the inclusive range [min_int, max_int].

  Args:
    min_int (int): lower bound, inclusive, ≥ 0
    max_int (int): upper bound, inclusive, > min_int

  Returns:
    int: random integer `n` with min_int ≤ n ≤ max_int

  Raises:
    InputError: invalid min/max
  """
  if not 0 <= min_int < max_int:
    raise InputError(f'min_int must be ≥ 0, and < max_int: {min_int} / {max_int}')
  # draw an offset in [0, max_int - min_int] and shift it into the requested range
  offset: int = secrets.randbelow(max_int - min_int + 1)
  result: int = min_int + offset
  assert min_int <= result <= max_int, 'should never happen: generated number out of range'
  return result
399
+
400
+
401
+ def RandShuffle[T: Any](seq: MutableSequence[T], /) -> None:
402
+ """In-place Crypto-random shuffle order for `seq` mutable sequence.
403
+
404
+ Args:
405
+ seq (MutableSequence[T]): any mutable sequence with 2 or more elements
406
+
407
+ Raises:
408
+ InputError: not enough elements
409
+ """
410
+ # test inputs
411
+ if (n_seq := len(seq)) < 2:
412
+ raise InputError(f'seq must have 2 or more elements: {n_seq}')
413
+ # cryptographically sound Fisher–Yates using secrets.randbelow
414
+ for i in range(n_seq - 1, 0, -1):
415
+ j: int = secrets.randbelow(i + 1)
416
+ seq[i], seq[j] = seq[j], seq[i]
417
+
418
+
419
def RandBytes(n_bytes: int, /) -> bytes:
  """Crypto-random bytes, `n_bytes` of them, straight from secrets.token_bytes().

  Args:
    n_bytes (int): number of bytes to produce, ≥ 1

  Returns:
    bytes: random bytes with len() == n_bytes

  Raises:
    InputError: invalid n_bytes
  """
  if n_bytes < 1:
    raise InputError(f'n_bytes must be ≥ 1: {n_bytes}')
  random_bytes: bytes = secrets.token_bytes(n_bytes)
  assert len(random_bytes) == n_bytes, 'should never happen: generated bytes incorrect size'
  return random_bytes
438
+
439
+
440
def GCD(a: int, b: int, /) -> int:
  """Greatest Common Divisor of integers `a` and `b`, both ≥ 0, by the Euclid method.

  O(log(min(a, b)))

  Args:
    a (int): integer a ≥ 0
    b (int): integer b ≥ 0 (can't be both zero)

  Returns:
    gcd(a, b)

  Raises:
    InputError: invalid inputs
  """
  if a < 0 or b < 0 or (a == 0 and b == 0):
    raise InputError(f'negative input or undefined gcd(0, 0): {a=} , {b=}')
  # keep the larger value first, then replace (a, b) by (b, a mod b) until b vanishes
  if a < b:
    a, b = b, a
  while b:
    a, b = b, a % b
  return a
466
+
467
+
468
def ExtendedGCD(a: int, b: int, /) -> tuple[int, int, int]:
  """Extended Greatest Common Divisor of integers `a` and `b`, both ≥ 0 (Euclid method).

  O(log(min(a, b)))

  Args:
    a (int): integer a ≥ 0
    b (int): integer b ≥ 0 (can't be both zero)

  Returns:
    (gcd, x, y) so that a * x + b * y = gcd
    x and y may be negative integers or zero but won't be both zero.

  Raises:
    InputError: invalid inputs
  """
  if a < 0 or b < 0 or (a == 0 and b == 0):
    raise InputError(f'negative input or undefined gcd(0, 0): {a=} , {b=}')
  # the loop wants the larger value first; remember a swap so the coefficients
  # can be handed back in the caller's original order
  swapped: bool = a < b
  if swapped:
    a, b = b, a
  # trivial case: gcd(a, 0) = a = a * 1 + 0 * 0
  if not b:
    return (a, 0, 1) if swapped else (a, 1, 0)
  # invariant: (x_prev, y_prev) are the coefficients that produce the current `a` and
  # (x_cur, y_cur) the ones that produce the current `b`, from the (swapped) inputs
  x_prev, x_cur = 1, 0
  y_prev, y_cur = 0, 1
  while b:
    quotient, remainder = divmod(a, b)
    a, b = b, remainder
    x_prev, x_cur = x_cur, x_prev - quotient * x_cur
    y_prev, y_cur = y_cur, y_prev - quotient * y_cur
  return (a, y_prev, x_prev) if swapped else (a, x_prev, y_prev)
502
+
503
+
504
def Hash256(data: bytes, /) -> bytes:
  """SHA-256 digest of `data`, as raw bytes. The result always has 32 bytes.

  Args:
    data (bytes): Data to compute hash for

  Returns:
    32 bytes (256 bits) of SHA-256 hash;
    if converted to hexadecimal (with BytesToHex() or hex()) will be 64 chars of string;
    if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**256
  """
  hasher = hashlib.sha256()
  hasher.update(data)
  return hasher.digest()
516
+
517
+
518
def Hash512(data: bytes, /) -> bytes:
  """SHA-512 digest of `data`, as raw bytes. The result always has 64 bytes.

  Args:
    data (bytes): Data to compute hash for

  Returns:
    64 bytes (512 bits) of SHA-512 hash;
    if converted to hexadecimal (with BytesToHex() or hex()) will be 128 chars of string;
    if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**512
  """
  hasher = hashlib.sha512()
  hasher.update(data)
  return hasher.digest()
530
+
531
+
532
+ def FileHash(full_path: str, /, *, digest: str = 'sha256') -> bytes:
533
+ """SHA-256 hex hash of file on disk. Always a length of 32 bytes (if default digest=='sha256').
534
+
535
+ Args:
536
+ full_path (str): Path to existing file on disk
537
+ digest (str, optional): Hash method to use, accepts 'sha256' (default) or 'sha512'
538
+
539
+ Returns:
540
+ 32 bytes (256 bits) of SHA-256 hash (if default digest=='sha256');
541
+ if converted to hexadecimal (with BytesToHex() or hex()) will be 64 chars of string;
542
+ if converted to int (big-endian, unsigned, with BytesToInt()) will be 0 ≤ i < 2**256
543
+
544
+ Raises:
545
+ InputError: file could not be found
546
+ """
547
+ # test inputs
548
+ digest = digest.lower().strip().replace('-', '') # normalize so we can accept e.g. "SHA-256"
549
+ if digest not in ('sha256', 'sha512'):
550
+ raise InputError(f'unrecognized digest: {digest!r}')
551
+ full_path = full_path.strip()
552
+ if not full_path or not os.path.exists(full_path):
553
+ raise InputError(f'file {full_path!r} not found for hashing')
554
+ # compute hash
555
+ logging.info(f'Hashing file {full_path!r}')
556
+ with open(full_path, 'rb') as file_obj:
557
+ return hashlib.file_digest(file_obj, digest).digest()
558
+
559
+
560
def ObfuscateSecret(data: str | bytes | int, /) -> str:
  """Obfuscate a secret (string/bytes/int): SHA-512 it and show only the first 4 hash bytes.

  The result always has 9 chars: 8 hexadecimal chars plus a closing '…', e.g. "aabbccdd…".
  Strings are hashed as their UTF-8 bytes and integers as their IntToBytes() bytes.
  Known vulnerability: If the secret is small, can be brute-forced!
  Use only on large (~>64bits) secrets.

  Args:
    data (str | bytes | int): Data to obfuscate

  Returns:
    obfuscated string, e.g. "aabbccdd…"

  Raises:
    InputError: `data` is not a string, bytes, or int
  """
  raw: Any
  if isinstance(data, str):
    raw = data.encode('utf-8')
  elif isinstance(data, int):
    raw = IntToBytes(data)
  else:
    raw = data
  if not isinstance(raw, bytes):
    raise InputError(f'invalid type for data: {type(raw)}')
  digest_hex: str = BytesToHex(Hash512(raw))
  return digest_hex[:8] + '…'
580
+
581
+
582
+ @dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
583
+ class CryptoKey(abc.ABC):
584
+ """A cryptographic key."""
585
+
586
+ def __post_init__(self) -> None:
587
+ """Check data."""
588
+
589
+ @abc.abstractmethod
590
+ def __str__(self) -> str:
591
+ """Safe (no secrets) string representation of the key.
592
+
593
+ Returns:
594
+ string representation of the key without leaking secrets
595
+ """
596
+ # every sub-class of CryptoKey has to implement its own version of __str__()
597
+ # TODO: make printing a part of the CLI
598
+
599
+ @final
600
+ def __repr__(self) -> str:
601
+ """Safe (no secrets) string representation of the key. Same as __str__().
602
+
603
+ Returns:
604
+ string representation of the key without leaking secrets
605
+ """
606
+ # concrete __repr__() delegates to the (abstract) __str__():
607
+ # this avoids marking __repr__() abstract while still unifying behavior
608
+ return self.__str__()
609
+
610
+ @final
611
+ def _DebugDump(self) -> str:
612
+ """Debug dump of the key object. NOT for logging, NOT for regular use, EXPOSES secrets.
613
+
614
+ We disable default __repr__() for the CryptoKey classes for security reasons, so we won't
615
+ leak private key values into logs, but this method allows for explicit access to the
616
+ class fields for debugging purposes by mimicking the usual dataclass __repr__().
617
+
618
+ Returns:
619
+ string with all the object's fields explicit values
620
+ """
621
+ cls: str = type(self).__name__
622
+ parts: list[str] = []
623
+ for field in dataclasses.fields(self):
624
+ val: Any = getattr(self, field.name) # getattr is fine with frozen/slots
625
+ parts.append(f'{field.name}={repr(val)}')
626
+ return f'{cls}({", ".join(parts)})'
627
+
628
+ @final
629
+ @property
630
+ def blob(self) -> bytes:
631
+ """Serial (bytes) representation of the object.
632
+
633
+ Returns:
634
+ bytes, pickled, representation of the object
635
+ """
636
+ return Serialize(self, compress=-2, silent=True)
637
+
638
+ @final
639
+ @property
640
+ def encoded(self) -> str:
641
+ """Base-64 representation of the object.
642
+
643
+ Returns:
644
+ str, pickled, base64, representation of the object
645
+ """
646
+ return BytesToEncoded(self.blob)
647
+
648
+ @final
649
+ def Blob(self, /, *, key: SymmetricCrypto | None = None, silent: bool = True) -> bytes:
650
+ """Serial (bytes) representation of the object with more options, including encryption.
651
+
652
+ Args:
653
+ key (SymmetricCrypto, optional): if given will key.Encrypt() data before saving
654
+ silent (bool, optional): if True (default) will not log
655
+
656
+ Returns:
657
+ bytes, pickled, representation of the object
658
+ """
659
+ return Serialize(self, compress=-2, key=key, silent=silent)
660
+
661
+ @final
662
+ def Encoded(self, /, *, key: SymmetricCrypto | None = None, silent: bool = True) -> str:
663
+ """Base-64 representation of the object with more options, including encryption.
664
+
665
+ Args:
666
+ key (SymmetricCrypto, optional): if given will key.Encrypt() data before saving
667
+ silent (bool, optional): if True (default) will not log
668
+
669
+ Returns:
670
+ str, pickled, base64, representation of the object
671
+ """
672
+ return BytesToEncoded(self.Blob(key=key, silent=silent))
673
+
674
+ @final
675
+ @classmethod
676
+ def Load(
677
+ cls, data: str | bytes, /, *,
678
+ key: SymmetricCrypto | None = None, silent: bool = True) -> Self:
679
+ """Load (create) object from serialized bytes or string.
680
+
681
+ Args:
682
+ data (str | bytes): if bytes is assumed from CryptoKey.blob/Blob(), and
683
+ if string is assumed from CryptoKey.encoded/Encoded()
684
+ key (SymmetricCrypto, optional): if given will key.Encrypt() data before saving
685
+ silent (bool, optional): if True (default) will not log
686
+
687
+ Returns:
688
+ a CryptoKey object ready for use
689
+ """
690
+ # if this is a string, then we suppose it is base64
691
+ if isinstance(data, str):
692
+ data = EncodedToBytes(data)
693
+ # we now have bytes and we suppose it came from CryptoKey.blob()/CryptoKey.CryptoBlob()
694
+ obj: CryptoKey = DeSerialize(data=data, key=key, silent=silent)
695
+ # make sure we've got an object that makes sense
696
+ if not isinstance(obj, CryptoKey): # type:ignore
697
+ raise InputError(f'serialized data is not a CryptoKey: {type(obj)}')
698
+ return obj # type:ignore
699
+
700
+
701
class SymmetricCrypto(abc.ABC):
  """Abstract interface for symmetric encryption: bytes in, bytes out.

  Contract:
    - Whatever extra material the algorithm needs for decryption (e.g. a `nonce` or a
      `tag`) has to be handled internally by the implementation and carried inside the
      returned ciphertext bytes; the interface has no separate channel for metadata.
    - If AEAD is supported, `associated_data` (AAD) must be authenticated. If not supported
      then `associated_data` different from None must raise InputError.
  """

  @abc.abstractmethod
  def Encrypt(self, plaintext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
    """Encrypt `plaintext`, returning the `ciphertext`.

    Args:
      plaintext (bytes): Data to encrypt.
      associated_data (bytes, optional): Optional AAD for AEAD modes; the same value
          must be given again to Decrypt()

    Returns:
      bytes: Ciphertext, including whatever nonce/tag the implementation needs to be
          able to decrypt it later (or the implementation must document how to get it)

    Raises:
      InputError: invalid inputs
      CryptoError: internal crypto failures
    """

  @abc.abstractmethod
  def Decrypt(self, ciphertext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
    """Decrypt `ciphertext`, returning the original `plaintext`.

    Args:
      ciphertext (bytes): Data to decrypt (including any embedded nonce/tag if applicable)
      associated_data (bytes, optional): Optional AAD; must be the same value that was
          given to Encrypt()

    Returns:
      bytes: Decrypted plaintext bytes

    Raises:
      InputError: invalid inputs
      CryptoError: internal crypto failures, authentication failure, key mismatch, etc
    """
750
+
751
+
752
def Serialize(
    python_obj: Any, /, *, file_path: str | None = None,
    compress: int | None = 3, key: SymmetricCrypto | None = None, silent: bool = False) -> bytes:
  """Serialize a Python object into a BLOB, optionally compress / encrypt / save to disk.

  Data path is:

    `obj` => pickle => (compress) => (encrypt) => (save to `file_path`) => return

  Every step is timed and the size of its output is measured, in bytes. Unless `silent`,
  all these times/sizes are logged together, in ONE logging.info() call, at the end.

  Compression is done by zstandard and `compress` is its level. Use this as reference:

  | Level    | Speed       | Compression ratio                 | Typical use case                        |
  | -------- | ----------- | --------------------------------- | --------------------------------------- |
  | -5 to -1 | Fastest     | Poor (better than no compression) | Real-time or very latency-sensitive     |
  | 0…3      | Very fast   | Good ratio                        | Default CLI choice, safe baseline       |
  | 4…6      | Moderate    | Better ratio                      | Good compromise for general persistence |
  | 7…10     | Slower      | Marginally better ratio           | Only if storage space is precious       |
  | 11…15    | Much slower | Slight gains                      | Large archives, not for runtime use     |
  | 16…22    | Very slow   | Tiny gains                        | Archival-only, multi-GB datasets        |

  Args:
    python_obj (Any): serializable (i.e., pickle-able) Python object
    file_path (str, optional): full path to optionally save the data to
    compress (int | None, optional): Compress level before encrypting/saving; values
        outside -22 ≤ compress ≤ 22 are clamped to that range; None is no compression;
        default is 3, which is fast, see table above for other values
    key (SymmetricCrypto, optional): if given will key.Encrypt() data before saving
    silent (bool, optional): if True will not log; default is False (will log)

  Returns:
    bytes: serialized binary data corresponding to obj + (compression) + (encryption)
  """
  report: list[str] = []  # one line per step of the data path, only filled if not silent
  with Timer('Serialization complete', emit_log=False) as tm_all:
    # step 1: pickle (always)
    with Timer('PICKLE', emit_log=False) as tm_pickle:
      blob: bytes = pickle.dumps(python_obj, protocol=_PICKLE_PROTOCOL)
    if not silent:
      report.append(f' {tm_pickle}, {HumanizedBytes(len(blob))}')
    # step 2: compress (optional), with the level clamped into the accepted range
    if compress is not None:
      level: int = max(-22, min(22, compress))
      with Timer(f'COMPRESS@{level}', emit_log=False) as tm_compress:
        blob = zstandard.ZstdCompressor(level=level).compress(blob)
      if not silent:
        report.append(f' {tm_compress}, {HumanizedBytes(len(blob))}')
    # step 3: encrypt (optional), authenticating the fixed module AAD
    if key is not None:
      with Timer('ENCRYPT', emit_log=False) as tm_crypto:
        blob = key.Encrypt(blob, associated_data=_PICKLE_AAD)
      if not silent:
        report.append(f' {tm_crypto}, {HumanizedBytes(len(blob))}')
    # step 4: save to disk (optional)
    if file_path is not None:
      with Timer('SAVE', emit_log=False) as tm_save:
        with open(file_path, 'wb') as file_obj:
          file_obj.write(blob)
      if not silent:
        report.append(f' {tm_save}, to {file_path!r}')
  # log (after the overall timer has stopped, so it shows the final time) and return
  if not silent:
    logging.info(f'{tm_all}; parts:\n' + '\n'.join(report))
  return blob
818
+
819
+
820
def DeSerialize(
    *, data: bytes | None = None, file_path: str | None = None,
    key: SymmetricCrypto | None = None, silent: bool = False) -> Any:
  """Loads (de-serializes) a BLOB back to a Python object, optionally decrypting / decompressing.

  Data path is:

    `data` or `file_path` => (decrypt) => (decompress) => unpickle => return object

  Every step is timed and the size of its output is measured, in bytes. Unless `silent`,
  all these times/sizes are logged together, in ONE logging.info() call, at the end.
  Compression versus no compression will be automatically detected.

  SECURITY: the last step is pickle.loads(), and un-pickling can execute arbitrary code.
  Only de-serialize data that comes from a trusted source.

  Args:
    data (bytes, optional): if given, use this as binary data string (input);
        must have at least 4 bytes; can NOT be given together with `file_path`
    file_path (str, optional): if given, use this as file path to load binary data string
        (input); must exist; can NOT be given together with `data`
    key (SymmetricCrypto, optional): if given will key.Decrypt() data before decompressing/loading
    silent (bool, optional): if True will not log; default is False (will log)

  Returns:
    De-Serialized Python object corresponding to data

  Raises:
    InputError: invalid inputs (both or none of `data`/`file_path`; `file_path` not
        found; `data` with less than 4 bytes)
    CryptoError: internal crypto failures, authentication failure, key mismatch, etc
  """
  # test inputs; all tests compare against None (and NOT truthiness) so that empty inputs,
  # like data=b'' or file_path='', are rejected here instead of slipping through
  # validation and only failing much later, inside pickle, with an unrelated error
  if (data is None) == (file_path is None):
    raise InputError('you must provide only one of either `data` or `file_path`')
  if file_path is not None and not os.path.exists(file_path):
    raise InputError(f'invalid file_path: {file_path!r}')
  if data is not None and len(data) < 4:
    raise InputError('invalid data: too small')
  # start the pipeline
  obj: bytes = b'' if data is None else data
  messages: list[str] = (
      [f'DATA: {HumanizedBytes(len(obj))}'] if data is not None and not silent else [])
  with Timer('De-Serialization complete', emit_log=False) as tm_all:
    # optionally load from disk
    if file_path is not None:
      assert not obj, 'should never happen: if we have a file obj should be empty'
      with Timer('LOAD', emit_log=False) as tm_load:
        with open(file_path, 'rb') as file_obj:
          obj = file_obj.read()
      if not silent:
        messages.append(f' {tm_load}, {HumanizedBytes(len(obj))}, from {file_path!r}')
    # decrypt, if needed, authenticating the fixed module AAD
    if key is not None:
      with Timer('DECRYPT', emit_log=False) as tm_crypto:
        obj = key.Decrypt(obj, associated_data=_PICKLE_AAD)
      if not silent:
        messages.append(f' {tm_crypto}, {HumanizedBytes(len(obj))}')
    # decompress: we try to detect compression to determine if we must call zstandard;
    # the detection reads the first 4 bytes, as a little-endian integer, and accepts
    # either the regular frame magic number or any of the skippable frame magic numbers
    is_compressed: bool = False
    if len(obj) >= 4:
      magic: int = int.from_bytes(obj[:4], 'little')
      is_compressed = (
          magic == _ZSTD_MAGIC_FRAME or
          _ZSTD_MAGIC_SKIPPABLE_MIN <= magic <= _ZSTD_MAGIC_SKIPPABLE_MAX)
    if is_compressed:
      with Timer('DECOMPRESS', emit_log=False) as tm_decompress:
        obj = zstandard.ZstdDecompressor().decompress(obj)
      if not silent:
        messages.append(f' {tm_decompress}, {HumanizedBytes(len(obj))}')
    elif not silent:
      messages.append(' (no compression detected)')
    # create the actual object = unpickle
    # NOTE(security): pickle.loads() on untrusted bytes can execute arbitrary code
    with Timer('UNPICKLE', emit_log=False) as tm_unpickle:
      python_obj: Any = pickle.loads(obj)
    if not silent:
      messages.append(f' {tm_unpickle}')
  # log (after the overall timer has stopped, so it shows the final time) and return
  if not silent:
    logging.info(f'{tm_all}; parts:\n' + '\n'.join(messages))
  return python_obj
893
+
894
+
895
@dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
class PublicBid(CryptoKey):
    """Public half of a hash commitment to a secret bid, to be revealed/validated later.

    The commitment is: public_hash = Hash512(public_key || private_key || secret_bid)

    All values are bytes. (public_key, public_hash) may be published; (private_key,
    secret_bid) stay private until reveal, at which point anyone can recompute the hash
    and check it against this object.

    Attributes:
      public_key (bytes): 512-bits (64 bytes) random value
      public_hash (bytes): SHA-512 hash (64 bytes) of (public_key || private_key || secret_bid)
    """

    public_key: bytes
    public_hash: bytes

    def __post_init__(self) -> None:
        """Validate field sizes after dataclass initialization.

        Raises:
          InputError: `public_key` or `public_hash` is not exactly 64 bytes long
        """
        # explicit two-argument super(): the original marks this as required because of
        # the dataclass decorator
        super(PublicBid, self).__post_init__()  # pylint: disable=super-with-arguments  # needed here b/c: dataclass
        sizes_ok: bool = len(self.public_key) == 64 and len(self.public_hash) == 64
        if not sizes_ok:
            raise InputError(f'invalid public_key or public_hash: {self}')

    def __str__(self) -> str:
        """Human-readable representation; both fields are public, so nothing is hidden.

        Returns:
          string with the key in URL-safe base64 and the hash in hexadecimal
        """
        encoded_key: str = BytesToEncoded(self.public_key)
        hex_hash: str = BytesToHex(self.public_hash)
        return f'PublicBid(public_key={encoded_key}, public_hash={hex_hash})'

    def VerifyBid(self, private_key: bytes, secret: bytes, /) -> bool:
        """Check a revealed (private_key, secret) pair against this public commitment.

        Args:
          private_key (bytes): 512-bits private key
          secret (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)

        Returns:
          True if the pair matches the commitment, False otherwise

        Raises:
          InputError: invalid inputs (propagated from PrivateBid validation)
        """
        try:
            # PrivateBid runs every check in its __post_init__; only a hash mismatch
            # (CryptoError) means "not valid", InputError is deliberately not caught
            PrivateBid(
                public_key=self.public_key,
                public_hash=self.public_hash,
                private_key=private_key,
                secret_bid=secret)
        except CryptoError:
            return False  # bid does not match the public commitment
        return True  # construction succeeded, so everything checked out

    @classmethod
    def Copy(cls, other: PublicBid, /) -> Self:
        """Build a new object from only the public fields of `other`.

        Args:
          other (PublicBid): a public or public/private bid to take the public parts from

        Returns:
          new `cls` instance carrying `other.public_key` and `other.public_hash`
        """
        public_parts: dict[str, bytes] = {
            'public_key': other.public_key,
            'public_hash': other.public_hash,
        }
        return cls(**public_parts)
957
+
958
+
959
@dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
class PrivateBid(PublicBid):
    """Full bid (public + private parts) that can be revealed and checked (see PublicBid).

    On construction the object verifies that `public_hash` really is the hash of
    (public_key || private_key || secret_bid), so an existing instance is always consistent.

    Attributes:
      private_key (bytes): 512-bits (64 bytes) random value
      secret_bid (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)
    """

    private_key: bytes
    secret_bid: bytes

    def __post_init__(self) -> None:
        """Validate sizes and the commitment hash after dataclass initialization.

        Raises:
          InputError: invalid inputs (wrong `private_key` size or empty `secret_bid`, plus
              anything raised by the parent validation)
          CryptoError: bid does not match the public commitment
        """
        # explicit two-argument super(): the original marks this as required because of
        # the dataclass decorator
        super(PrivateBid, self).__post_init__()  # pylint: disable=super-with-arguments  # needed here b/c: dataclass
        private_ok: bool = len(self.private_key) == 64 and len(self.secret_bid) >= 1
        if not private_ok:
            raise InputError(f'invalid private_key or secret_bid: {self}')
        # recompute the commitment and compare with the published one
        expected_hash = Hash512(self.public_key + self.private_key + self.secret_bid)
        if self.public_hash != expected_hash:
            raise CryptoError(f'inconsistent bid: {self}')

    def __str__(self) -> str:
        """Safe (no secrets) string representation of the PrivateBid.

        Returns:
          string with the public part in full and the private fields passed through
          ObfuscateSecret()
        """
        public_part: str = super(PrivateBid, self).__str__()  # pylint: disable=super-with-arguments
        hidden_key = ObfuscateSecret(self.private_key)
        hidden_bid = ObfuscateSecret(self.secret_bid)
        return f'PrivateBid({public_part}, private_key={hidden_key}, secret_bid={hidden_bid})'

    @classmethod
    def New(cls, secret: bytes, /) -> Self:
        """Create a fresh bid committing to `secret`.

        Args:
          secret (bytes): Any number of bytes (≥1) to bid on (e.g., UTF-8 encoded string)

        Returns:
          PrivateBid object ready for use (use PublicBid.Copy() to get the public part)

        Raises:
          InputError: invalid inputs (empty `secret`)
        """
        # test inputs
        secret_size: int = len(secret)
        if secret_size < 1:
            raise InputError(f'invalid secret length: {secret_size}')
        # two independent 512-bit random values: one to publish, one to keep
        public_key: bytes = RandBytes(64)
        private_key: bytes = RandBytes(64)
        # the commitment binds both keys and the secret together
        commitment = Hash512(public_key + private_key + secret)
        return cls(
            public_key=public_key,
            public_hash=commitment,
            private_key=private_key,
            secret_bid=secret)