transcrypto 1.8.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,735 @@
1
+ # SPDX-FileCopyrightText: Copyright 2026 Daniel Balparda <balparda@github.com>
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Balparda's TransCrypto base CryptoKey class and protocols, including serialization."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import abc as abstract
8
+ import dataclasses
9
+ import enum
10
+ import json
11
+ import logging
12
+ import pathlib
13
+ import pickle # noqa: S403
14
+ import sys
15
+ from collections import abc
16
+ from typing import (
17
+ Any,
18
+ Protocol,
19
+ Self,
20
+ cast,
21
+ final,
22
+ runtime_checkable,
23
+ )
24
+
25
+ import zstandard
26
+
27
+ from transcrypto.utils import base, human, timer
28
+
29
+ # TODO: more consistent logging in whole project
30
+
31
+ # Crypto types: add bytes for cryptographic data; has to be encoded for JSON serialization
32
+ type CryptValue = bool | int | float | str | bytes | list[CryptValue] | dict[str, CryptValue] | None
33
+ type CryptDict = dict[str, CryptValue]
34
+ _JSON_DATACLASS_TYPES: set[str] = {
35
+ # native support
36
+ 'int',
37
+ 'float',
38
+ 'str',
39
+ 'bool',
40
+ # support for lists for now, but no nested lists or dicts yet
41
+ 'list[int]',
42
+ 'list[float]',
43
+ 'list[str]',
44
+ 'list[bool]',
45
+ # need conversion/encoding: see CryptValue/CryptDict
46
+ 'bytes',
47
+ }
48
+
49
+ # these control the pickling of data, do NOT ever change, or you will break all databases
50
+ # <https://docs.python.org/3/library/pickle.html#pickle.DEFAULT_PROTOCOL>
51
+ _PICKLE_PROTOCOL = 4 # protocol 4 available since python v3.8 # do NOT ever change!
52
+ PickleGeneric: abc.Callable[[Any], bytes] = lambda o: pickle.dumps(o, protocol=_PICKLE_PROTOCOL)
53
+ UnpickleGeneric: abc.Callable[[bytes], Any] = pickle.loads # noqa: S301
54
+ PickleJSON: abc.Callable[[base.JSONDict], bytes] = lambda d: json.dumps(
55
+ d, separators=(',', ':')
56
+ ).encode('utf-8')
57
+ UnpickleJSON: abc.Callable[[bytes], base.JSONDict] = lambda b: json.loads(b.decode('utf-8'))
58
+ _PICKLE_AAD = b'transcrypto.base.Serialize.1.0' # do NOT ever change!
59
+ # these help find compressed files, do NOT change unless zstandard changes
60
+ _ZSTD_MAGIC_FRAME = 0xFD2FB528
61
+ _ZSTD_MAGIC_SKIPPABLE_MIN = 0x184D2A50
62
+ _ZSTD_MAGIC_SKIPPABLE_MAX = 0x184D2A5F
63
+
64
+
65
class CryptoError(base.Error):
  """TransCrypto exception for cryptographic failures; derives from `base.Error`."""
67
+
68
+
69
+ class CryptoInputType(enum.StrEnum):
70
+ """Types of inputs that can represent arbitrary bytes."""
71
+
72
+ # prefixes; format prefixes are all 4 bytes
73
+ PATH = '@' # @path on disk → read bytes from a file
74
+ STDIN = '@-' # stdin
75
+ HEX = 'hex:' # hex:deadbeef → decode hex
76
+ BASE64 = 'b64:' # b64:... → decode base64
77
+ STR = 'str:' # str:hello → UTF-8 encode the literal
78
+ RAW = 'raw:' # raw:... → byte literals via \\xNN escapes (rare but handy)
79
+
80
+
81
def DetectInputType(data_str: str, /) -> CryptoInputType | None:
  """Guess how `data_str` encodes its bytes by looking at its prefix.

  Surrounding whitespace is ignored. The stdin marker ('@-') only counts when it is the whole
  string; every other type is recognized by `str.startswith()`.

  Args:
    data_str (str): data to inspect, putatively a bytes blob

  Returns:
    CryptoInputType | None: the matching type when a known prefix is present, None otherwise

  """
  candidate: str = data_str.strip()
  # '@-' is tested first, and as an exact match, because it also begins with the PATH prefix '@'
  if candidate == CryptoInputType.STDIN:
    return CryptoInputType.STDIN
  prefixed: tuple[CryptoInputType, ...] = (
    CryptoInputType.PATH,
    CryptoInputType.STR,
    CryptoInputType.HEX,
    CryptoInputType.BASE64,
    CryptoInputType.RAW,
  )
  # first prefix that matches wins; None when there is no match at all
  return next((kind for kind in prefixed if candidate.startswith(kind)), None)
104
+
105
+
106
def BytesFromInput(data_str: str, /, *, expect: CryptoInputType | None = None) -> bytes:  # noqa: C901, PLR0911, PLR0912
  """Turn the textual input `data_str` into `bytes`, auto-detecting or enforcing its type.

  Besides decoding literals, this can read a file ('@<path>') or stdin ('@-').
  Surrounding whitespace in `data_str` is stripped before anything else happens.

  Args:
    data_str (str): data to process, putatively a bytes blob
    expect (CryptoInputType | None, optional): when None the type is taken from the prefix of
        `data_str`, and a string without a known prefix is treated as 'str:'; when given,
        `data_str` must either carry exactly that prefix or no known prefix at all

  Returns:
    bytes: data

  Raises:
    base.InputError: `expect` conflicts with the detected prefix, or reading/decoding failed

  """
  data_str = data_str.strip()
  detected_type: CryptoInputType | None = DetectInputType(data_str)
  # reconcile what the caller asked for with what the prefix says
  if detected_type is None:
    if expect is None:
      expect = CryptoInputType.STR  # no prefix, nothing enforced: plain text
  elif expect is None:
    expect = detected_type  # nothing enforced: trust the prefix
  elif detected_type != expect:
    raise base.InputError(
      f'Expected type {expect=} is different from detected type {detected_type=}'
    )
  # the type is settled; drop its prefix (a no-op when the prefix is absent)
  payload: str = data_str.removeprefix(expect)
  # every failure below, including the InputErrors raised inside, is re-raised as
  # InputError('invalid input: ...') by the handler at the bottom
  try:
    if expect == CryptoInputType.STDIN:
      # prefer the binary buffer; when stdin has none, read text and encode it as UTF-8
      # (consistent with the 'str:' policy)
      binary_stdin = getattr(sys.stdin, 'buffer', None)
      if binary_stdin is not None:
        raw_input: bytes = binary_stdin.read()
        if not isinstance(raw_input, bytes):  # pyright: ignore[reportUnnecessaryIsInstance]
          raise base.InputError('sys.stdin.buffer.read() produced non-binary data')  # noqa: TRY301
        return raw_input
      text_input: str = sys.stdin.read()
      if not isinstance(text_input, str):  # pyright: ignore[reportUnnecessaryIsInstance]
        raise base.InputError('sys.stdin.read() produced non-text data')  # noqa: TRY301
      return text_input.encode('utf-8')
    if expect == CryptoInputType.PATH:
      source = pathlib.Path(payload)
      if not source.exists():
        raise base.InputError(f'cannot find file {payload!r}')  # noqa: TRY301
      return source.read_bytes()
    if expect == CryptoInputType.STR:
      return payload.encode('utf-8')
    if expect == CryptoInputType.HEX:
      return base.HexToBytes(payload)
    if expect == CryptoInputType.BASE64:
      return base.EncodedToBytes(payload)
    if expect == CryptoInputType.RAW:
      return base.RawToBytes(payload)
    raise base.InputError(f'invalid type {expect!r}')  # noqa: TRY301
  except Exception as err:
    raise base.InputError(f'invalid input: {err}') from err
169
+
170
+
171
+ @dataclasses.dataclass(kw_only=True, slots=True, frozen=True, repr=False)
172
+ class CryptoKey(abstract.ABC):
173
+ """A cryptographic key."""
174
+
175
+ @abstract.abstractmethod
176
+ def __post_init__(self) -> None:
177
+ """Check data."""
178
+ # every sub-class of CryptoKey has to implement its own version of __post_init__()
179
+
180
+ @abstract.abstractmethod
181
+ def __str__(self) -> str:
182
+ """Safe (no secrets) string representation of the key.
183
+
184
+ Returns:
185
+ string representation of the key without leaking secrets
186
+
187
+ """
188
+ # every sub-class of CryptoKey has to implement its own version of __str__()
189
+
190
+ @final
191
+ def __repr__(self) -> str:
192
+ """Safe (no secrets) string representation of the key. Same as __str__().
193
+
194
+ Returns:
195
+ string representation of the key without leaking secrets
196
+
197
+ """
198
+ # concrete __repr__() delegates to the (abstract) __str__():
199
+ # this avoids marking __repr__() abstract while still unifying behavior
200
+ return self.__str__()
201
+
202
+ @final
203
+ def _DebugDump(self) -> str:
204
+ """Debug dump of the key object. NOT for logging, NOT for regular use, EXPOSES secrets.
205
+
206
+ We disable default __repr__() for the CryptoKey classes for security reasons, so we won't
207
+ leak private key values into logs, but this method allows for explicit access to the
208
+ class fields for debugging purposes by mimicking the usual dataclass __repr__().
209
+
210
+ Returns:
211
+ string with all the object's fields explicit values
212
+
213
+ """
214
+ cls: str = type(self).__name__
215
+ parts: list[str] = []
216
+ for field in dataclasses.fields(self):
217
+ val: Any = getattr(self, field.name) # getattr is fine with frozen/slots
218
+ parts.append(f'{field.name}={val!r}')
219
+ return f'{cls}({", ".join(parts)})'
220
+
221
+ @final
222
+ @property
223
+ def _json_dict(self) -> base.JSONDict:
224
+ """Dictionary representation of the object suitable for JSON conversion.
225
+
226
+ Returns:
227
+ JSONDict: representation of the object suitable for JSON conversion
228
+
229
+ Raises:
230
+ base.ImplementationError: object has types that are not supported in JSON
231
+
232
+ """
233
+ self_dict: CryptDict = dataclasses.asdict(self)
234
+ for field in dataclasses.fields(self):
235
+ # check the type is OK
236
+ if field.type not in _JSON_DATACLASS_TYPES:
237
+ raise base.ImplementationError(
238
+ f'Unsupported JSON field {field.name!r}/{field.type} not in {_JSON_DATACLASS_TYPES}'
239
+ )
240
+ # convert types that we accept but JSON does not
241
+ if field.type == 'bytes':
242
+ self_dict[field.name] = base.BytesToEncoded(cast('bytes', self_dict[field.name]))
243
+ return cast('base.JSONDict', self_dict)
244
+
245
+ @final
246
+ @property
247
+ def json(self) -> str:
248
+ """JSON representation of the object, tightly packed, not for humans.
249
+
250
+ Returns:
251
+ str: JSON representation of the object, tightly packed
252
+
253
+ """
254
+ return json.dumps(self._json_dict, separators=(',', ':'))
255
+
256
+ @final
257
+ @property
258
+ def formatted_json(self) -> str:
259
+ """JSON representation of the object formatted for humans.
260
+
261
+ Returns:
262
+ str: JSON representation of the object formatted for humans
263
+
264
+ """
265
+ return json.dumps(self._json_dict, indent=4, sort_keys=True)
266
+
267
+ @final
268
+ @classmethod
269
+ def _FromJSONDict(cls, json_dict: base.JSONDict, /) -> Self:
270
+ """Create object from JSON representation.
271
+
272
+ Args:
273
+ json_dict (base.JSONDict): JSON dict
274
+
275
+ Returns:
276
+ a CryptoKey object ready for use
277
+
278
+ Raises:
279
+ base.InputError: unexpected type/fields
280
+ base.ImplementationError: unsupported JSON field
281
+
282
+ """
283
+ # check we got exactly the fields we needed
284
+ cls_fields: set[str] = {f.name for f in dataclasses.fields(cls)}
285
+ json_fields: set[str] = set(json_dict)
286
+ if cls_fields != json_fields:
287
+ raise base.InputError(
288
+ f'JSON data decoded to unexpected fields: {cls_fields=} / {json_fields=}'
289
+ )
290
+ # reconstruct the types we meddled with inside self._json_dict
291
+ for field in dataclasses.fields(cls):
292
+ if field.type not in _JSON_DATACLASS_TYPES:
293
+ raise base.ImplementationError(
294
+ f'Unsupported JSON field {field.name!r}/{field.type} not in {_JSON_DATACLASS_TYPES}'
295
+ )
296
+ if field.type == 'bytes':
297
+ json_dict[field.name] = base.EncodedToBytes(json_dict[field.name]) # type: ignore[assignment, arg-type]
298
+ # build the object
299
+ return cls(**json_dict)
300
+
301
+ @final
302
+ @classmethod
303
+ def FromJSON(cls, json_data: str, /) -> Self:
304
+ """Create object from JSON representation.
305
+
306
+ Args:
307
+ json_data (str): JSON string
308
+
309
+ Returns:
310
+ a CryptoKey object ready for use
311
+
312
+ Raises:
313
+ base.InputError: unexpected type/fields
314
+
315
+ """
316
+ # get the dict back
317
+ json_dict: base.JSONDict = json.loads(json_data)
318
+ if not isinstance(json_dict, dict): # pyright: ignore[reportUnnecessaryIsInstance]
319
+ raise base.InputError(f'JSON data decoded to unexpected type: {type(json_dict)}')
320
+ return cls._FromJSONDict(json_dict)
321
+
322
+ @final
323
+ @property
324
+ def blob(self) -> bytes:
325
+ """Serial (bytes) representation of the object.
326
+
327
+ Returns:
328
+ bytes, pickled, representation of the object
329
+
330
+ """
331
+ return self.Blob()
332
+
333
+ @final
334
+ def Blob(self, /, *, encryption_key: Encryptor | None = None, silent: bool = True) -> bytes:
335
+ """Get serial (bytes) representation of the object with more options, including encryption.
336
+
337
+ Args:
338
+ encryption_key (Encryptor, optional): if given will encryption_key.Encrypt() data before save
339
+ silent (bool, optional): if True (default) will not log
340
+
341
+ Returns:
342
+ bytes, pickled, representation of the object
343
+
344
+ """
345
+ return Serialize(
346
+ self._json_dict, compress=-2, encryption_key=encryption_key, silent=silent, pickler=PickleJSON
347
+ )
348
+
349
+ @final
350
+ @property
351
+ def encoded(self) -> str:
352
+ """Base-64 representation of the object.
353
+
354
+ Returns:
355
+ str, pickled, base64, representation of the object
356
+
357
+ """
358
+ return self.Encoded()
359
+
360
+ @final
361
+ def Encoded(self, /, *, encryption_key: Encryptor | None = None, silent: bool = True) -> str:
362
+ """Base-64 representation of the object with more options, including encryption.
363
+
364
+ Args:
365
+ encryption_key (Encryptor, optional): if given will encryption_key.Encrypt() data before save
366
+ silent (bool, optional): if True (default) will not log
367
+
368
+ Returns:
369
+ str, pickled, base64, representation of the object
370
+
371
+ """
372
+ return CryptoInputType.BASE64 + base.BytesToEncoded(
373
+ self.Blob(encryption_key=encryption_key, silent=silent)
374
+ )
375
+
376
+ @final
377
+ @property
378
+ def hex(self) -> str:
379
+ """Hexadecimal representation of the object.
380
+
381
+ Returns:
382
+ str, pickled, hexadecimal, representation of the object
383
+
384
+ """
385
+ return self.Hex()
386
+
387
+ @final
388
+ def Hex(self, /, *, encryption_key: Encryptor | None = None, silent: bool = True) -> str:
389
+ """Hexadecimal representation of the object with more options, including encryption.
390
+
391
+ Args:
392
+ encryption_key (Encryptor, optional): if given will encryption_key.Encrypt() data before save
393
+ silent (bool, optional): if True (default) will not log
394
+
395
+ Returns:
396
+ str, pickled, hexadecimal, representation of the object
397
+
398
+ """
399
+ return CryptoInputType.HEX + base.BytesToHex(
400
+ self.Blob(encryption_key=encryption_key, silent=silent)
401
+ )
402
+
403
+ @final
404
+ @property
405
+ def raw(self) -> str:
406
+ """Raw escaped binary representation of the object.
407
+
408
+ Returns:
409
+ str, pickled, raw escaped binary, representation of the object
410
+
411
+ """
412
+ return self.Raw()
413
+
414
+ @final
415
+ def Raw(self, /, *, encryption_key: Encryptor | None = None, silent: bool = True) -> str:
416
+ """Raw escaped binary representation of the object with more options, including encryption.
417
+
418
+ Args:
419
+ encryption_key (Encryptor, optional): if given will encryption_key.Encrypt() data before save
420
+ silent (bool, optional): if True (default) will not log
421
+
422
+ Returns:
423
+ str, pickled, raw escaped binary, representation of the object
424
+
425
+ """
426
+ return CryptoInputType.RAW + base.BytesToRaw(
427
+ self.Blob(encryption_key=encryption_key, silent=silent)
428
+ )
429
+
430
+ @final
431
+ @classmethod
432
+ def Load(
433
+ cls, data: str | bytes, /, *, decryption_key: Decryptor | None = None, silent: bool = True
434
+ ) -> Self:
435
+ """Load (create) object from serialized bytes or string.
436
+
437
+ Args:
438
+ data (str | bytes): if bytes is assumed from CryptoKey.blob/Blob(), and
439
+ if string is assumed from CryptoKey.encoded/Encoded()
440
+ decryption_key (Decryptor, optional): if given will decryption_key.Decrypt() data before load
441
+ silent (bool, optional): if True (default) will not log
442
+
443
+ Returns:
444
+ a CryptoKey object ready for use
445
+
446
+ Raises:
447
+ base.InputError: decode error
448
+
449
+ """
450
+ # if this is a string, then we suppose it is base64
451
+ if isinstance(data, str):
452
+ data = BytesFromInput(data)
453
+ # we now have bytes and we suppose it came from CryptoKey.blob()/CryptoKey.CryptoBlob()
454
+ try:
455
+ json_dict: base.JSONDict = DeSerialize(
456
+ data=data, decryption_key=decryption_key, silent=silent, unpickler=UnpickleJSON
457
+ )
458
+ return cls._FromJSONDict(json_dict)
459
+ except Exception as err:
460
+ raise base.InputError(f'input decode error: {err}') from err
461
+
462
+
463
+ @runtime_checkable
464
+ class Encryptor(Protocol):
465
+ """Abstract interface for a class that has encryption.
466
+
467
+ Contract:
468
+ - If algorithm accepts a `nonce` or `tag` these have to be handled internally by the
469
+ implementation and appended to the `ciphertext`/`signature`.
470
+ - If AEAD is supported, `associated_data` (AAD) must be authenticated. If not supported
471
+ then `associated_data` different from None must raise InputError.
472
+
473
+ Notes:
474
+ The interface is deliberately minimal: byte-in / byte-out.
475
+ Metadata like nonce/tag may be:
476
+ - returned alongside `ciphertext`/`signature`, or
477
+ - bundled/serialized into `ciphertext`/`signature` by the implementation.
478
+
479
+ """
480
+
481
+ @abstract.abstractmethod
482
+ def Encrypt(self, plaintext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
483
+ """Encrypt `plaintext` and return `ciphertext`.
484
+
485
+ Args:
486
+ plaintext (bytes): Data to encrypt.
487
+ associated_data (bytes, optional): Optional AAD for AEAD modes; must be
488
+ provided again on decrypt
489
+
490
+ Returns:
491
+ bytes: Ciphertext; if a nonce/tag is needed for decryption, the implementation
492
+ must encode it within the returned bytes (or document how to retrieve it)
493
+
494
+ Raises:
495
+ base.InputError: invalid inputs
496
+ key.CryptoError: internal crypto failures
497
+
498
+ """
499
+
500
+
501
+ @runtime_checkable
502
+ class Decryptor(Protocol):
503
+ """Abstract interface for a class that has decryption (see contract/notes in Encryptor)."""
504
+
505
+ @abstract.abstractmethod
506
+ def Decrypt(self, ciphertext: bytes, /, *, associated_data: bytes | None = None) -> bytes:
507
+ """Decrypt `ciphertext` and return the original `plaintext`.
508
+
509
+ Args:
510
+ ciphertext (bytes): Data to decrypt (including any embedded nonce/tag if applicable)
511
+ associated_data (bytes, optional): Optional AAD (must match what was used during encrypt)
512
+
513
+ Returns:
514
+ bytes: Decrypted plaintext bytes
515
+
516
+ Raises:
517
+ base.InputError: invalid inputs
518
+ key.CryptoError: internal crypto failures, authentication failure, key mismatch, etc
519
+
520
+ """
521
+
522
+
523
+ @runtime_checkable
524
+ class Verifier(Protocol):
525
+ """Abstract interface for asymmetric signature verify. (see contract/notes in Encryptor)."""
526
+
527
+ @abstract.abstractmethod
528
+ def Verify(
529
+ self, message: bytes, signature: bytes, /, *, associated_data: bytes | None = None
530
+ ) -> bool:
531
+ """Verify a `signature` for `message`. True if OK; False if failed verification.
532
+
533
+ Args:
534
+ message (bytes): Data that was signed (including any embedded nonce/tag if applicable)
535
+ signature (bytes): Signature data to verify (including any embedded nonce/tag if applicable)
536
+ associated_data (bytes, optional): Optional AAD (must match what was used during signing)
537
+
538
+ Returns:
539
+ True if signature is valid, False otherwise
540
+
541
+ Raises:
542
+ base.InputError: invalid inputs
543
+ key.CryptoError: internal crypto failures, authentication failure, key mismatch, etc
544
+
545
+ """
546
+
547
+
548
+ @runtime_checkable
549
+ class Signer(Protocol):
550
+ """Abstract interface for asymmetric signing. (see contract/notes in Encryptor)."""
551
+
552
+ @abstract.abstractmethod
553
+ def Sign(self, message: bytes, /, *, associated_data: bytes | None = None) -> bytes:
554
+ """Sign `message` and return the `signature`.
555
+
556
+ Args:
557
+ message (bytes): Data to sign.
558
+ associated_data (bytes, optional): Optional AAD for AEAD modes; must be
559
+ provided again on decrypt
560
+
561
+ Returns:
562
+ bytes: Signature; if a nonce/tag is needed for decryption, the implementation
563
+ must encode it within the returned bytes (or document how to retrieve it)
564
+
565
+ Raises:
566
+ base.InputError: invalid inputs
567
+ key.CryptoError: internal crypto failures
568
+
569
+ """
570
+
571
+
572
+ def Serialize[T](
573
+ python_obj: T,
574
+ /,
575
+ *,
576
+ file_path: str | None = None,
577
+ compress: int | None = 3,
578
+ encryption_key: Encryptor | None = None,
579
+ silent: bool = False,
580
+ pickler: abc.Callable[[T], bytes] = PickleGeneric,
581
+ ) -> bytes:
582
+ """Serialize a Python object into a BLOB, optionally compress / encrypt / save to disk.
583
+
584
+ Data path is:
585
+
586
+ `obj` => [pickler] => (compress) => (encrypt) => (save to `file_path`) => return
587
+
588
+ At every step of the data path the data will be measured, in bytes.
589
+ Every data conversion will be timed. The measurements/times will be logged (once).
590
+
591
+ Compression levels / speed can be controlled by `compress`. Use this as reference:
592
+
593
+ | Level | Speed | Compression ratio | Typical use case |
594
+ | -------- | ------------| ------------------------| --------------------------------------- |
595
+ | -5 to -1 | Fastest | Poor (better than none) | Real-time / very latency-sensitive |
596
+ | 0…3 | Very fast | Good ratio | Default CLI choice, safe baseline |
597
+ | 4…6 | Moderate | Better ratio | Good compromise for general persistence |
598
+ | 7…10 | Slower | Marginally better ratio | Only if storage space is precious |
599
+ | 11…15 | Much slower | Slight gains | Large archives, not for runtime use |
600
+ | 16…22 | Very slow | Tiny gains | Archival-only, multi-GB datasets |
601
+
602
+ Args:
603
+ python_obj (Any): serializable Python object
604
+ file_path (str, optional): full path to optionally save the data to
605
+ compress (int | None, optional): Compress level before encrypting/saving; -22 ≤ compress ≤ 22;
606
+ None is no compression; default is 3, which is fast, see table above for other values
607
+ encryption_key (Encryptor, optional): if given will encryption_key.Encrypt() data before save
608
+ silent (bool, optional): if True will not log; default is False (will log)
609
+ pickler (Callable[[Any], bytes], optional): if not given, will just be the `pickle` module;
610
+ if given will be a method to convert any Python object to its `bytes` representation;
611
+ PickleGeneric is the default, but another useful value is PickleJSON
612
+
613
+ Returns:
614
+ bytes: serialized binary data corresponding to obj + (compression) + (encryption)
615
+
616
+ """
617
+ messages: list[str] = []
618
+ with timer.Timer('Serialization complete', emit_log=False) as tm_all:
619
+ # pickle
620
+ with timer.Timer('PICKLE', emit_log=False) as tm_pickle:
621
+ obj: bytes = pickler(python_obj)
622
+ if not silent:
623
+ messages.append(f' {tm_pickle}, {human.HumanizedBytes(len(obj))}')
624
+ # compress, if needed
625
+ if compress is not None:
626
+ compress = max(compress, -22)
627
+ compress = min(compress, 22)
628
+ with timer.Timer(f'COMPRESS@{compress}', emit_log=False) as tm_compress:
629
+ obj = zstandard.ZstdCompressor(level=compress).compress(obj)
630
+ if not silent:
631
+ messages.append(f' {tm_compress}, {human.HumanizedBytes(len(obj))}')
632
+ # encrypt, if needed
633
+ if encryption_key is not None:
634
+ with timer.Timer('ENCRYPT', emit_log=False) as tm_crypto:
635
+ obj = encryption_key.Encrypt(obj, associated_data=_PICKLE_AAD)
636
+ if not silent:
637
+ messages.append(f' {tm_crypto}, {human.HumanizedBytes(len(obj))}')
638
+ # optionally save to disk
639
+ if file_path is not None:
640
+ with timer.Timer('SAVE', emit_log=False) as tm_save:
641
+ pathlib.Path(file_path).write_bytes(obj)
642
+ if not silent:
643
+ messages.append(f' {tm_save}, to {file_path!r}')
644
+ # log and return
645
+ if not silent:
646
+ logging.info(f'{tm_all}; parts:\n{"\n".join(messages)}')
647
+ return obj
648
+
649
+
650
+ def DeSerialize[T]( # noqa: C901
651
+ *,
652
+ data: bytes | None = None,
653
+ file_path: str | None = None,
654
+ decryption_key: Decryptor | None = None,
655
+ silent: bool = False,
656
+ unpickler: abc.Callable[[bytes], T] = UnpickleGeneric,
657
+ ) -> T:
658
+ """Load (de-serializes) a BLOB back to a Python object, optionally decrypting / decompressing.
659
+
660
+ Data path is:
661
+
662
+ `data` or `file_path` => (decrypt) => (decompress) => [unpickler] => return object
663
+
664
+ At every step of the data path the data will be measured, in bytes.
665
+ Every data conversion will be timed. The measurements/times will be logged (once).
666
+ Compression versus no compression will be automatically detected.
667
+
668
+ Args:
669
+ data (bytes | None, optional): if given, use this as binary data string (input);
670
+ if you use this option, `file_path` will be ignored
671
+ file_path (str | None, optional): if given, use this as file path to load binary data
672
+ string (input); if you use this option, `data` will be ignored. Defaults to None.
673
+ decryption_key (Decryptor | None, optional): if given will decryption_key.Decrypt() data before
674
+ decompressing/loading. Defaults to None.
675
+ silent (bool, optional): if True will not log; default is False (will log). Defaults to False.
676
+ unpickler (Callable[[bytes], Any], optional): if not given, will just be the `pickle` module;
677
+ if given will be a method to convert a `bytes` representation back to a Python object;
678
+ UnpickleGeneric is the default, but another useful value is UnpickleJSON.
679
+ Defaults to UnpickleGeneric.
680
+
681
+ Returns:
682
+ De-Serialized Python object corresponding to data
683
+
684
+ Raises:
685
+ base.InputError: invalid inputs
686
+ base.CryptoError: internal crypto failures, authentication failure, key mismatch, etc
687
+
688
+ """ # noqa: DOC502
689
+ # test inputs
690
+ if (data is None and file_path is None) or (data is not None and file_path is not None):
691
+ raise base.InputError('you must provide only one of either `data` or `file_path`')
692
+ if file_path and not pathlib.Path(file_path).exists():
693
+ raise base.InputError(f'invalid file_path: {file_path!r}')
694
+ if data and len(data) < 4: # noqa: PLR2004
695
+ raise base.InputError('invalid data: too small')
696
+ # start the pipeline
697
+ obj: bytes = data or b''
698
+ messages: list[str] = [f'DATA: {human.HumanizedBytes(len(obj))}'] if data and not silent else []
699
+ with timer.Timer('De-Serialization complete', emit_log=False) as tm_all:
700
+ # optionally load from disk
701
+ if file_path:
702
+ assert not obj, 'should never happen: if we have a file obj should be empty' # noqa: S101
703
+ with timer.Timer('LOAD', emit_log=False) as tm_load:
704
+ obj = pathlib.Path(file_path).read_bytes()
705
+ if not silent:
706
+ messages.append(f' {tm_load}, {human.HumanizedBytes(len(obj))}, from {file_path!r}')
707
+ # decrypt, if needed
708
+ if decryption_key is not None:
709
+ with timer.Timer('DECRYPT', emit_log=False) as tm_crypto:
710
+ obj = decryption_key.Decrypt(obj, associated_data=_PICKLE_AAD)
711
+ if not silent:
712
+ messages.append(f' {tm_crypto}, {human.HumanizedBytes(len(obj))}')
713
+ # decompress: we try to detect compression to determine if we must call zstandard
714
+ if (
715
+ len(obj) >= 4 # noqa: PLR2004
716
+ and (
717
+ ((magic := int.from_bytes(obj[:4], 'little')) == _ZSTD_MAGIC_FRAME)
718
+ or (_ZSTD_MAGIC_SKIPPABLE_MIN <= magic <= _ZSTD_MAGIC_SKIPPABLE_MAX)
719
+ )
720
+ ):
721
+ with timer.Timer('DECOMPRESS', emit_log=False) as tm_decompress:
722
+ obj = zstandard.ZstdDecompressor().decompress(obj)
723
+ if not silent:
724
+ messages.append(f' {tm_decompress}, {human.HumanizedBytes(len(obj))}')
725
+ elif not silent:
726
+ messages.append(' (no compression detected)')
727
+ # create the actual object = unpickle
728
+ with timer.Timer('UNPICKLE', emit_log=False) as tm_unpickle:
729
+ python_obj: T = unpickler(obj)
730
+ if not silent:
731
+ messages.append(f' {tm_unpickle}')
732
+ # log and return
733
+ if not silent:
734
+ logging.info(f'{tm_all}; parts:\n{"\n".join(messages)}')
735
+ return python_obj