spectrl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spectrl/__init__.py ADDED
@@ -0,0 +1,221 @@
1
+ """spectrl — Inline Spectrum URL Encoder.
2
+
3
+ Encodes a single mass spectrum into a compact, URL-safe token (spectrl1.…) so it can be
4
+ shared with no backend. The entire spectrum lives in the token.
5
+
6
+ Public API::
7
+
8
+ encode_spectrum(spec, *, lossless=False, max_len=None) -> str
9
+ decode_token(token) -> DecodedSpectrum
10
+ from_mzmlpy(spec, ref_groups=None) -> InlineSpectrum
11
+ top_n(spec, n) -> InlineSpectrum
12
+ to_fragment(token, base) -> str
13
+ to_query(token, base, param="d") -> str
14
+ to_data_uri(token) -> str
15
+ extract_token(url_or_uri) -> str
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import warnings
21
+ from urllib.parse import parse_qs, urlparse, urlunparse
22
+
23
+ from .cv import ARRAY_CHARGE, ARRAY_INTENSITY, ARRAY_MZ, ION_MOBILITY_ARRAY_TAILS
24
+ from .header import build_header, extract_descriptors, parse_header
25
+ from .model import DecodedSpectrum, InlineSpectrum, SpectrlCvParam
26
+ from .peaks import _validate_arrays, build_array_blobs, canonical_sort, compute_hash, decode_array_blobs, top_n
27
+ from .proforma import validate_interp
28
+ from .token import build_token, parse_token
29
+
30
+ __all__ = [
31
+ "encode_spectrum",
32
+ "decode_token",
33
+ "from_mzmlpy",
34
+ "top_n",
35
+ "to_fragment",
36
+ "to_query",
37
+ "to_data_uri",
38
+ "extract_token",
39
+ "InlineSpectrum",
40
+ "DecodedSpectrum",
41
+ "SpectrlCvParam",
42
+ ]
43
+
44
+ _SIZE_WARN = 8192 # bytes — warn past this
45
+ _MAGIC_PREFIX = "spectrl1."
46
+ _DATA_URI_PREFIX = "data:application/vnd.spectrl;v=1,"
47
+
48
+
49
def encode_spectrum(
    spec: InlineSpectrum,
    *,
    lossless: bool = False,
    max_len: int | None = None,
) -> str:
    """Serialize an InlineSpectrum into a ``spectrl1.`` token string.

    Args:
        spec: The spectrum to encode. It is passed through ``canonical_sort``
            and ``_validate_arrays`` before any bytes are produced.
        lossless: Forwarded to ``build_array_blobs``. True selects the raw
            IEEE-754 + zlib path (bit-exact); False (default) selects the lossy
            MS-Numpress path recommended for URL sharing.
        max_len: Optional upper bound on ``len(token)``. When exceeded, an
            OverflowError is raised; reduce the peak count with ``top_n()`` or
            fall back to a USI reference.

    Returns:
        The encoded ``spectrl1.`` token.

    Raises:
        OverflowError: If ``max_len`` is set and the token is longer than it.
        ValueError: If validation of the arrays or of ``spec.interp`` fails.

    Warns:
        UserWarning: If the token is longer than ``_SIZE_WARN``. The warning
            is issued before the ``max_len`` check.
    """
    ordered = canonical_sort(spec)
    _validate_arrays(ordered)

    if ordered.interp is not None:
        validate_interp(ordered.interp)

    blobs, descriptors = build_array_blobs(ordered, lossless=lossless)

    # Each descriptor records the position of its blob within the token.
    for segment_index, descriptor in enumerate(descriptors):
        descriptor["seg"] = segment_index

    # The hash covers the hash-less header plus the blobs; the final header
    # is then rebuilt with that hash embedded.
    hash_str = compute_hash(build_header(ordered, descriptors, hash_str=None), blobs)
    token = build_token(build_header(ordered, descriptors, hash_str=hash_str), blobs)

    if len(token) > _SIZE_WARN:
        warnings.warn(
            f"spectrl token length {len(token)} bytes exceeds recommended maximum of {_SIZE_WARN} bytes. "
            "Consider using top_n() to reduce peak count, or fall back to a USI reference.",
            UserWarning,
            stacklevel=2,
        )

    fits_limit = max_len is None or len(token) <= max_len
    if not fits_limit:
        raise OverflowError(
            f"Encoded spectrl token is {len(token)} bytes, which exceeds max_len={max_len}. "
            "Use top_n(spec, n) to reduce peak count before encoding, "
            "or use a USI reference for repository-resident spectra."
        )

    return token
108
+
109
+
110
def decode_token(token: str) -> DecodedSpectrum:
    """Parse a ``spectrl1.`` token back into a DecodedSpectrum.

    When the header carries a hash, it is recomputed and compared before any
    peak array is decoded.

    Args:
        token: A ``spectrl1.`` token string.

    Returns:
        The object produced by ``parse_header`` with ``mz``, ``intensity``,
        ``charge`` and (when present) ``ion_mobility`` / ``ion_mobility_type``
        filled in from the decoded arrays.

    Raises:
        ValueError: If the stored hash does not match the recomputed one.
            Errors raised by the parsing helpers propagate unchanged.
    """
    header_bytes, blobs = parse_token(token)
    decoded = parse_header(header_bytes)

    # Integrity check first, so corruption is reported before array decoding.
    # The hash-less header is obtained by slicing key 9 out of the received
    # bytes rather than re-encoding, so the check does not rely on a canonical
    # msgpack form.
    if decoded.hash is not None:
        from .header import strip_top_key

        expected = compute_hash(strip_top_key(header_bytes, 9), blobs)
        if expected != decoded.hash:
            raise ValueError(
                f"spectrl token hash mismatch: stored={decoded.hash!r}, computed={expected!r}. Token may be corrupted."
            )

    arrays = decode_array_blobs(extract_descriptors(header_bytes), blobs)

    decoded.mz = arrays.get(ARRAY_MZ)
    decoded.intensity = arrays.get(ARRAY_INTENSITY)
    decoded.charge = arrays.get(ARRAY_CHARGE)

    # Only the first array whose tail is a known ion-mobility tail is used.
    mobility_tails = ION_MOBILITY_ARRAY_TAILS.values()
    mobility_tail = next((tail for tail in arrays if tail in mobility_tails), None)
    if mobility_tail is not None:
        from .cv import decode_tail

        decoded.ion_mobility = arrays[mobility_tail]
        decoded.ion_mobility_type = decode_tail(mobility_tail)

    return decoded
159
+
160
+
161
def from_mzmlpy(spec, ref_groups: dict | None = None) -> InlineSpectrum:
    """Build an InlineSpectrum from a mzmlpy Spectrum.

    Delegates to ``spectrl.mzml.from_mzmlpy``; that module is imported only
    when this function is called.

    Args:
        spec: A mzmlpy.spectra.Spectrum.
        ref_groups: Optional dict mapping group id → mzmlpy _ParamGroup, used
            to expand referenceableParamGroupRef elements. Build it as
            ``{g.id: g for g in mzml.referenceable_param_groups}``.

    Returns:
        InlineSpectrum ready for encoding.
    """
    from . import mzml as _mzml_bridge

    return _mzml_bridge.from_mzmlpy(spec, ref_groups=ref_groups)
176
+
177
+
178
+ # ─── URL binding helpers ─────────────────────────────────────────────────────
179
+
180
+
181
def to_fragment(token: str, base: str) -> str:
    """Attach a token to ``base`` as a URL fragment (``base#token``).

    Trailing ``#`` characters on ``base`` are removed first so exactly one
    separator precedes the token. Fragments are not sent to the server, which
    avoids request-length limits and access logs.
    """
    trimmed_base = base.rstrip("#")
    return trimmed_base + "#" + token
187
+
188
+
189
def to_query(token: str, base: str, param: str = "d") -> str:
    """Wrap a token as a URL query parameter: ``base?param=token``.

    Args:
        token: The token to embed. It is inserted verbatim (no percent-encoding).
        base: The URL to attach the token to.
        param: Name of the query parameter that carries the token.

    Returns:
        ``base`` with ``param=token`` added to its query string. A query string
        already present on ``base`` is preserved and the new pair is appended
        with ``&``; every other URL component (including any fragment) is kept.
    """
    parsed = urlparse(base)
    pair = f"{param}={token}"
    # Append rather than overwrite, so parameters already on `base` survive.
    query = f"{parsed.query}&{pair}" if parsed.query else pair
    return urlunparse(parsed._replace(query=query))
194
+
195
+
196
def to_data_uri(token: str) -> str:
    """Wrap a token in a ``data:application/vnd.spectrl;v=1,`` URI.

    The token is appended verbatim to ``_DATA_URI_PREFIX``.
    """
    return _DATA_URI_PREFIX + token
199
+
200
+
201
def extract_token(url_or_uri: str) -> str:
    """Pull a spectrl1 token out of a data: URI, URL fragment, or query string.

    Lookup order:
      1. If the input starts with ``_DATA_URI_PREFIX``, everything after the
         prefix is returned as-is.
      2. If the URL fragment starts with ``spectrl1.``, the fragment is returned.
      3. Otherwise the first query-parameter value starting with ``spectrl1.``
         is returned (the parameter name is not checked).

    Raises:
        ValueError: If none of the above yields a token.
    """
    if url_or_uri.startswith(_DATA_URI_PREFIX):
        prefix_len = len(_DATA_URI_PREFIX)
        return url_or_uri[prefix_len:]

    parts = urlparse(url_or_uri)

    fragment = parts.fragment
    if fragment.startswith(_MAGIC_PREFIX):
        return fragment

    # Scan every value of every query parameter for the magic prefix.
    query_hits = (
        value
        for values in parse_qs(parts.query).values()
        for value in values
        if value.startswith(_MAGIC_PREFIX)
    )
    found = next(query_hits, None)
    if found is not None:
        return found

    raise ValueError(f"No spectrl1 token found in: {url_or_uri!r}")
spectrl/cli.py ADDED
@@ -0,0 +1,88 @@
1
+ """CLI for spectrl: encode, decode, and inspect tokens."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+
9
+
10
def _encode_cmd(args: argparse.Namespace) -> None:
    """Handle ``spectrl encode``: read a spectrum JSON document, print its token.

    The JSON object must contain ``mz`` and ``intensity`` lists. ``id`` and
    ``params`` (a list of keyword dicts passed to ``SpectrlCvParam``) are
    optional.

    Args:
        args: Parsed CLI arguments; reads ``input`` (a path, or ``-`` for
            stdin), ``lossless`` and ``max_len``.
    """
    import numpy as np

    from . import encode_spectrum
    from .model import InlineSpectrum, SpectrlCvParam

    if args.input == "-":
        data = json.load(sys.stdin)
    else:
        # Context manager closes the file even if JSON parsing fails.
        with open(args.input) as fh:
            data = json.load(fh)

    mz = np.array(data["mz"], dtype=np.float64)
    intensity = np.array(data["intensity"], dtype=np.float64)

    params = [SpectrlCvParam(**p) for p in data.get("params", [])]
    spec = InlineSpectrum(
        default_array_length=len(mz),
        mz=mz,
        intensity=intensity,
        id=data.get("id"),
        params=params,
    )
    token = encode_spectrum(spec, lossless=args.lossless, max_len=args.max_len)
    print(token)
30
+
31
+
32
def _decode_cmd(args: argparse.Namespace) -> None:
    """Handle ``spectrl decode``: read a token and print the spectrum as JSON.

    Args:
        args: Parsed CLI arguments; reads ``input`` (a path, or ``-`` for
            stdin). Surrounding whitespace around the token is stripped.
    """
    from . import decode_token

    if args.input == "-":
        raw = sys.stdin.read()
    else:
        # Context manager closes the token file deterministically.
        with open(args.input) as fh:
            raw = fh.read()

    decoded = decode_token(raw.strip())
    out: dict = {
        "id": decoded.id,
        "default_array_length": decoded.default_array_length,
        "mz": decoded.mz.tolist() if decoded.mz is not None else None,
        "intensity": decoded.intensity.tolist() if decoded.intensity is not None else None,
        "charge": decoded.charge.tolist() if decoded.charge is not None else None,
        "hash": decoded.hash,
        "interp": decoded.interp,
    }
    print(json.dumps(out, indent=2))
47
+
48
+
49
def _inspect_cmd(args: argparse.Namespace) -> None:
    """Handle ``spectrl inspect``: print segment sizes and the decoded header.

    Args:
        args: Parsed CLI arguments; reads ``input`` (a path, or ``-`` for
            stdin). Surrounding whitespace around the token is stripped.
    """
    import msgpack

    from .token import parse_token

    if args.input == "-":
        raw = sys.stdin.read()
    else:
        # Context manager closes the token file deterministically.
        with open(args.input) as fh:
            raw = fh.read()

    header_bytes, blobs = parse_token(raw.strip())
    # msgpack >= 1.0 rejects non-str/bytes map keys unless strict_map_key is
    # disabled. NOTE(review): the header appears to use integer keys (the hash
    # is stripped via key 9 in decode_token) — confirm against header.py.
    h = msgpack.unpackb(header_bytes, raw=False, strict_map_key=False)
    print(f"Segments: 1 header + {len(blobs)} array(s)")
    print(f"Header size: {len(header_bytes)} bytes")
    for i, blob in enumerate(blobs):
        print(f"Array {i} size: {len(blob)} bytes")
    print("Header (decoded):")
    print(json.dumps(h, indent=2, default=str))
63
+
64
+
65
def main() -> None:
    """Parse the command line and run the selected sub-command.

    Three sub-commands are registered — ``encode``, ``decode`` and
    ``inspect`` — each taking an optional positional ``input`` that defaults
    to ``-`` (stdin). The chosen handler is stored on the namespace as
    ``func`` and invoked with the parsed arguments.
    """
    cli = argparse.ArgumentParser(prog="spectrl", description="mzx inline spectrum encoder/decoder")
    commands = cli.add_subparsers(dest="cmd", required=True)

    # encode: spectrum JSON -> token
    encode_parser = commands.add_parser("encode", help="Encode a spectrum JSON to a spectrl1 token")
    encode_parser.add_argument("input", nargs="?", default="-", help="Input JSON file or '-' for stdin")
    encode_parser.add_argument("--lossless", action="store_true", help="Use lossless IEEE-754 + zlib encoding")
    encode_parser.add_argument("--max-len", type=int, default=None, help="Maximum token length in bytes")
    encode_parser.set_defaults(func=_encode_cmd)

    # decode: token -> spectrum JSON
    decode_parser = commands.add_parser("decode", help="Decode a spectrl1 token to JSON")
    decode_parser.add_argument("input", nargs="?", default="-", help="Token file or '-' for stdin")
    decode_parser.set_defaults(func=_decode_cmd)

    # inspect: token -> readable header dump
    inspect_parser = commands.add_parser("inspect", help="Inspect a spectrl1 token header as readable JSON")
    inspect_parser.add_argument("input", nargs="?", default="-", help="Token file or '-' for stdin")
    inspect_parser.set_defaults(func=_inspect_cmd)

    namespace = cli.parse_args()
    namespace.func(namespace)
85
+
86
+
87
+ if __name__ == "__main__":
88
+ main()
@@ -0,0 +1,78 @@
1
+ """Codec registry keyed by compression CV accession tail integer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Protocol
6
+
7
+ import numpy as np
8
+
9
+ from ..cv import (
10
+ COMP_NUMLIN_ZLIB,
11
+ COMP_NUMPIC_ZLIB,
12
+ COMP_NUMSLOF_ZLIB,
13
+ COMP_ZLIB,
14
+ )
15
+ from .numpress import (
16
+ decode_numlin_zlib,
17
+ decode_numpic_zlib,
18
+ decode_numslof_zlib,
19
+ encode_numlin_zlib,
20
+ encode_numpic_zlib,
21
+ encode_numslof_zlib,
22
+ )
23
+ from .raw import decode_zlib_raw, encode_zlib_raw
24
+
25
+
26
class Codec(Protocol):
    """Structural interface implemented by every entry in the codec registry."""

    def encode(self, data: np.ndarray, fp: float | None) -> bytes:
        """Turn ``data`` into a compressed blob; ``fp`` is an optional fixed point."""

    def decode(self, blob: bytes) -> np.ndarray:
        """Turn a compressed blob back into an array."""
29
+
30
+
31
class _NumLinZlibCodec:
    """Codec adapter for the MS-Numpress linear prediction + zlib functions."""

    def encode(self, data: np.ndarray, fp: float | None) -> bytes:
        """Delegate to ``encode_numlin_zlib`` with the given fixed point."""
        blob = encode_numlin_zlib(data, fp)
        return blob

    def decode(self, blob: bytes) -> np.ndarray:
        """Delegate to ``decode_numlin_zlib``."""
        values = decode_numlin_zlib(blob)
        return values
37
+
38
+
39
class _NumSlofZlibCodec:
    """Codec adapter for the MS-Numpress short logged float + zlib functions."""

    def encode(self, data: np.ndarray, fp: float | None) -> bytes:
        """Delegate to ``encode_numslof_zlib`` with the given fixed point."""
        blob = encode_numslof_zlib(data, fp)
        return blob

    def decode(self, blob: bytes) -> np.ndarray:
        """Delegate to ``decode_numslof_zlib``."""
        values = decode_numslof_zlib(blob)
        return values
45
+
46
+
47
class _NumPicZlibCodec:
    """Codec adapter for the MS-Numpress positive integer + zlib functions."""

    def encode(self, data: np.ndarray, fp: float | None) -> bytes:
        """Delegate to ``encode_numpic_zlib``, forwarding ``fp`` unchanged."""
        blob = encode_numpic_zlib(data, fp)
        return blob

    def decode(self, blob: bytes) -> np.ndarray:
        """Delegate to ``decode_numpic_zlib``."""
        values = decode_numpic_zlib(blob)
        return values
53
+
54
+
55
class _ZlibRawCodec:
    """Codec adapter for the raw float64 + zlib functions."""

    def encode(self, data: np.ndarray, fp: float | None) -> bytes:
        """Delegate to ``encode_zlib_raw``; ``fp`` is accepted but not used."""
        blob = encode_zlib_raw(data)
        return blob

    def decode(self, blob: bytes) -> np.ndarray:
        """Delegate to ``decode_zlib_raw``."""
        values = decode_zlib_raw(blob)
        return values
61
+
62
+
63
+ _REGISTRY: dict[int, Codec] = {
64
+ COMP_NUMLIN_ZLIB: _NumLinZlibCodec(),
65
+ COMP_NUMSLOF_ZLIB: _NumSlofZlibCodec(),
66
+ COMP_NUMPIC_ZLIB: _NumPicZlibCodec(),
67
+ COMP_ZLIB: _ZlibRawCodec(),
68
+ }
69
+
70
+
71
def get_codec(comp_tail: int) -> Codec:
    """Look up the codec registered under a compression accession tail.

    Args:
        comp_tail: Integer tail of the compression CV accession.

    Returns:
        The codec instance stored in ``_REGISTRY`` for that tail.

    Raises:
        KeyError: If no codec is registered for ``comp_tail``.
    """
    codec = _REGISTRY.get(comp_tail)
    if codec is None:
        raise KeyError(f"No codec registered for compression tail {comp_tail}.")
    return codec
@@ -0,0 +1,73 @@
1
+ """MS-Numpress + zlib codec wrappers over pynumpress."""
2
+
3
+ import struct
4
+ import zlib
5
+
6
+ import numpy as np
7
+ import pynumpress
8
+
9
+ DEFAULT_NUMLIN_FP = 100000.0 # ~0.1 mDa precision for m/z
10
+ # SLOF fp must satisfy: log(max_intensity + 1) * fp <= 65535 (uint16 max)
11
+ # Use 3600.0 which handles intensities up to ~8e7; clip to safe value if data is larger.
12
+ _SLOF_UINT16_MAX = 65535.0
13
+
14
+
15
def _safe_slof_fp(data: np.ndarray, desired_fp: float) -> float:
    """Cap a slof fixed point so the array's maximum does not overflow uint16.

    The largest usable fixed point is ``_SLOF_UINT16_MAX / log(max + 1)`` (a
    tiny epsilon is added to the denominator). The array maximum is floored at
    1.0, and an empty array is treated as having maximum 1.0.

    Args:
        data: Values that will be slof-encoded.
        desired_fp: The fixed point the caller would like to use.

    Returns:
        ``desired_fp`` when it is safe, otherwise the computed upper bound.
    """
    import math

    peak = float(np.max(data)) if len(data) > 0 else 1.0
    if peak < 1.0:
        peak = 1.0

    fp_ceiling = _SLOF_UINT16_MAX / (math.log(peak + 1) + 1e-9)
    return min(desired_fp, fp_ceiling)
23
+
24
+
25
+ DEFAULT_NUMSLOF_FP = 3600.0 # handles intensities up to ~8e7; adjusted dynamically if needed
26
+
27
+
28
def encode_numlin_zlib(data: np.ndarray, fp: float | None = None) -> bytes:
    """Compress an array with MS-Numpress linear prediction followed by zlib.

    Args:
        data: Values to encode; cast to float64 before encoding.
        fp: Fixed point for the linear encoder. ``None`` selects
            ``DEFAULT_NUMLIN_FP``.

    Returns:
        The zlib-compressed numpress bytes.
    """
    if fp is None:
        fp = DEFAULT_NUMLIN_FP
    as_float64 = data.astype(np.float64)
    numpress_bytes = pynumpress.encode_linear(as_float64, fp).tobytes()
    return zlib.compress(numpress_bytes)
33
+
34
+
35
def decode_numlin_zlib(blob: bytes) -> np.ndarray:
    """Inflate and decode an MS-Numpress linear + zlib blob to a float64 array.

    Payloads of 12 to 15 bytes are decoded here as a single value instead of
    being handed to pynumpress.
    """
    payload = zlib.decompress(blob)

    # pynumpress 0.0.9 cannot decode a single-value linear payload (8-byte
    # fixed point followed by one 4-byte integer), although encode_linear
    # emits exactly that. The MS-Numpress reference decodes a 12-byte payload
    # to one value, so do the same here; this keeps single-peak spectra
    # round-trippable and compatible with other implementations.
    if len(payload) in range(12, 16):
        (scale,) = struct.unpack_from(">d", payload, 0)
        (first_int,) = struct.unpack_from("<I", payload, 8)
        return np.array([first_int / scale], dtype=np.float64)

    as_bytes = np.frombuffer(payload, dtype=np.uint8)
    return np.array(pynumpress.decode_linear(as_bytes), dtype=np.float64)
48
+
49
+
50
def encode_numslof_zlib(data: np.ndarray, fp: float | None = None) -> bytes:
    """Compress an array with MS-Numpress short logged float followed by zlib.

    Args:
        data: Values to encode; cast to float64 before encoding.
        fp: Desired fixed point. ``None`` selects ``DEFAULT_NUMSLOF_FP``. The
            value actually used is whatever ``_safe_slof_fp`` returns for it.

    Returns:
        The zlib-compressed numpress bytes.
    """
    requested_fp = DEFAULT_NUMSLOF_FP if fp is None else fp
    usable_fp = _safe_slof_fp(data, requested_fp)
    numpress_bytes = pynumpress.encode_slof(data.astype(np.float64), usable_fp).tobytes()
    return zlib.compress(numpress_bytes)
56
+
57
+
58
def decode_numslof_zlib(blob: bytes) -> np.ndarray:
    """Inflate and decode an MS-Numpress slof + zlib blob to a float64 array."""
    payload = zlib.decompress(blob)
    as_bytes = np.frombuffer(payload, dtype=np.uint8)
    decoded = pynumpress.decode_slof(as_bytes)
    return np.array(decoded, dtype=np.float64)
62
+
63
+
64
def encode_numpic_zlib(data: np.ndarray, fp: float | None = None) -> bytes:
    """Compress an array with MS-Numpress positive integer followed by zlib.

    Args:
        data: Values to encode; cast to float64 before encoding.
        fp: Accepted so the signature matches the other encoders; not used.

    Returns:
        The zlib-compressed numpress bytes.
    """
    as_float64 = data.astype(np.float64)
    numpress_bytes = pynumpress.encode_pic(as_float64).tobytes()
    return zlib.compress(numpress_bytes)
68
+
69
+
70
def decode_numpic_zlib(blob: bytes) -> np.ndarray:
    """Inflate and decode an MS-Numpress pic + zlib blob to a float64 array."""
    payload = zlib.decompress(blob)
    as_bytes = np.frombuffer(payload, dtype=np.uint8)
    decoded = pynumpress.decode_pic(as_bytes)
    return np.array(decoded, dtype=np.float64)
spectrl/codecs/raw.py ADDED
@@ -0,0 +1,17 @@
1
+ """Lossless IEEE-754 little-endian + zlib codec."""
2
+
3
+ import zlib
4
+
5
+ import numpy as np
6
+
7
+
8
def encode_zlib_raw(data: np.ndarray) -> bytes:
    """Serialize an array as little-endian float64 bytes and zlib-compress them."""
    little_endian = data.astype("<f8")
    return zlib.compress(little_endian.tobytes())
12
+
13
+
14
def decode_zlib_raw(blob: bytes) -> np.ndarray:
    """Inflate a zlib blob of little-endian float64 bytes into a float64 array.

    The result is a fresh copy, not a view over the decompressed buffer.
    """
    payload = zlib.decompress(blob)
    view = np.frombuffer(payload, dtype="<f8")
    return view.astype(np.float64)
spectrl/cv.py ADDED
@@ -0,0 +1,117 @@
1
+ """CV accession ↔ integer-tail mapping using mzmlpy's StrEnum constants.
2
+
3
+ Rules (§3.1):
4
+ - Accession tails default to MS: ontology.
5
+ - Unit tails default to UO: ontology.
6
+ - Any other ontology uses an explicit [ontology_id, tail] pair.
7
+
8
+ The tail for "MS:1000511" is 1000511; for "UO:0000031" is 31.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from mzmlpy.constants import (
14
+ BinaryDataArrayAccession,
15
+ BinaryDataTypeAccession,
16
+ CollisionDissociationTypeAccession,
17
+ CompressionTypeAccessions,
18
+ ScanPolarity,
19
+ SpectrumCombinationAccession,
20
+ SpectrumMSAccession,
21
+ SpectrumType,
22
+ )
23
+
24
+ _DEFAULT_PARAM_ONTOLOGY = "MS"
25
+ _DEFAULT_UNIT_ONTOLOGY = "UO"
26
+
27
+
28
def accession_tail(accession: str) -> int:
    """Return the integer after the first colon, e.g. 'MS:1000511' → 1000511."""
    segments = accession.split(":")
    return int(segments[1])
31
+
32
+
33
def accession_ontology(accession: str) -> str:
    """Return the text before the first colon, e.g. 'MS:1000511' → 'MS'."""
    prefix, _sep, _rest = accession.partition(":")
    return prefix
36
+
37
+
38
def encode_tail(accession: str) -> int:
    """Encode an accession as its integer tail.

    The ontology prefix is dropped, so the result is only unambiguous for the
    default MS: ontology.
    """
    tail = accession_tail(accession)
    return tail
41
+
42
+
43
def encode_unit(unit_accession: str) -> int | list:
    """Encode a unit accession compactly.

    Returns:
        The bare integer tail when the accession belongs to the default unit
        ontology (UO:), otherwise an ``[ontology, tail]`` list.
    """
    ontology = accession_ontology(unit_accession)
    tail = accession_tail(unit_accession)
    return tail if ontology == _DEFAULT_UNIT_ONTOLOGY else [ontology, tail]
50
+
51
+
52
def decode_tail(tail: int, ontology: str = _DEFAULT_PARAM_ONTOLOGY) -> str:
    """Rebuild an accession string from an ontology prefix and integer tail.

    The tail is zero-padded to at least seven digits, e.g. 31 → '0000031'.
    """
    return "{}:{:07d}".format(ontology, tail)
55
+
56
+
57
def decode_unit_tail(tail: int | list) -> str:
    """Rebuild a unit accession string from its encoded tail.

    Args:
        tail: Either a bare integer (default unit ontology, UO:) or an
            ``[ontology, tail]`` list for any other ontology.

    Returns:
        The accession string with the tail zero-padded to seven digits.
    """
    if not isinstance(tail, list):
        return f"{_DEFAULT_UNIT_ONTOLOGY}:{tail:07d}"
    return f"{tail[0]}:{tail[1]:07d}"
62
+
63
+
64
+ # ─── Codec compression tails (used by codecs module) ───────────────────────
65
+
66
+ COMP_NUMLIN_ZLIB = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_LINEAR_PREDICTION_ZLIB)
67
+ COMP_NUMSLOF_ZLIB = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_SHORT_LOGGED_FLOAT_ZLIB)
68
+ COMP_NUMPIC_ZLIB = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_POSITIVE_INTEGER_ZLIB)
69
+ COMP_NUMLIN = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_LINEAR_PREDICTION)
70
+ COMP_NUMSLOF = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_SHORT_LOGGED_FLOAT)
71
+ COMP_NUMPIC = accession_tail(CompressionTypeAccessions.MS_NUMPRESS_POSITIVE_INTEGER)
72
+ COMP_ZLIB = accession_tail(CompressionTypeAccessions.ZLIB_COMPRESSION)
73
+ COMP_NONE = accession_tail(CompressionTypeAccessions.NO_COMPRESSION)
74
+
75
+ # ─── Data type tails ────────────────────────────────────────────────────────
76
+
77
+ TYPE_FLOAT64 = accession_tail(BinaryDataTypeAccession.FLOAT_64)
78
+ TYPE_FLOAT32 = accession_tail(BinaryDataTypeAccession.FLOAT_32)
79
+ TYPE_INT32 = accession_tail(BinaryDataTypeAccession.INT_32)
80
+ TYPE_INT64 = accession_tail(BinaryDataTypeAccession.INT_64)
81
+
82
+ # ─── Array type tails ───────────────────────────────────────────────────────
83
+
84
+ ARRAY_MZ = accession_tail(BinaryDataArrayAccession.MZ)
85
+ ARRAY_INTENSITY = accession_tail(BinaryDataArrayAccession.INTENSITY)
86
+ ARRAY_CHARGE = accession_tail(BinaryDataArrayAccession.CHARGE)
87
+
88
+ # Ion mobility array tails
89
+ ION_MOBILITY_ARRAY_TAILS: dict[str, int] = {
90
+ acc: accession_tail(acc)
91
+ for acc in (
92
+ BinaryDataArrayAccession.RAW_ION_MOBILITY,
93
+ BinaryDataArrayAccession.MEAN_ION_MOBILITY_DRIFT_TIME,
94
+ BinaryDataArrayAccession.DECONVOLUTED_ION_MOBILITY_DRIFT_TIME,
95
+ BinaryDataArrayAccession.MEAN_INVERSE_REDUCED_ION_MOBILITY,
96
+ BinaryDataArrayAccession.MEAN_ION_MOBILITY,
97
+ BinaryDataArrayAccession.DECONVOLUTED_INVERSE_REDUCED_ION_MOBILITY,
98
+ BinaryDataArrayAccession.RAW_ION_MOBILITY_DRIFT_TIME,
99
+ BinaryDataArrayAccession.RAW_INVERSE_REDUCED_ION_MOBILITY,
100
+ BinaryDataArrayAccession.ION_MOBILITY,
101
+ )
102
+ }
103
+
104
+ # ─── Known accession registry for validation/tests ──────────────────────────
105
+
106
+ ALL_MZX_ACCESSIONS: set[str] = set()
107
+ for _enum in (
108
+ BinaryDataArrayAccession,
109
+ BinaryDataTypeAccession,
110
+ CompressionTypeAccessions,
111
+ ScanPolarity,
112
+ SpectrumCombinationAccession,
113
+ SpectrumMSAccession,
114
+ SpectrumType,
115
+ CollisionDissociationTypeAccession,
116
+ ):
117
+ ALL_MZX_ACCESSIONS.update(str(v) for v in _enum)