structo 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
217
+ output.raw
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structo
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: Structify
5
5
  Author-email: Ben Brady <benbradybusiness@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1,11 @@
1
+ # Structo
2
+
3
+ ```py
4
+
5
+ ```
6
+
7
+ ## Examples
8
+
9
+ See the `./examples` directory for example usage.
10
+
11
+
@@ -0,0 +1,54 @@
1
+ from structo import Serializer, serialize_to_bytes, deserialize_from_bytes
2
+ from dataclasses import dataclass
3
+ import typing as t
4
+
5
+ # We don't inherit from SerializableObject
6
+ # since we're using a custom serializer
7
@dataclass
class Flags:
    """Three independent boolean flags handled by a hand-written serializer."""

    # Field order fixes the positional constructor order and the repr.
    flag_a: bool
    flag_b: bool
    flag_c: bool
12
+
13
class FlagsSerialiser(Serializer[Flags]):
    """Pack a ``Flags`` value into a single byte and unpack it again.

    Bit 0 carries ``flag_a``, bit 1 ``flag_b`` and bit 2 ``flag_c``. The
    remaining bits are written as zero and ignored when reading.
    """

    # This is optional, but allows for size calculations
    @staticmethod
    def length(format):
        """Return the encoded size in bytes, which is always 1."""
        return 1

    @staticmethod
    def write(buf, format, value):
        """Write ``value`` to ``buf`` as one bit-packed byte."""
        byte = 0
        if value.flag_a:
            byte |= 1 << 0
        if value.flag_b:
            byte |= 1 << 1
        if value.flag_c:
            byte |= 1 << 2

        # The write count is not needed, so it is not bound to a name.
        buf.write(bytes([byte]))

    @staticmethod
    def read(buf, format):
        """Read one byte from ``buf`` and decode it into a ``Flags``."""
        byte = buf.read(1)[0]
        return Flags(
            flag_a=bool(byte & (1 << 0)),
            flag_b=bool(byte & (1 << 1)),
            flag_c=bool(byte & (1 << 2)),
        )
42
+
43
+ type FlagsDatatype = t.Annotated[Flags, FlagsSerialiser]
44
+
45
+
46
+ value = Flags(
47
+ flag_a=False,
48
+ flag_b=True,
49
+ flag_c=False,
50
+ )
51
+ output = serialize_to_bytes(flags_datatype, value)
52
+ print(output)
53
+ print(deserialize_from_bytes(flags_datatype, output))
54
+
@@ -0,0 +1,64 @@
1
+ from structo import (
2
+ SerializableObject,
3
+ Serializer,
4
+ write_serializable,
5
+ read_serializable,
6
+ uint32_LE,
7
+ String,
8
+ List,
9
+ )
10
+ import random
11
+ import typing as t
12
+ from pathlib import Path
13
+
14
+
15
class Post(SerializableObject):
    """A single post record; fields are declared in the order listed here."""

    # Little-endian unsigned 32-bit identifier.
    id: uint32_LE
    # String parameterised with uint32_LE (presumably its length prefix;
    # confirm against structo.types).
    author: String[uint32_LE]
    # List of strings, both parameterised with uint32_LE (presumably the
    # count/length prefixes; confirm against structo.types).
    tags: List[uint32_LE, String[uint32_LE]]
19
+
20
+
21
+ class PostsSerialiser(Serializer[t.Iterable[Post]]):
22
+ @staticmethod
23
+ def write(buf, format, value):
24
+ for item in value:
25
+ buf.write(bytes([255]))
26
+ write_serializable(buf, Post, item)
27
+
28
+ buf.write(bytes([0]))
29
+
30
+ @staticmethod
31
+ def read(buf, format):
32
+ while True:
33
+ continue_byte = buf.read(1)[0]
34
+ if continue_byte == 0:
35
+ break
36
+ assert continue_byte == 255, "Continue byte was not 255"
37
+
38
+ print("Loading...") # to prove it's interspliced loading and yielding
39
+ yield read_serializable(buf, Post)
40
+
41
+
42
+ type PostsIterable = t.Annotated[t.Iterable[Post], PostsSerialiser()]
43
+
44
+
45
# Too large to effectively store in memory
def generate_posts(count=1_000_000):
    """Lazily yield ``count`` randomly populated posts.

    Args:
        count: number of posts to produce; defaults to one million.

    Yields:
        ``Post`` instances with a random id, a fixed author and 0 to 10
        random numeric tags rendered as strings.
    """
    for _ in range(count):
        yield Post(
            id=random.randint(0, 1_000_000),
            author="Me",
            tags=[str(random.randint(0, 100)) for _ in range(random.randint(0, 10))],
        )
53
+
54
+
55
output = Path("output.raw")

# Generate the data file only on the first run; later runs reuse it.
if not output.exists():
    with output.open("wb") as f:
        write_serializable(f, PostsIterable, generate_posts())

# The file must stay open while iterating: posts are decoded lazily.
with output.open("rb") as f:
    for post in read_serializable(f, PostsIterable):
        print(post)
@@ -0,0 +1,40 @@
1
+ from structo import uint16_LE, uint32_LE, SerializableObject, Literal
2
+
3
+
4
class RiffHeader(SerializableObject):
    """Leading header of the file: two fixed tags around a 32-bit size."""

    # Fixed byte tag b"RIFF".
    chunk_id: Literal[b"RIFF"]
    # Little-endian unsigned 32-bit size field.
    chunk_size: uint32_LE
    # Fixed byte tag b"WAVE".
    format: Literal[b"WAVE"]
8
+
9
+
10
class FormatChunkHeader(SerializableObject):
    """Header of the format chunk: a fixed tag followed by a 32-bit size."""

    # Fixed byte tag b"fmt " (note the trailing space).
    id: Literal[b"fmt "]
    # Little-endian unsigned 32-bit size; the example script reads this many
    # bytes as the chunk payload.
    size: uint32_LE
13
+
14
+
15
class DataChunkHeader(SerializableObject):
    """Header of the data chunk: a fixed tag followed by a 32-bit size."""

    # Fixed byte tag b"data".
    id: Literal[b"data"]
    # Little-endian unsigned 32-bit size; the example script reads this many
    # bytes as the sample data.
    size: uint32_LE
18
+
19
+
20
class WavFormat(SerializableObject):
    """Payload of the format chunk, all fields little-endian unsigned."""

    audio_format: uint16_LE
    num_channels: uint16_LE
    sample_rate: uint32_LE
    # NOTE(review): the WAV field at this position is usually called the
    # byte rate; confirm whether the name "byte_range" is intended.
    byte_range: uint32_LE
    block_align: uint16_LE
    bits_per_sample: uint16_LE
27
+
28
+
29
with open("example.wav", "rb") as f:
    # The parsed header is not used; the call consumes the leading bytes so
    # the format chunk header is next in the stream.
    RiffHeader.read(f)

    format_header = FormatChunkHeader.read(f)
    format_data = f.read(format_header.size)
    # Named wav_format rather than format so the builtin is not shadowed.
    wav_format = WavFormat.from_bytes(format_data)

    data_header = DataChunkHeader.read(f)
    wav_data = f.read(data_header.size)

print(wav_format)
print(len(wav_data))
@@ -11,3 +11,9 @@ dynamic = ["version", "description"]
11
11
 
12
12
  [project.urls]
13
13
  Home = "https://nnilky.site"
14
+
15
+ [dependency-groups]
16
+ dev = [
17
+ "black>=26.1.0",
18
+ "pytest>=9.0.2",
19
+ ]
@@ -0,0 +1,49 @@
1
+ """
2
+ Structify
3
+ """
4
+
5
+ __version__ = "0.0.2"
6
+
7
+ from .object import SerializableObject
8
+ from .serialise import write_serializable, read_serializable
9
+ from .serializer import Serializer, Format
10
+ from .types import (
11
+ uint64_BE,
12
+ uint64_LE,
13
+ uint64,
14
+ uint32_BE,
15
+ uint32_LE,
16
+ uint32,
17
+ uint16_BE,
18
+ uint16_LE,
19
+ uint16,
20
+ uint8,
21
+ int64_BE,
22
+ int64_LE,
23
+ int64,
24
+ int32_BE,
25
+ int32_LE,
26
+ int32,
27
+ int16_BE,
28
+ int16_LE,
29
+ int16,
30
+ int8,
31
+ float64_BE,
32
+ float64_LE,
33
+ float64,
34
+ float32_BE,
35
+ float32_LE,
36
+ float32,
37
+ Array,
38
+ Buffer,
39
+ List,
40
+ String,
41
+ Blob,
42
+ Literal
43
+ )
44
+ from .utils import (
45
+ StructoReader,
46
+ StructifyWriter,
47
+ serialize_to_bytes,
48
+ deserialize_from_bytes,
49
+ )
@@ -0,0 +1,48 @@
1
+ import typing as t
2
+ import io
3
+ from dataclasses import dataclass
4
+ import annotationlib
5
+
6
+
7
class SerializableObjectMeta(type):
    """Metaclass that validates field formats and makes the class a dataclass."""

    def __new__(cls, name, bases, dct):
        """Create the class, check each annotation, and apply ``dataclass``.

        Raises:
            ValueError: if ``get_serializer`` fails for any annotated field;
                the underlying error is chained as the cause.
        """
        # Imported at call time, presumably to avoid an import cycle with
        # the serialise module.
        from .serialise import get_serializer

        new_class = super().__new__(cls, name, bases, dct)
        annotate = annotationlib.get_annotate_from_class_namespace(dct)
        if annotate:
            # Format.VALUE is the public format for evaluated annotations;
            # VALUE_WITH_FAKE_GLOBALS is documented as internal-only.
            attrs = annotate(annotationlib.Format.VALUE)
            for key, value in attrs.items():
                try:
                    get_serializer(value)
                except Exception as e:
                    raise ValueError(
                        f"Invalid attribute defintion for {name}.{key}"
                    ) from e

        return dataclass(new_class)  # type: ignore
24
+
25
+
26
@t.dataclass_transform()
class SerializableObject(metaclass=SerializableObjectMeta):
    """Base class for objects whose annotated fields define their wire format."""

    @classmethod
    def read(cls, buf: io.Reader) -> t.Self:
        """Decode an instance of ``cls`` from ``buf``."""
        from .serialise import read_serializable as _read

        return _read(buf, cls)

    def write(self, buf: io.Writer):
        """Encode this instance into ``buf`` using its own class as format."""
        from .serialise import write_serializable as _write

        _write(buf, type(self), self)

    @classmethod
    def from_bytes(cls, data: bytes) -> t.Self:
        """Decode an instance of ``cls`` from an in-memory byte string."""
        return cls.read(io.BytesIO(data))

    def to_bytes(self) -> bytes:
        """Encode this instance and return the resulting bytes."""
        sink = io.BytesIO()
        self.write(sink)
        return sink.getvalue()
@@ -0,0 +1,77 @@
1
+ import io
2
+ import typing as t
3
+ import annotationlib
4
+ from .serializer import Format, Serializer
5
+
6
+
7
class _SerializableObjectSerializer(Serializer):
    """Serializer for ``SerializableObject`` subclasses, field by field."""

    @staticmethod
    def write(buf: io.Writer, format: type, value: t.Any):
        """Write each annotated field of ``value`` in declaration order."""
        for field_key, field_format in annotationlib.get_annotations(format).items():
            write_serializable(buf, field_format, getattr(value, field_key))

    @staticmethod
    def read(buf: io.Reader, format: type) -> t.Any:
        """Read each annotated field in declaration order and build ``format``."""
        attrs = {
            field_key: read_serializable(buf, field_format)
            for field_key, field_format in annotationlib.get_annotations(format).items()
        }
        return format(**attrs)


# Builtin types that have no serializer, paired with the structo types to
# suggest in the error message.
_BUILTIN_TYPE_HINTS = (
    (int, "structo.int32, structo.uint32 or similar"),
    (float, "structo.float32 or structo.float64"),
    (bytes, "structo.Buffer or structo.Blob"),
    (list, "structo.List or structo.Array"),
)


def get_serializer(format: Format) -> Serializer:
    """Return the serializer responsible for ``format``.

    A ``type`` alias is unwrapped one level through ``__value__``. An
    ``Annotated`` format must carry exactly one ``Serializer`` instance in
    its metadata. A ``SerializableObject`` subclass gets the field-by-field
    serializer.

    Raises:
        AssertionError: if an ``Annotated`` format carries no serializer or
            more than one.
        ValueError: if ``format`` is a bare ``int``, ``float``, ``bytes`` or
            ``list``.
        NotImplementedError: if no serializer is known for ``format``.
    """
    # Imported at call time, presumably to avoid an import cycle with the
    # object module.
    from .object import SerializableObject

    if hasattr(format, "__value__"):
        format = format.__value__

    if t.get_origin(format) is t.Annotated:
        serializers = [arg for arg in t.get_args(format) if isinstance(arg, Serializer)]
        # Raised explicitly (same exception type as before) so the checks are
        # not stripped when Python runs with -O.
        if not serializers:
            raise AssertionError(f"No serializers for {format} found")
        if len(serializers) != 1:
            raise AssertionError(f"More than one serializers for {format} found")
        return serializers[0]

    if isinstance(format, type) and issubclass(format, SerializableObject):
        return _SerializableObjectSerializer()

    for builtin_type, hint in _BUILTIN_TYPE_HINTS:
        if format == builtin_type:
            raise ValueError(
                f"No serializer for {builtin_type.__name__}, you need to use {hint} instead"
            )

    raise NotImplementedError(f"No serializer found for {format}")
61
+
62
+
63
def write_serializable(buf: io.Writer, format: Format, value: t.Any):
    """Encode ``value`` into ``buf`` with the serializer found for ``format``.

    Returns whatever the serializer's ``write`` returns.
    """
    serializer = get_serializer(format)
    return serializer.write(buf, format, value)
65
+
66
+
67
def read_serializable(buf: io.Reader, format: Format) -> t.Any:
    """Decode one value from ``buf`` with the serializer found for ``format``."""
    serializer = get_serializer(format)
    return serializer.read(buf, format)
69
+
70
+
71
def read_uint(buf: io.Reader, format: Format) -> int:
    """Read a value with ``format`` and require a non-negative ``int``.

    Raises:
        AssertionError: if the decoded value is not an ``int`` or is negative.
    """
    value = read_serializable(buf, format)

    # Raised explicitly (same exception type as before) so the checks are not
    # stripped when Python runs with -O.
    if not isinstance(value, int):
        raise AssertionError(f"expected uint, got {format}")
    if value < 0:
        # The original message lacked the f prefix and printed "{format}".
        raise AssertionError(f"expected uint, got {format}")

    return value
@@ -0,0 +1,13 @@
1
+ import typing as t
2
+ import io
3
+
4
+ type Format = type | t.TypeAliasType
5
+
6
+
7
+ class Serializer[T]:
8
+ def length(self, format: Format) -> int | None:
9
+ return None
10
+
11
+ def write(self, buf: io.Writer, format: Format, value: T): ...
12
+
13
+ def read(self, buf: io.Reader, format: Format) -> T: ...
@@ -0,0 +1,105 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ import pytest
5
+ import structo as st
6
+ from structo import (
7
+ SerializableObject,
8
+ deserialize_from_bytes,
9
+ uint16,
10
+ uint16_LE,
11
+ uint16_BE,
12
+ serialize_to_bytes,
13
+ )
14
+ from utils import assert_serialises_correctly, assert_equal
15
+
16
+
17
+ testdata: list[tuple[st.Format, t.Any, bytes]] = []
18
+
19
+
20
+ @dataclass
21
+ class EncodeTests[T]:
22
+ datatype: st.Format
23
+ tests: list[tuple[T, bytes]]
24
+
25
+
26
# A literal format encodes to exactly the bytes it names.
testdata.append((st.Literal[b"foo"], b"foo", b"foo"))

testsets: list[EncodeTests] = [
    EncodeTests(
        datatype=st.uint8,
        tests=[(1, bytes([1])), (100, bytes([100])), (255, bytes([255]))],
    ),
    # NOTE(review): this set pairs the unsubscripted st.Literal with integer
    # values; it looks copied from the uint8 set above. Confirm it is intended.
    EncodeTests(
        datatype=st.Literal,
        tests=[(1, bytes([1])), (100, bytes([100])), (255, bytes([255]))],
    ),
]

# Flatten every set into (datatype, value, expected) rows.
for testset in testsets:
    testdata.extend(
        (testset.datatype, value, expected) for value, expected in testset.tests
    )
50
+
51
+
52
+ @dataclass
53
+ class BinaryEncodeTests[T]:
54
+ tests: list[tuple[T, bytes]]
55
+ le: st.Format
56
+ be: st.Format
57
+ be_default: st.Format
58
+
59
+
60
# Expected bytes are written big-endian and must be exactly as wide as the
# format (2, 4 or 8 bytes); the loop below reverses them for little-endian.
binary_testsets: list[BinaryEncodeTests] = [
    BinaryEncodeTests(
        le=st.uint16_LE,
        be=st.uint16_BE,
        be_default=st.uint16,
        tests=[
            (1, bytes([0, 1])),
            ((256 * 2) + 1, bytes([2, 1])),
        ],
    ),
    BinaryEncodeTests(
        le=st.uint32_LE,
        be=st.uint32_BE,
        be_default=st.uint32,
        tests=[
            (1, bytes([0, 0, 0, 1])),
            ((2 << 8) + 1, bytes([0, 0, 2, 1])),
            ((3 << 16) + (2 << 8) + 1, bytes([0, 3, 2, 1])),
            ((4 << 24) + (3 << 16) + (2 << 8) + 1, bytes([4, 3, 2, 1])),
        ],
    ),
    BinaryEncodeTests(
        le=st.uint64_LE,
        be=st.uint64_BE,
        be_default=st.uint64,
        tests=[
            (1, bytes([0, 0, 0, 0, 0, 0, 0, 1])),
            # Previously only seven bytes were listed for this value.
            ((3 << 16) + (2 << 8) + 1, bytes([0, 0, 0, 0, 0, 3, 2, 1])),
            # Bit 48 lies in the second most significant byte, not the first.
            ((1 << 48) + 1, bytes([0, 1, 0, 0, 0, 0, 0, 1])),
        ],
    ),
]

for testset in binary_testsets:
    for value, expected in testset.tests:
        testdata.append((testset.be, value, expected))
        testdata.append((testset.be_default, value, expected))
        testdata.append((testset.le, value, bytes(reversed(expected))))
98
+
99
+
100
@pytest.mark.parametrize("datatype,value,expected", testdata)
def test_encode_primitives(datatype, value, expected):
    """Check that ``value`` encodes under ``datatype`` to exactly ``expected``."""
    encoded = serialize_to_bytes(datatype, value)
    assert encoded == expected, (
        f"{datatype} {value=} | expected {expected}, recieved {encoded}"
    )