structo 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
217
+ output.raw
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structo
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: Structify
5
5
  Author-email: Ben Brady <benbradybusiness@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1,11 @@
1
+ # Structo
2
+
3
+ ```py
4
+
5
+ ```
6
+
7
+ ## Examples
8
+
9
+ See ./examples for example usage
10
+
11
+
@@ -0,0 +1,52 @@
1
+ from structo import Serializer
2
+ from dataclasses import dataclass
3
+ import typing as t
4
+
5
+
6
+ # We don't inherit from SerializableObject
7
+ # since we're using a custom serializer
8
@dataclass
class Flags:
    """Three independent boolean switches handled by a custom serializer."""

    flag_a: bool
    flag_b: bool
    flag_c: bool
13
+
14
+
15
class FlagsSerialiser(Serializer[Flags]):
    """Store a ``Flags`` value in one byte, one bit per flag."""

    def write(self, buf, value):
        """Write ``value`` to ``buf`` as a single byte.

        Bit 0 carries ``flag_a``, bit 1 ``flag_b`` and bit 2 ``flag_c``.
        """
        bits = (value.flag_a, value.flag_b, value.flag_c)
        packed = sum(1 << position for position, is_set in enumerate(bits) if is_set)
        buf.write(bytes([packed]))

    def read(self, buf):
        """Read one byte from ``buf`` and unpack its three lowest bits."""
        packed = buf.read(1)[0]
        return Flags(
            flag_a=bool(packed & 0b001),
            flag_b=bool(packed & 0b010),
            flag_c=bool(packed & 0b100),
        )

    def sizeof(self):
        """Return the encoded size, which is always one byte."""
        return 1
40
+
41
+
42
+ type FlagsDatatype = t.Annotated[Flags, FlagsSerialiser()]
43
+
44
+
45
+ value = Flags(
46
+ flag_a=False,
47
+ flag_b=True,
48
+ flag_c=False,
49
+ )
50
+ output = FlagsSerialiser().to_bytes(value)
51
+ print(output)
52
+ print(FlagsSerialiser().from_bytes(output))
@@ -0,0 +1,64 @@
1
+ import typing as t
2
+ from typing import Annotated
3
+ from structo import (
4
+ SerializableObject,
5
+ Serializer,
6
+ uint32_LE,
7
+ String,
8
+ List,
9
+ )
10
+ import random
11
+ from pathlib import Path
12
+
13
+
14
class Post(SerializableObject):
    """A post record; each field names the serializer used to encode it."""

    id: Annotated[int, uint32_LE]
    # The uint32_LE argument is presumably the length-prefix type -- confirm
    # against String and List.
    author: Annotated[str, String(uint32_LE)]
    tags: Annotated[list[str], List(String(uint32_LE), uint32_LE)]
18
+
19
# Framing markers for the post stream: 0xFF precedes each record, 0x00 ends it.
CONTINUE_BYTE = b"\xff"
NULL_TERMINATOR = b"\x00"
21
+
22
+
23
+ class PostsSerialiser(Serializer[t.Iterable[Post]]):
24
+ def write(self, buf, value):
25
+ for item in value:
26
+ buf.write(CONTINUE_BYTE)
27
+ item.write(buf)
28
+
29
+ buf.write(NULL_TERMINATOR)
30
+
31
+ def read(self, buf):
32
+ while True:
33
+ continue_byte = buf.read(1)
34
+ if continue_byte == NULL_TERMINATOR:
35
+ break
36
+ assert continue_byte == CONTINUE_BYTE, "Continue byte was not 255"
37
+
38
+ print("Loading...") # to prove it's interspliced loading and yielding
39
+ yield Post.read(buf)
40
+
41
+
42
+ type PostsIterable = t.Annotated[t.Iterable[Post], PostsSerialiser()]
43
+
44
+
45
# Too large to effectively store in memory
def generate_posts():
    """Yield one million random posts, one at a time."""
    for _ in range(1_000_000):
        post_id = random.randint(0, 1_000_000)
        tag_count = random.randint(0, 10)
        tags = [str(random.randint(0, 100)) for _ in range(tag_count)]
        yield Post(id=post_id, author="Me", tags=tags)
53
+
54
+
55
output = Path("output.raw")

# Generate the data file only on the first run; later runs reuse it.
if not output.exists():
    with output.open("wb") as f:
        posts = generate_posts()
        PostsSerialiser().write(f, posts)

# Read the posts back lazily, printing each one as it is decoded.
with output.open("rb") as f:
    iterable_posts = PostsSerialiser().read(f)
    for post in iterable_posts:
        print(post)
@@ -0,0 +1,39 @@
1
+ from typing import Annotated
2
+ from structo import uint16_LE, uint32_LE, SerializableObject, Literal
3
+
4
+
5
class WavHeader(SerializableObject):
    """File header: two fixed byte literals around a ``uint32_LE`` chunk size."""

    chunk_id: Annotated[bytes, Literal(b"RIFF")]
    chunk_size: Annotated[int, uint32_LE]
    format: Annotated[bytes, Literal(b"WAVE")]
9
+
10
+
11
class ChunkHeader(SerializableObject):
    """Header of one chunk: a literal id followed by a ``uint32_LE`` size."""

    # Declared with both ids this example reads: b"fmt " and b"data".
    id: Annotated[bytes, Literal(b"fmt ", b"data")]
    size: Annotated[int, uint32_LE]
14
+
15
+
16
class WavFormat(SerializableObject):
    """Fields decoded from the payload of the ``fmt `` chunk."""

    audio_format: Annotated[int, uint16_LE]
    num_channels: Annotated[int, uint16_LE]
    sample_rate: Annotated[int, uint32_LE]
    # NOTE(review): the WAV format calls this field the byte rate -- confirm
    # whether the name ``byte_range`` is intentional.
    byte_range: Annotated[int, uint32_LE]
    block_align: Annotated[int, uint16_LE]
    bits_per_sample: Annotated[int, uint16_LE]
23
+
24
+
25
with open("example.wav", "rb") as f:
    # The parsed file header is discarded; reading it advances the stream.
    WavHeader.read(f)

    # First chunk: the format description.
    format_header = ChunkHeader.read(f)
    assert format_header.id == b"fmt "

    format_data = f.read(format_header.size)
    format = WavFormat.from_bytes(format_data)

    # Second chunk: the raw sample data.
    data_header = ChunkHeader.read(f)
    assert data_header.id == b"data"
    wav_data = f.read(data_header.size)

print(format)
print(len(wav_data))
@@ -11,3 +11,9 @@ dynamic = ["version", "description"]
11
11
 
12
12
  [project.urls]
13
13
  Home = "https://nnilky.site"
14
+
15
+ [dependency-groups]
16
+ dev = [
17
+ "black>=26.1.0",
18
+ "pytest>=9.0.2",
19
+ ]
@@ -0,0 +1,4 @@
1
+ [pytest]
2
+ testpaths =
3
+ tests
4
+ python_functions = " *"
@@ -0,0 +1,47 @@
1
+ """
2
+ Structify
3
+ """
4
+
5
+ __version__ = "0.0.3"
6
+
7
+ from .serializer import Serializer
8
+ from .types import (
9
+ uint64_BE,
10
+ uint64_LE,
11
+ uint64,
12
+ uint32_BE,
13
+ uint32_LE,
14
+ uint32,
15
+ uint16_BE,
16
+ uint16_LE,
17
+ uint16,
18
+ uint8,
19
+ int64_BE,
20
+ int64_LE,
21
+ int64,
22
+ int32_BE,
23
+ int32_LE,
24
+ int32,
25
+ int16_BE,
26
+ int16_LE,
27
+ int16,
28
+ int8,
29
+ float64_BE,
30
+ float64_LE,
31
+ float64,
32
+ float32_BE,
33
+ float32_LE,
34
+ float32,
35
+ Array,
36
+ Buffer,
37
+ List,
38
+ String,
39
+ Blob,
40
+ Literal,
41
+ ObjectSerializer,
42
+ )
43
+ from .object import SerializableObject
44
+ from .utils import (
45
+ StructoReader,
46
+ StructifyWriter,
47
+ )
@@ -0,0 +1,58 @@
1
+ import io
2
+ import annotationlib
3
+ import typing as t
4
+ from dataclasses import dataclass
5
+
6
+ from .serializer import Serializer
7
+ from .serialise import get_serializer
8
+
9
+
10
class SerializableObjectMeta(type):
    """Metaclass that validates field annotations and builds dataclasses.

    Every class created through it, except the one named
    ``SerializableObject``, has each annotation resolved to a serializer
    and is then passed through ``dataclass``.
    """

    def __new__(cls, name, bases, dct):
        """Create the class, validate its fields and return the dataclass.

        Raises:
            ValueError: if ``get_serializer`` fails for any annotated
                attribute; the original exception is chained as the cause.
        """
        new_class = t.cast(type, super().__new__(cls, name, bases, dct))
        # The base class declares no fields and is returned untouched.
        if name == "SerializableObject":
            return new_class

        annotate = annotationlib.get_annotate_from_class_namespace(dct)
        if annotate:
            # Format.VALUE is the public format for evaluating annotations;
            # VALUE_WITH_FAKE_GLOBALS is documented as internal-only.
            attrs = annotate(annotationlib.Format.VALUE)
            for key, value in attrs.items():
                try:
                    # Called only for validation; the result is not stored.
                    get_serializer(value)
                except Exception as e:
                    raise ValueError(
                        f"Invalid attribute definition for {name}.{key}"
                    ) from e

        return t.cast(SerializableObject, dataclass(new_class))
31
+
32
+
33
# The metaclass passes every subclass through ``dataclass``; the
# ``dataclass_transform`` decorator makes that visible to type checkers.
@t.dataclass_transform()
class SerializableObject(metaclass=SerializableObjectMeta):
    """Base class for objects whose annotated fields describe their encoding.

    Every method delegates to the ``ObjectSerializer`` built for the class.
    """

    @classmethod
    def serializer(cls) -> Serializer[t.Self]:
        """Return a new ``ObjectSerializer`` for this class."""
        # Imported inside the method, presumably to avoid a circular import
        # -- confirm against the types package.
        from .types import ObjectSerializer

        return ObjectSerializer(cls)

    @classmethod
    def sizeof(cls) -> int | None:
        """Return whatever the class serializer's ``sizeof`` reports."""
        object_serializer = cls.serializer()
        return object_serializer.sizeof()

    def write(self, buf: io.Writer):
        """Encode this instance into ``buf``."""
        object_serializer = self.serializer()
        return object_serializer.write(buf, self)

    @classmethod
    def read(cls, buf: io.Reader) -> t.Self:
        """Decode one instance from ``buf``."""
        object_serializer = cls.serializer()
        return object_serializer.read(buf)

    def to_bytes(self) -> bytes:
        """Return this instance encoded as ``bytes``."""
        object_serializer = self.serializer()
        return object_serializer.to_bytes(self)

    @classmethod
    def from_bytes(cls, data: bytes) -> t.Self:
        """Decode one instance from ``data``."""
        object_serializer = cls.serializer()
        return object_serializer.from_bytes(data)
@@ -0,0 +1,41 @@
1
+ import io
2
+ import typing as t
3
+ from .serializer import Serializer
4
+
5
+
6
def get_serializer(format: type) -> Serializer:
    """Resolve the serializer described by a field annotation.

    Args:
        format: Either ``Annotated[..., <Serializer instance>]`` or a
            ``SerializableObject`` subclass.

    Returns:
        The single ``Serializer`` found in the ``Annotated`` metadata, or an
        ``ObjectSerializer`` wrapping the ``SerializableObject`` subclass.

    Raises:
        AssertionError: if an ``Annotated`` type carries zero or more than
            one serializer.
        ValueError: for bare ``int``, ``float``, ``bytes`` or ``list``, with
            a hint naming the structo types to use instead.
        NotImplementedError: for any other unsupported annotation.
    """
    from .serializer import Serializer
    from .object import SerializableObject
    from .types.object import ObjectSerializer

    if t.get_origin(format) is t.Annotated:
        serializers = [arg for arg in t.get_args(format) if isinstance(arg, Serializer)]
        assert len(serializers) != 0, f"No serializers for {format} found"
        assert len(serializers) == 1, f"More than one serializers for {format} found"
        return serializers[0]

    # issubclass() raises TypeError when its first argument is not a class
    # (for example ``list[int]``); the isinstance guard lets such annotations
    # fall through to the descriptive errors below.
    if isinstance(format, type) and issubclass(format, SerializableObject):
        return ObjectSerializer(format)

    # Nicely formatted errors:
    if format == int:
        raise ValueError(
            "No serializer for int, you need to use structo.int32, structo.uint32 or similar instead"
        )
    if format == float:
        raise ValueError(
            "No serializer for float, you need to use structo.float32 or structo.float64 instead"
        )
    if format == bytes:
        raise ValueError(
            "No serializer for bytes, you need to use structo.Buffer or structo.Blob instead"
        )
    if format == list:
        raise ValueError(
            "No serializer for list, you need to use structo.List or structo.Array instead"
        )

    raise NotImplementedError(f"No serializer found for {format}")
@@ -0,0 +1,20 @@
1
+ import io
2
+
3
+
4
+ class Serializer[T]:
5
+ def sizeof(self) -> int | None:
6
+ return None
7
+
8
+ def write(self, buf: io.Writer, value: T): ...
9
+
10
+ def read(self, buf: io.Reader) -> T: ...
11
+
12
+ def to_bytes(self, value: T) -> bytes:
13
+ buf = io.BytesIO()
14
+ self.write(buf, value)
15
+ buf.seek(0)
16
+ return buf.getvalue()
17
+
18
+ def from_bytes(self, data: bytes) -> T:
19
+ buf = io.BytesIO(data)
20
+ return self.read(buf)
@@ -0,0 +1,35 @@
1
+ from .primatives import (
2
+ uint64_BE,
3
+ uint64_LE,
4
+ uint64,
5
+ uint32_BE,
6
+ uint32_LE,
7
+ uint32,
8
+ uint16_BE,
9
+ uint16_LE,
10
+ uint16,
11
+ uint8,
12
+ int64_BE,
13
+ int64_LE,
14
+ int64,
15
+ int32_BE,
16
+ int32_LE,
17
+ int32,
18
+ int16_BE,
19
+ int16_LE,
20
+ int16,
21
+ int8,
22
+ float64_BE,
23
+ float64_LE,
24
+ float64,
25
+ float32_BE,
26
+ float32_LE,
27
+ float32,
28
+ )
29
+ from .buffer import Buffer
30
+ from .array import Array
31
+ from .blob import Blob
32
+ from .list import List
33
+ from .string import String
34
+ from .literal import Literal
35
+ from .object import ObjectSerializer
@@ -0,0 +1,34 @@
1
+ from ..serializer import Serializer
2
+
3
+
4
+ class Array[T](Serializer[list[T]]):
5
+ length: int
6
+ type: Serializer[T]
7
+
8
+ def __init__(self, length: int, type: Serializer[T]) -> None:
9
+ assert length > 0, "Array must be longer than 0"
10
+ self.length = length
11
+ self.type = type
12
+
13
+ def write(self, buf, value):
14
+ assert (
15
+ len(value) == self.length
16
+ ), f"expected array with {self.length} length, receieved {len(value)}"
17
+
18
+ for item in value:
19
+ self.type.write(buf, item)
20
+
21
+ def read(self, buf):
22
+ items = []
23
+ for _ in range(self.length):
24
+ items.append(self.type.read(buf))
25
+
26
+ return items
27
+
28
+ def sizeof(self):
29
+ element_length = self.type.sizeof()
30
+ if element_length is None:
31
+ return None
32
+ else:
33
+ return element_length * self.length
34
+
@@ -0,0 +1,19 @@
1
+ from ..serializer import Serializer
2
+
3
+
4
class Blob(Serializer[bytes]):
    """A set of arbitrary bytes, prefixed with its length."""

    length_type: Serializer[int]

    def __init__(self, length_type: Serializer[int]) -> None:
        """Store the serializer used for the length prefix."""
        self.length_type = length_type

    def write(self, buf, value):
        """Write the length of ``value`` and then ``value`` itself."""
        size = len(value)
        self.length_type.write(buf, size)
        buf.write(value)

    def read(self, buf):
        """Read the length prefix, then return that many bytes from ``buf``."""
        size = self.length_type.read(buf)
        return buf.read(size)
@@ -0,0 +1,21 @@
1
+ from ..serializer import Serializer
2
+
3
+
4
class Buffer(Serializer[bytes]):
    """A run of raw bytes of fixed length, written without a length prefix."""

    length: int

    def __init__(self, length: int) -> None:
        """Store the exact byte count this buffer holds."""
        self.length = length

    def write(self, buf, value):
        """Write ``value`` to ``buf`` after checking its length.

        Raises:
            AssertionError: if ``value`` is not exactly ``length`` bytes long.
        """
        actual = len(value)
        assert (
            actual == self.length
        ), f"expected data with length {self.length}, received {actual}"
        buf.write(value)

    def read(self, buf):
        """Return the result of reading ``length`` bytes from ``buf``."""
        return buf.read(self.length)

    def sizeof(self):
        """Return the fixed encoded size in bytes."""
        return self.length