umbi 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
umbi-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Roman Andriushchenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
umbi-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: umbi
3
+ Version: 0.0.1
4
+ Summary: Library for binary encoding of annotated transition systems
5
+ Author-email: Roman Andriushchenko <roman.andriu@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Roman Andriushchenko
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/randriu/umbi
29
+ Requires-Python: >=3.9
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: marshmallow
33
+ Requires-Dist: python-magic
34
+ Requires-Dist: tomli
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest; extra == "dev"
37
+ Requires-Dist: pip-tools; extra == "dev"
38
+ Requires-Dist: black; extra == "dev"
39
+ Requires-Dist: isort; extra == "dev"
40
+ Requires-Dist: toml-sort; extra == "dev"
41
+ Requires-Dist: twine; extra == "dev"
42
+ Requires-Dist: bumpver; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # umbi
46
+
47
+ ## Installation:
48
+
49
+ (optional) create and activate a python environment:
50
+
51
+ ```
52
+ $ python -m venv venv
53
+ $ source venv/bin/activate
54
+ ```
55
+
56
+ Install `umbi` via
57
+ ```
58
+ (venv) $ pip install umbi
59
+ ```
60
+
61
+ ## Running umbi
62
+
63
+ Examples:
64
+ ```
65
+ (venv) $ umbi --import-umb /path/to/input.umb
66
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb
67
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb --log-level=DEBUG
68
+ ```
umbi-0.0.1/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # umbi
2
+
3
+ ## Installation:
4
+
5
+ (Optional) Create and activate a Python virtual environment:
6
+
7
+ ```
8
+ $ python -m venv venv
9
+ $ source venv/bin/activate
10
+ ```
11
+
12
+ Install `umbi` via
13
+ ```
14
+ (venv) $ pip install umbi
15
+ ```
16
+
17
+ ## Running umbi
18
+
19
+ Examples:
20
+ ```
21
+ (venv) $ umbi --import-umb /path/to/input.umb
22
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb
23
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb --log-level=DEBUG
24
+ ```
@@ -0,0 +1,53 @@
1
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "umbi"
version = "0.0.1"
description = "Library for binary encoding of annotated transition systems"
readme = "README.md"
authors = [{name = "Roman Andriushchenko", email = "roman.andriu@gmail.com"}]
license = {file = "LICENSE"}
# click is imported by umbi/__main__.py (the `umbi` console script below),
# so it has to be installed together with the package.
dependencies = [
    "click",
    "marshmallow",
    "python-magic",
    "tomli",
]
requires-python = ">=3.9"

[project.urls]
Homepage = "https://github.com/randriu/umbi"

[project.optional-dependencies]
dev = [
    "pytest",
    "pip-tools",
    "black",
    "isort",
    "toml-sort",
    "twine",
    "bumpver",
]

[project.scripts]
umbi = "umbi.__main__:main"

[tool.setuptools]
packages = ["umbi"]
include-package-data = true

[tool.bumpver]
current_version = "0.0.1"
version_pattern = "MAJOR.MINOR.PATCH"
commit_message = "bump version {old_version} -> {new_version}"
tag_message = "{new_version}"
tag_scope = "default"
pre_commit_hook = ""
post_commit_hook = ""
commit = false
tag = false
push = false

[tool.bumpver.file_patterns]
"pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"']
umbi-0.0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,33 @@
1
+ import logging
2
+ import os
3
+
4
+ import tomli
5
+
6
+ logging.basicConfig(level=logging.DEBUG, format="%(levelname)s - %(message)s")
7
+
8
+ from .explicit_ats import ExplicitAts
9
+ from .io_bytes import *
10
+ from .io_json import *
11
+ from .io_tar import *
12
+ from .io_umb import *
13
+
14
+ # from .simple_ats import SimpleAts
15
+
16
+
17
def get_pyproject_attribute(attribute, default):
    """Read an attribute of the ``[project]`` table from pyproject.toml.

    The file is looked up one directory above this package.

    :param attribute: key of the ``[project]`` table, e.g. "name" or "version"
    :param default: value returned when the file, the ``[project]`` table or
        the attribute itself is missing
    :returns: the attribute value, or ``default`` if it cannot be read
    """
    # NOTE(review): pyproject.toml lives outside the package directory, so an
    # installed distribution presumably always takes the default - confirm.
    pyproject_path = os.path.join(os.path.dirname(__file__), "..", "pyproject.toml")
    try:
        with open(pyproject_path, "rb") as f:
            project_data = tomli.load(f)["project"]
    except (FileNotFoundError, KeyError):
        return default
    # a plain .get(attribute) would return None for a missing attribute
    # instead of the caller-supplied default
    return project_data.get(attribute, default)
26
+
27
+
28
+ __toolname__ = get_pyproject_attribute("name", "unknown")
29
+ __version__ = get_pyproject_attribute("version", "0.0.0")
30
+
31
+ # TODO move to config file
32
+ __format_version__ = 0
33
+ __format_revision__ = 1
@@ -0,0 +1,35 @@
1
+ import logging
2
+
3
+ import click
4
+
5
+ import umbi
6
+
7
+
8
@click.command()
@click.option(
    "--log-level",
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]),
    default="INFO",
    show_default=True,
    required=False,
    help="logging level",
)
@click.option("--import-umb", type=click.Path(), required=False, help=".umb filepath to import")
@click.option("--export-umb", type=click.Path(), required=False, help=".umb filepath to export")
def main(log_level, import_umb, export_umb):
    """Import a .umb file and optionally export it again."""
    # the root logger level is taken from the --log-level option
    logging.getLogger().setLevel(level=getattr(logging, log_level))
    logging.debug(f"this is {umbi.__toolname__} v.{umbi.__version__}")

    ats = None
    if import_umb is not None:
        # the original message contained a stray "$" in front of the path
        logging.info(f"loading {import_umb}")
        ats = umbi.read_umb(import_umb)
    if export_umb is not None:
        if ats is None:
            raise ValueError("--export-umb specified, but nothing to export")
        umbi.write_umb(ats, export_umb)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ main()
umbi-0.0.1/umbi/ats.py ADDED
@@ -0,0 +1,136 @@
1
+ import collections
2
+ import random
3
+
4
+ import umbi
5
+
6
+
7
class SimpleAts:
    """Annotated transition system.

    States own choices (``state_choices``), choices own branches
    (``choice_branches``) and every branch has a target state
    (``branch_target``) and a value (``branch_value``).
    """

    def __init__(self):
        self.info = umbi.AtsInfoSchema.empty_object()

        self.time = None
        self.branch_probability_type = None

        self.num_players = 1
        self.num_actions = 1

        self.initial_states = None
        self.state_choices = []
        self.choice_branches = []
        self.branch_target = []
        self.branch_value = []
        self.annotations = {}

    def validate_field_set(self, field: str):
        """Raise ValueError if the attribute named `field` is None."""
        field_value = getattr(self, field)
        if field_value is None:
            raise ValueError(f"SimpleAts: field '{field}' is not set")

    def validate_field_in(self, field: str, domain: list):
        """Validate that the field has the value within the given domain."""
        self.validate_field_set(field)
        field_value = getattr(self, field)
        if field_value not in domain:
            raise ValueError(f"SimpleAts: field '{field}' must be in {domain}")

    @staticmethod
    def validate_is_list(l, name, length=None):
        """Raise ValueError unless `l` is a list (of the given length, if any)."""
        if not isinstance(l, list):
            raise ValueError(f"SimpleAts: '{name}' must be a list")
        if length is not None and not len(l) == length:
            raise ValueError(f"SimpleAts: '{name}' must be of length {length}")

    @property
    def num_states(self):
        return len(self.state_choices)

    @property
    def num_initial_states(self):
        # initial_states is None until it is assigned; len() fails in that case
        return len(self.initial_states)

    @property
    def num_choices(self):
        return len(self.choice_branches)

    @property
    def num_branches(self):
        return len(self.branch_target)

    def validate(self):
        """Check the fields of this object, raising ValueError on the first violation."""
        self.validate_field_in("time", ["discrete", "stochastic", "urgent-stochastic"])
        # validate the attribute that __init__ actually defines; the previous
        # field names (branch_values, branch_value_type) do not exist
        self.validate_field_in(
            "branch_probability_type", ["none", "double", "rational", "double-interval", "rational-interval"]
        )

        self.validate_field_set("num_players")
        self.validate_field_set("num_actions")

        # initial_states defaults to None, so check it before taking its length
        self.validate_field_set("initial_states")
        SimpleAts.validate_is_list(self.initial_states, "initial_states")
        if not all(state < self.num_states for state in self.initial_states):
            raise ValueError("SimpleAts: invalid initial states")

        SimpleAts.validate_is_list(self.state_choices, "state_choices")
        SimpleAts.validate_is_list(self.choice_branches, "choice_branches")
        for choice, branches in enumerate(self.choice_branches):
            SimpleAts.validate_is_list(branches, f"choice_branches[{choice}]")
            if not len(branches) > 0:
                raise ValueError(f"SimpleAts: 'choice_branches[{choice}]' must be a non-empty list")

        SimpleAts.validate_is_list(self.branch_target, "branch_target")
        SimpleAts.validate_is_list(self.branch_value, "branch_value")

    # def add_state(self):
    #     self.state_choices.append([])

    # def add_states(self, num_new_states : int):
    #     for _ in range(num_new_states):
    #         self.add_state()

    def add_annotation(self, key: str):
        """Create an empty annotation under `key`, replacing an existing one, and return it."""
        if key in self.annotations:
            print(f"warning: redefining annotation {key}")
        annotation = umbi.AnnotationSchema.empty_object()
        self.annotations[key] = annotation
        return annotation

    def choice_successors(self, choice: int) -> set:
        """Return the set of target states of the branches of `choice`."""
        successors = set()
        for branch in self.choice_branches[choice]:
            successors.add(self.branch_target[branch])
        return successors

    def state_successors(self, state: int) -> set:
        """Return the set of target states reachable via any choice of `state`."""
        successors = set()
        for choice in self.state_choices[state]:
            successors.update(self.choice_successors(choice))
        return successors

    def choice_distribution(self, choice: int) -> dict:
        """Return a dictionary target state -> accumulated branch value for `choice`."""
        distr = collections.defaultdict(int)
        for branch in self.choice_branches[choice]:
            # target and value are stored in two parallel lists indexed by branch
            distr[self.branch_target[branch]] += self.branch_value[branch]
        return dict(distr)

    def sample_choice(self, state: int) -> int:
        """Pick one of the choices of `state` uniformly at random."""
        return random.choice(self.state_choices[state])

    def sample_choice_target(self, choice: int) -> int:
        """Sample a target state of `choice`, weighted by the branch values."""
        distr = self.choice_distribution(choice)
        target = random.choices(population=list(distr.keys()), weights=list(distr.values()), k=1)[0]
        return target

    def sample_path(self, state=None, length=0):
        """Sample a path of `length` transitions.

        :param state: the first state; a random initial state if None
        :param length: the number of transitions to sample
        :returns: a list of length + 1 states
        """
        if state is None:
            state = random.choice(self.initial_states)
        path = [state]
        for _ in range(length):
            choice = self.sample_choice(state)
            state = self.sample_choice_target(choice)
            path.append(state)
        return path
@@ -0,0 +1,23 @@
1
+ # import umbi
2
+
3
+
4
class ExplicitAts:
    """Annotated transition system in an explicit format matching the .umb file format.

    Every attribute starts out as None, except for `annotations`, which
    starts out as an empty dictionary.
    """

    def __init__(self):
        # contents of the index file
        self.info = None

        # per-state data
        self.initial_states = self.state_choices = None
        self.state_to_player = self.exit_rates = None

        # per-choice and per-branch data
        self.choice_branches = self.branch_target = None
        self.branch_probabilities = None

        # action data
        self.choice_to_action = self.branch_to_action = None
        self.action_to_string = None

        self.annotations = dict()
@@ -0,0 +1,113 @@
1
+ import logging
2
+ import struct
3
+
4
+ import umbi
5
+
6
+
7
def bytes_to_string(data: bytes) -> str:
    """Decode a utf-8 encoded binary string into text."""
    text = data.decode(encoding="utf-8")
    return text
10
+
11
+
12
def string_to_bytes(string: str) -> bytes:
    """Encode text as a utf-8 binary string."""
    encoded = string.encode(encoding="utf-8")
    return encoded
15
+
16
+
17
def assert_key_in_dict(table: dict, key, desc: str):
    """Raise ValueError unless `key` is a key of `table`.

    :param desc: description of the key, used in the error message
    """
    if key in table:
        return
    raise ValueError(f"{desc} must be in {table} but is {key}")
20
+
21
+
22
def value_type_to_struct_format(value_type: str) -> str:
    """Map a value type to the format character used by the struct module.

    :raises ValueError: if the value type is not supported
    """
    struct_formats = dict(int32="i", uint32="I", int64="q", uint64="Q", double="d")
    assert_key_in_dict(struct_formats, value_type, "value type")
    return struct_formats[value_type]
33
+
34
+
35
def value_type_to_size(value_type: str) -> int:
    """Map a value type to the number of bytes one value occupies.

    :raises ValueError: if the value type is not supported
    """
    sizes = dict(int32=4, uint32=4, int64=8, uint64=8, double=8)
    assert_key_in_dict(sizes, value_type, "value type")
    return sizes[value_type]
46
+
47
+
48
def endianness_to_struct_format(little_endian: bool) -> str:
    """
    Map the endianness flag to the byte-order character used by the struct module.

    :param little_endian: True for little-endian, False for big-endian
    :returns: "<" for little-endian, ">" for big-endian
    """
    byte_orders = {False: ">", True: "<"}
    assert_key_in_dict(byte_orders, little_endian, "endianness")
    return byte_orders[little_endian]
56
+
57
+
58
def vector_to_bytes(vector: list, value_type: str, little_endian: bool = True) -> bytes:
    """Encode a list of values as a binary string.

    :param vector: the values to encode; a string if value_type is "char"
    :param value_type: vector element type, one of
        {"char", "bool", "int32", "uint32", "int64", "uint64", "double"}
    :param little_endian: byte order of the numeric types; bitvectors support
        only little-endian
    :returns: the encoded binary string; empty if the vector is empty
    """
    if len(vector) == 0:
        logging.warning("exporting empty binary file")
        return b""

    if value_type == "char":
        return string_to_bytes(vector)

    if value_type == "bool":
        # TODO respect endianness
        assert little_endian, "big-endianness for bitvectors is not implemented"
        # drop trailing zeros?

        # pad vector up to a multiple of 64 bits
        target_pad = 64
        num_pad = -len(vector) % target_pad
        padded = list(vector) + [False] * num_pad
        # bit i of every byte holds entry i of the corresponding group of 8;
        # the bytes are built in one pass instead of repeated concatenation
        return bytes(
            sum((1 << i) for i, bit in enumerate(padded[start : start + 8]) if bit)
            for start in range(0, len(padded), 8)
        )

    for item in vector:
        assert isinstance(item, int) or isinstance(item, float)
        if value_type in ["uint32", "uint64"]:
            assert isinstance(item, int) and item >= 0
    endian_format = endianness_to_struct_format(little_endian)
    type_format = value_type_to_struct_format(value_type)
    return struct.pack(f"{endian_format}{len(vector)}{type_format}", *vector)
93
+
94
+
95
def bytes_to_vector(vector_bytes: bytes, value_type: str, little_endian: bool = True) -> list:
    """
    Decode a binary string as a list of values.

    :param value_type: vector element type; "char" yields a string, "bool"
        yields 8 bools per byte (least significant bit first), any other type
        is unpacked via struct
    :param little_endian: byte order of the struct-based types
    """
    if value_type == "char":
        return bytes_to_string(vector_bytes)
    if value_type == "bool":
        return [(byte >> bit) & 1 == 1 for byte in vector_bytes for bit in range(8)]
    type_format = value_type_to_struct_format(value_type)
    endian_format = endianness_to_struct_format(little_endian)
    count = len(vector_bytes) // value_type_to_size(value_type)
    unpacked = struct.unpack(f"{endian_format}{count}{type_format}", vector_bytes)
    return list(unpacked)
@@ -0,0 +1,43 @@
1
+ import json
2
+ import logging
3
+ import typing
4
+
5
+ import umbi
6
+
7
+ """ A type alias for (high-level) json objects. """
8
+ jsonlike = typing.Union[dict, list]
9
+
10
+
11
def json_remove_none(json_obj: object):
    """Recursively drop dictionary entries whose value is None (null).

    None items of lists are kept; other values are returned unchanged.
    """
    if isinstance(json_obj, list):
        return [json_remove_none(item) for item in json_obj]
    if not isinstance(json_obj, dict):
        return json_obj
    cleaned = {}
    for key, value in json_obj.items():
        if value is not None:
            cleaned[key] = json_remove_none(value)
    return cleaned
19
+
20
+
21
def json_to_string(json_obj: jsonlike, remove_none: bool = False, indent: int = 4) -> str:
    """Serialize a json object to a string.

    :param remove_none: if True, None-valued dictionary entries are dropped first
    :param indent: indentation passed to json.dumps
    """
    payload = json_remove_none(json_obj) if remove_none else json_obj
    return json.dumps(payload, indent=indent)
26
+
27
+
28
def string_to_json(json_str: str) -> jsonlike:
    """Parse a string into a json object."""
    parsed = json.loads(json_str)
    return parsed
31
+
32
+
33
def json_show(json_obj: jsonlike):
    """Log the string form of a json object at DEBUG level."""
    rendered = json_to_string(json_obj)
    logging.debug(rendered)
36
+
37
+
38
def bytes_to_json(data: bytes) -> jsonlike:
    """Decode a binary string as utf-8 and parse it into a json object."""
    text = umbi.bytes_to_string(data)
    return string_to_json(text)
40
+
41
+
42
def json_to_bytes(json_obj: jsonlike) -> bytes:
    """Serialize a json object to a string and encode it as utf-8 bytes."""
    text = json_to_string(json_obj)
    return umbi.string_to_bytes(text)
@@ -0,0 +1,160 @@
1
+ import io
2
+ import logging
3
+ import tarfile
4
+
5
+ import umbi
6
+
7
+
8
def tar_filenames(tarpath: str) -> list[str]:
    """List the names of the regular files stored in the tarball."""
    names = []
    with tarfile.open(tarpath, mode="r:*") as archive:
        for member in archive.getmembers():
            if member.isfile():
                names.append(member.name)
    return names
12
+
13
+
14
def tar_read_file(tarpath: str, filename: str) -> object:
    """Return the contents of one regular file stored in the tarball.

    :raises KeyError: if the tarball has no regular file of that name
    """
    with tarfile.open(tarpath, mode="r:*") as archive:
        regular_files = [entry.name for entry in archive.getmembers() if entry.isfile()]
        if filename not in regular_files:
            raise KeyError(f"{tarpath} has no file {filename}")
        stream = archive.extractfile(archive.getmember(filename))
        return stream.read()
22
+
23
+
24
def tar_read(tarpath: str) -> dict[str, bytes]:
    """
    Read the contents of every regular file stored in a tarball.

    :returns: a dictionary filename -> contents
    """
    with tarfile.open(tarpath, mode="r:*") as archive:
        regular_files = (entry for entry in archive.getmembers() if entry.isfile())
        return {entry.name: archive.extractfile(entry).read() for entry in regular_files}
36
+
37
+
38
def tar_write(tarpath: str, filename_data: dict[str, bytes], gzip: bool = True):
    """
    Write the given contents to a tarball file.

    The archive is assembled in memory and then written to disk in one go.

    :param tarpath: path to a tarball file
    :param filename_data: a dictionary filename -> binary string
    :param gzip: if True, the tarball file will be gzipped
    """
    mode = "w:gz" if gzip else "w"
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode=mode) as archive:
        for filename, data in filename_data.items():
            entry = tarfile.TarInfo(name=filename)
            entry.size = len(data)
            archive.addfile(entry, io.BytesIO(data))
    with open(tarpath, "wb") as file:
        file.write(buffer.getvalue())
    logging.info(f"data exported to {tarpath}")
59
+
60
+
61
def row_start_to_ranges(row_start: list) -> list:
    """Expand row start indices into one list of consecutive indices per row.

    Row i covers the indices from row_start[i] up to, but excluding, row_start[i + 1].
    """
    return [list(range(first, past_last)) for first, past_last in zip(row_start, row_start[1:])]
68
+
69
+
70
def ranges_to_row_start(ranges: list) -> list:
    """Convert ranges to row start indices.

    This is the inverse of row_start_to_ranges for row starts beginning at 0.
    An empty range (a row without entries) starts where the previous row
    ended, and an empty list of ranges yields [0].

    :param ranges: one sequence of consecutive indices per row
    :returns: a list of len(ranges) + 1 row start indices
    """
    row_start = []
    next_start = 0
    for interval in ranges:
        if len(interval) > 0:
            next_start = interval[0]
        row_start.append(next_start)
        if len(interval) > 0:
            next_start = interval[-1] + 1
    # the closing entry points one past the last index
    row_start.append(next_start)
    assert len(row_start) == len(ranges) + 1
    return row_start
76
+
77
+
78
def indices_to_bitvector(vector: list[int], num_entries: int) -> list[bool]:
    """Convert a list of unsigned integers to a bitvector.

    :param vector: a list of unsigned integers; may be empty
    :param num_entries: the size of the resulting bitvector, must be greater
        than every entry of vector
    :returns: a list of num_entries bools, True exactly at the given indices
    """
    # checking each index (instead of max(vector)) also accepts an empty
    # vector and rejects negative indices, which would wrap around
    assert all(0 <= x < num_entries for x in vector)
    bitvector = [False] * num_entries
    for x in vector:
        bitvector[x] = True
    return bitvector
89
+
90
+
91
def bitvector_to_indices(bitvector: list[bool]) -> list[int]:
    """Collect the positions of a bitvector that hold a true value.

    :param bitvector: a list of bools
    :returns: the indices of the true entries, in increasing order
    """
    indices = []
    for position, bit in enumerate(bitvector):
        if bit:
            indices.append(position)
    return indices
97
+
98
+
99
class TarReader:
    """Helper for reading files of a tarball that remembers which files were read."""

    def __init__(self, tarpath: str):
        self.tarpath = tarpath
        self.filenames = tar_filenames(tarpath)
        self.files_read = set()

        listing = "\n".join(self.filenames)
        logging.debug(f"found the following files:\n{listing}")

    def warn_unread_files(self):
        """Log a warning listing the files of the tarball that were never read, if any."""
        unread_files = [name for name in self.filenames if name not in self.files_read]
        if not unread_files:
            return
        unread_files_str = "\n".join(unread_files)
        logging.warning(
            f'the following files from "{self.tarpath}" were not used during parsing:\n{unread_files_str}'
        )

    def read(self, filename: str, file_format: str, csr: bool = False) -> object:
        """Read one file of the tarball and decode its contents.

        :param file_format: "json", or a value type accepted by umbi.bytes_to_vector;
            a "bool" file is returned as the list of indices that are set
        :param csr: if True, the decoded vector is interpreted as row start
            indices and converted to ranges
        :raises KeyError: if the tarball has no such file
        """
        if filename not in self.filenames:
            raise KeyError(f"tar archive {self.tarpath} has no required file {filename}")
        self.files_read.add(filename)
        raw = tar_read_file(self.tarpath, filename)

        if file_format == "json":
            return umbi.bytes_to_json(raw)

        values = umbi.bytes_to_vector(raw, file_format)
        if file_format == "bool":
            values = bitvector_to_indices(values)
        return row_start_to_ranges(values) if csr else values
138
+
139
+
140
class TarWriter:
    """Helper that collects encoded files and writes them into a tarball."""

    def __init__(self):
        # filename -> encoded contents
        self.filename_data: dict[str, bytes] = {}

    def add(self, data: object, filename: str, file_format: str, csr: bool = False):
        """Encode the data and register it under the given filename.

        :param file_format: "json", or a value type accepted by umbi.vector_to_bytes
        :param csr: if True, the data are ranges that are first converted to
            row start indices
        """
        payload = ranges_to_row_start(data) if csr else data

        if file_format == "json":
            encoded = umbi.json_to_bytes(payload)
        else:
            # if file_format == "bool":
            #     data = indices_to_bitvector(data)
            encoded = umbi.vector_to_bytes(payload, file_format)

        self.filename_data[filename] = encoded

    def write(self, tarpath: str):
        """Write all registered files to a tarball at the given path."""
        umbi.tar_write(tarpath, self.filename_data)
@@ -0,0 +1,364 @@
1
+ import logging
2
+ import time
3
+ from types import SimpleNamespace
4
+
5
+ from marshmallow import (
6
+ Schema,
7
+ ValidationError,
8
+ fields,
9
+ post_load,
10
+ validate,
11
+ validates_schema,
12
+ )
13
+
14
+ import umbi
15
+
16
+
17
class FieldUint(fields.Int):
    """Marshmallow integer field that rejects negative values on deserialization."""

    def _deserialize(self, value, attr, data, **kwargs):
        number = super()._deserialize(value, attr, data, **kwargs)
        if not number < 0:
            return number
        raise ValidationError(f"value {value} must be an unsigned integer")
25
+
26
+
27
class JsonSchema(Schema):
    """Common base of the schemas below; loaded data is turned into attribute objects."""

    @post_load
    def make_object(self, data, **kwargs):
        """Turn the loaded data into an object with one attribute per schema field.

        Fields missing from the data are set to None.
        """
        for name in self.fields:
            data.setdefault(name, None)
        return SimpleNamespace(**data)

    @classmethod
    def empty_object(cls):
        """Create an object with one attribute per schema field, each set to None."""
        return SimpleNamespace(**dict.fromkeys(cls().fields))

    @validates_schema
    def validate_fields(self, data, **kwargs):
        """Schema-level validation hook; does nothing in this base class."""
        pass

    @classmethod
    def from_json(cls, json_obj) -> SimpleNamespace:
        """Parse from a json object.

        :raises ValidationError: if the json object does not match the schema;
            the validation messages are logged before the error is re-raised
        """
        try:
            return cls().load(json_obj)
        except ValidationError as err:
            logging.error(f"{cls} validation error:")
            logging.error(umbi.json_to_string(err.messages))
            raise
58
+
59
+
60
class ModelDataSchema(JsonSchema):
    """Model data schema.

    Describes the optional "model-data" entry of the index file (see
    AtsInfoSchema). None of the fields is required.
    """

    name = fields.String(data_key="name")
    version = fields.String(data_key="version")
    # a list of strings
    authors = fields.List(fields.String(), data_key="authors")
    description = fields.String(data_key="description")
    comment = fields.String(data_key="comment")
    doi = fields.String(data_key="doi")
    url = fields.String(data_key="url")
70
+
71
+
72
class FileDataSchema(JsonSchema):
    """File data schema: which tool created the file, and when."""

    tool = fields.String(data_key="tool")
    tool_version = fields.String(data_key="tool-version")
    creation_date = FieldUint(data_key="creation-date")
    parameters = fields.Raw(data_key="parameters")

    @classmethod
    def this_tool_object(cls):
        """Create an object describing this tool and the current time.

        The creation date is int(time.time()); parameters stay None.
        """
        described = cls.empty_object()
        described.tool = umbi.__toolname__
        described.tool_version = umbi.__version__
        described.creation_date = int(time.time())
        return described
88
+
89
+
90
class TransitionSystemSchema(JsonSchema):
    """Schema of the "transition-system" entry of the index file; all fields are required."""

    time = fields.String(
        required=True,
        data_key="time",
        validate=validate.OneOf(["discrete", "stochastic", "urgent-stochastic"]),
    )

    # unsigned counts, stored under "#..." keys
    num_players = FieldUint(required=True, data_key="#players")
    num_states = FieldUint(required=True, data_key="#states")
    num_initial_states = FieldUint(required=True, data_key="#initial-states")
    num_choices = FieldUint(required=True, data_key="#choices")
    num_actions = FieldUint(required=True, data_key="#actions")
    num_branches = FieldUint(required=True, data_key="#branches")

    branch_probability_type = fields.String(
        required=True,
        data_key="branch-probability-type",
        validate=validate.OneOf(["none", "double", "rational", "double-interval", "rational-interval"]),
    )
109
+
110
+
111
class AtomicPropositionSchema(JsonSchema):
    """Atomic proposition schema; only "applies-to" is required."""

    alias = fields.String(required=False, data_key="alias")
    description = fields.String(required=False, data_key="description")
    applies_to = fields.List(
        fields.String(validate=validate.OneOf(["states", "choices", "branches"])),
        required=True,
        data_key="applies-to",
    )
    # the type is "bool" when absent from the loaded data
    type = fields.String(
        required=False,
        load_default="bool",
        data_key="type",
        validate=validate.OneOf(["bool"]),
    )  # TODO discuss
122
+
123
+
124
class RewardSchema(JsonSchema):
    """Reward model schema; "applies-to" and "type" are required."""

    alias = fields.String(data_key="alias")
    description = fields.String(data_key="description")
    applies_to = fields.List(
        fields.String(validate=validate.OneOf(["states", "choices", "branches"])),
        required=True,
        data_key="applies-to",
    )
    type = fields.String(
        required=True,
        data_key="type",
        validate=validate.OneOf(["double", "rational", "double-interval", "rational-interval"]),
    )
    # optional bounds
    lower = fields.Float(required=False, data_key="lower")
    upper = fields.Float(required=False, data_key="upper")
141
+
142
+
143
class VariableValuationSchema(JsonSchema):
    """Variable valuation schema; "applies-to" and "type" are required."""

    alias = fields.String(required=False, data_key="alias")
    description = fields.String(required=False, data_key="description")
    applies_to = fields.List(
        fields.String(validate=validate.OneOf(["states", "choices", "branches"])),
        required=True,
        data_key="applies-to",
    )
    type = fields.String(
        required=True,
        data_key="type",
        validate=validate.OneOf(["bool", "int", "int32", "uint32", "int64", "uint64"]),
    )

    @post_load
    def make_object(self, data, **kwargs):
        """Create the object as in the base class, replacing the type "int" by "int32"."""
        valuation = super().make_object(data, **kwargs)
        if valuation.type != "int":  # TODO discuss
            return valuation
        logging.warning("variable annotation type is int, interpreting as int32")
        valuation.type = "int32"
        return valuation
162
+
163
+
164
class AnnotationSchema(JsonSchema):
    """Schema of the annotations: three optional dictionaries keyed by strings."""

    aps = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(AtomicPropositionSchema),
        required=False,
        data_key="aps",
    )
    rewards = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(RewardSchema),
        required=False,
        data_key="rewards",
    )
    variables = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(VariableValuationSchema),
        required=False,
        data_key="variables",
    )
174
+
175
+
176
class AtsInfoSchema(JsonSchema):
    """ATS index file schema."""

    format_version = FieldUint(required=True, data_key="format-version")
    format_revision = FieldUint(required=True, data_key="format-revision")
    model_data = fields.Nested(ModelDataSchema, required=False, data_key="model-data")
    file_data = fields.Nested(FileDataSchema, required=False, data_key="file-data")
    transition_system = fields.Nested(TransitionSystemSchema, required=True, data_key="transition-system")
    annotations = fields.Nested(AnnotationSchema, required=False, data_key="annotations")

    @classmethod
    def empty_object(cls):
        """Create an object with the format version and revision of this tool.

        The nested entries are the empty objects of their respective schemas.
        """
        blank = super().empty_object()
        blank.format_version = umbi.__format_version__
        blank.format_revision = umbi.__format_revision__
        blank.model_data = ModelDataSchema.empty_object()
        blank.file_data = FileDataSchema.empty_object()
        blank.transition_system = TransitionSystemSchema.empty_object()
        blank.annotations = AnnotationSchema.empty_object()
        return blank
197
+
198
+
199
def read_index_file(reader: umbi.TarReader, ats: umbi.ExplicitAts):
    """Read index.json via the reader, log it and store the parsed result in ats.info."""
    index = reader.read("index.json", "json")
    umbi.json_show(index)
    ats.info = AtsInfoSchema.from_json(index)
203
+
204
+
205
def write_index_file(writer: umbi.TarWriter, ats: umbi.ExplicitAts):
    """Assemble the index of the ats and add it to the writer as index.json.

    The file data describe this tool; model data, transition system and
    annotations are taken from ats.info. The ats itself is not modified.
    """
    import copy  # needed only for the annotation copies below

    info = AtsInfoSchema.empty_object()
    info.model_data = ats.info.model_data
    info.file_data = FileDataSchema.this_tool_object()
    info.transition_system = ats.info.transition_system
    info.annotations = ats.info.annotations
    # FIXME variable type int32->int
    # annotations are optional in the schema, so they may be None
    if info.annotations is not None and info.annotations.variables is not None:
        # the type is rewritten on copies: changing the objects in place would
        # leave the caller's ats with the type "int" after exporting
        variables = {}
        for key, annotation in info.annotations.variables.items():
            if annotation.type == "int32":
                logging.warning("variable annotation type is int32, storing as int in the index file")
                annotation = copy.copy(annotation)
                annotation.type = "int"
            variables[key] = annotation
        info.annotations = copy.copy(info.annotations)
        info.annotations.variables = variables
    json_obj = AtsInfoSchema().dump(info)
    json_obj = umbi.json_remove_none(json_obj)
    umbi.json_show(json_obj)
    writer.add(json_obj, "index.json", "json")
221
+
222
+
223
def read_state_files(reader: umbi.TarReader, ats: umbi.ExplicitAts):
    """Read the state-level binary files from the archive into `ats`.

    initial-states.bin is always read. The state-to-choice mapping needs at
    least one player, the state-to-player mapping more than one, and exit
    rates are read only for the "stochastic" / "urgent-stochastic" time types.
    """
    system = ats.info.transition_system
    ats.initial_states = reader.read("initial-states.bin", "bool")
    if system.num_players > 0:
        ats.state_choices = reader.read("state-to-choice.bin", "uint64", csr=True)
    if system.num_players > 1:
        ats.state_to_player = reader.read("state-to-player.bin", "uint64")
    if system.time in ("stochastic", "urgent-stochastic"):
        ats.exit_rates = reader.read("exit-rates.bin", "double")  # TODO discuss
232
+
233
+
234
def write_state_files(writer: umbi.TarWriter, ats: umbi.ExplicitAts):
    """Add the state-level binary files of `ats` to the archive.

    Initial states are passed through umbi.indices_to_bitvector and state
    choices through umbi.ranges_to_row_start before being added. The optional
    files follow the same player-count and time-type conditions as on reading.
    """
    system = ats.info.transition_system
    initial_bits = umbi.indices_to_bitvector(ats.initial_states, system.num_states)
    writer.add(initial_bits, "initial-states.bin", "bool")
    if system.num_players > 0:
        row_start = umbi.ranges_to_row_start(ats.state_choices)
        writer.add(row_start, "state-to-choice.bin", "uint64")
    if system.num_players > 1:
        writer.add(ats.state_to_player, "state-to-player.bin", "uint64")
    if system.time in ("stochastic", "urgent-stochastic"):
        writer.add(ats.exit_rates, "exit-rates.bin", "double")  # TODO discuss
243
+
244
+
245
def read_branch_files(reader: umbi.TarReader, ats: umbi.ExplicitAts):
    """Read the branch-level binary files from the archive into `ats`.

    choice-to-branch.bin is read (with csr=True) only when the transition
    system has more branches than choices; branch-to-target.bin and
    branch-probabilities.bin are read unconditionally.

    Raises:
        NotImplementedError: if the branch probability type is not "double".
    """
    ts = ats.info.transition_system
    if ts.num_branches > ts.num_choices:
        ats.choice_branches = reader.read("choice-to-branch.bin", "uint64", csr=True)
    ats.branch_target = reader.read("branch-to-target.bin", "uint64")
    # Raise explicitly instead of asserting: assert statements are removed under
    # `python -O`, which would let other probability types be decoded as double.
    if ts.branch_probability_type != "double":
        raise NotImplementedError("not implemented yet")
    ats.branch_probabilities = reader.read("branch-probabilities.bin", "double")
252
+
253
+
254
def write_branch_files(writer: umbi.TarWriter, ats: umbi.ExplicitAts):
    """Add the branch-level binary files of `ats` to the archive.

    choice-to-branch.bin is added (with csr=True) only when the transition
    system has more branches than choices; branch-to-target.bin and
    branch-probabilities.bin are added unconditionally.

    Raises:
        NotImplementedError: if the branch probability type is not "double".
    """
    ts = ats.info.transition_system
    if ts.num_branches > ts.num_choices:
        writer.add(ats.choice_branches, "choice-to-branch.bin", "uint64", csr=True)
    writer.add(ats.branch_target, "branch-to-target.bin", "uint64")
    # Raise explicitly instead of asserting: assert statements are removed under
    # `python -O`, which would let other probability types be encoded as double.
    if ts.branch_probability_type != "double":
        raise NotImplementedError("not implemented yet")
    writer.add(ats.branch_probabilities, "branch-probabilities.bin", "double")
261
+
262
+
263
+ def read_action_files(reader: umbi.TarReader, ats: umbi.ExplicitAts):
+ # Read the action mapping (choice-to-action.bin or branch-to-action.bin, both uint32)
+ # and, when both string files are present in the archive, the per-action strings.
+ # NOTE(review): indentation is not preserved in this listing, so it cannot be told
+ # here which `if` the `else` below pairs with -- confirm against the original file.
264
+ ts = ats.info.transition_system
265
+ if ts.time == "discrete":
266
+ if ts.num_players > 0:
267
+ ats.choice_to_action = reader.read("choice-to-action.bin", "uint32")
268
+ else:
269
+ ats.branch_to_action = reader.read("branch-to-action.bin", "uint32")
270
+ # Action strings are read only if BOTH the offsets file and the characters file exist.
+ if "action-to-action-strings.bin" in reader.filenames and "action-strings.bin" in reader.filenames:
271
+ action_string_offset = reader.read("action-to-action-strings.bin", "uint32")
272
+ action_string_chars = reader.read("action-strings.bin", "char")
273
+ ats.action_to_string = []
274
+ # Action i is the slice between offsets i and i + 1, so the offsets sequence
+ # must hold at least ts.num_actions + 1 entries.
+ for action in range(ts.num_actions):
275
+ action_string = action_string_chars[action_string_offset[action] : action_string_offset[action + 1]]
276
+ ats.action_to_string.append(action_string)
277
+
278
+
279
+ def write_action_files(writer: umbi.TarWriter, ats: umbi.ExplicitAts):
+ # Add the action mapping (choice-to-action.bin or branch-to-action.bin, both uint32)
+ # and, when ats.action_to_string is set, the action strings plus their offsets.
+ # NOTE(review): indentation is not preserved in this listing, so it cannot be told
+ # here which `if` the `else` below pairs with -- confirm against the original file.
280
+ ts = ats.info.transition_system
281
+ if ts.time == "discrete":
282
+ if ts.num_players > 0:
283
+ writer.add(ats.choice_to_action, "choice-to-action.bin", "uint32")
284
+ else:
285
+ writer.add(ats.branch_to_action, "branch-to-action.bin", "uint32")
286
+ if ats.action_to_string is not None:
287
+ # Offsets start at 0 and gain one entry per action, giving len(action_to_string) + 1 entries.
+ action_string_offset = [0]
288
+ action_string_chars = ""
289
+ for action, string in enumerate(ats.action_to_string):
290
+ action_string_chars += string
291
+ # Offsets are len() of the concatenated str, i.e. character counts.
+ # NOTE(review): if the "char" encoding is multi-byte for non-ASCII text these
+ # would differ from byte offsets -- confirm against the writer.
+ action_string_offset.append(len(action_string_chars))
292
+ writer.add(action_string_offset, "action-to-action-strings.bin", "uint32")
293
+ writer.add(action_string_chars, "action-strings.bin", "char")
294
+
295
+
296
def read_annotation(reader: umbi.TarReader, annotation_label: str, annotation_dict: dict[str, object]):
    """Load the value files of every annotation in `annotation_dict`.

    Each annotation receives a fresh `data` dict with one entry per element of
    its `applies_to`, read from
    annotations/<label>/<key>/for-<applies>/values.bin using the annotation's
    own `type`. A None dictionary is a no-op.
    """
    if annotation_dict is not None:
        base = f"annotations/{annotation_label}"
        for name, entry in annotation_dict.items():
            loaded = dict()
            entry.data = loaded
            for target in entry.applies_to:
                source = f"{base}/{name}/for-{target}/values.bin"
                loaded[target] = reader.read(source, entry.type)
306
+
307
+
308
def write_annotation(writer: umbi.TarWriter, annotation_label: str, annotation_dict: dict[str, object]):
    """Add the value files of every annotation in `annotation_dict` to the archive.

    For each element of an annotation's `applies_to`, the matching entry of its
    `data` is added as annotations/<label>/<key>/for-<applies>/values.bin.
    An annotation typed "int" is retyped to "int32" in place (with a warning)
    before its values are added. A None dictionary is a no-op.
    """
    if annotation_dict is not None:
        base = f"annotations/{annotation_label}"
        for name, entry in annotation_dict.items():
            for target in entry.applies_to:
                assert target in entry.data
                destination = f"{base}/{name}/for-{target}/values.bin"
                if entry.type == "int":
                    logging.warning("variable annotation type is int, interpreting as int32")
                    entry.type = "int32"
                writer.add(entry.data[target], destination, entry.type)
320
+
321
+
322
def read_annotation_files(reader: umbi.TarReader, ats: umbi.ExplicitAts):
    """Read the value files of all aps, rewards and variables annotations.

    Does nothing when ats.info.annotations is None: the annotations section is
    optional in the index schema, so it may be absent.
    """
    annotations = ats.info.annotations
    if annotations is None:
        return
    # The archive label of each annotation kind equals its attribute name.
    for label in ("aps", "rewards", "variables"):
        read_annotation(reader, label, getattr(annotations, label))
326
+
327
+
328
def write_annotation_files(writer: umbi.TarWriter, ats: umbi.ExplicitAts):
    """Add the value files of all aps, rewards and variables annotations.

    Does nothing when ats.info.annotations is None: the annotations section is
    optional in the index schema, so it may be absent.
    """
    annotations = ats.info.annotations
    if annotations is None:
        return
    # The archive label of each annotation kind equals its attribute name.
    for label in ("aps", "rewards", "variables"):
        write_annotation(writer, label, getattr(annotations, label))
332
+
333
+
334
def read_umb(tarpath: str) -> umbi.ExplicitAts:
    """Load an ATS from the .umb archive at `tarpath` and return it."""
    reader = umbi.TarReader(tarpath)
    ats = umbi.ExplicitAts()
    # The index goes first: the later stages consult ats.info.
    stages = (
        read_index_file,
        read_state_files,
        read_branch_files,
        read_action_files,
        read_annotation_files,
    )
    for stage in stages:
        stage(reader, ats)
    reader.warn_unread_files()
    # ats.validate()
    return ats
346
+
347
+
348
def write_umb(ats: umbi.ExplicitAts, tarpath: str):
    """Write `ats` to a .umb archive at `tarpath`.

    After writing, the archive is read back as a sanity check; a failure of
    that read is only logged as a warning and is not propagated.
    """
    # ats.validate()
    writer = umbi.TarWriter()
    stages = (
        write_index_file,
        write_state_files,
        write_branch_files,
        write_action_files,
        write_annotation_files,
    )
    for stage in stages:
        stage(writer, ats)
    writer.write(tarpath)

    # sanity check: make sure the freshly written file can be read back
    try:
        read_umb(tarpath)
    except Exception as error:
        logging.warning(f"failed to read the resulted file {tarpath}, printing the error message below:")
        logging.warning(error)
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: umbi
3
+ Version: 0.0.1
4
+ Summary: Library for binary encoding of annotated transition systems
5
+ Author-email: Roman Andriushchenko <roman.andriu@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Roman Andriushchenko
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/randriu/umbi
29
+ Requires-Python: >=3.9
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: marshmallow
33
+ Requires-Dist: python-magic
34
+ Requires-Dist: tomli
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest; extra == "dev"
37
+ Requires-Dist: pip-tools; extra == "dev"
38
+ Requires-Dist: black; extra == "dev"
39
+ Requires-Dist: isort; extra == "dev"
40
+ Requires-Dist: toml-sort; extra == "dev"
41
+ Requires-Dist: twine; extra == "dev"
42
+ Requires-Dist: bumpver; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # umbi
46
+
47
+ ## Installation:
48
+
49
+ (Optional) Create and activate a Python virtual environment:
50
+
51
+ ```
52
+ $ python -m venv venv
53
+ $ source venv/bin/activate
54
+ ```
55
+
56
+ Install `umbi` via pip:
57
+ ```
58
+ (venv) $ pip install umbi
59
+ ```
60
+
61
+ ## Running umbi
62
+
63
+ Examples:
64
+ ```
65
+ (venv) $ umbi --import-umb /path/to/input.umb
66
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb
67
+ (venv) $ umbi --import-umb /path/to/input.umb --export-umb /path/to/output.umb --log-level=DEBUG
68
+ ```
@@ -0,0 +1,17 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ umbi/__init__.py
5
+ umbi/__main__.py
6
+ umbi/ats.py
7
+ umbi/explicit_ats.py
8
+ umbi/io_bytes.py
9
+ umbi/io_json.py
10
+ umbi/io_tar.py
11
+ umbi/io_umb.py
12
+ umbi.egg-info/PKG-INFO
13
+ umbi.egg-info/SOURCES.txt
14
+ umbi.egg-info/dependency_links.txt
15
+ umbi.egg-info/entry_points.txt
16
+ umbi.egg-info/requires.txt
17
+ umbi.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ umbi = umbi.__main__:main
@@ -0,0 +1,12 @@
1
+ marshmallow
2
+ python-magic
3
+ tomli
4
+
5
+ [dev]
6
+ pytest
7
+ pip-tools
8
+ black
9
+ isort
10
+ toml-sort
11
+ twine
12
+ bumpver
@@ -0,0 +1 @@
1
+ umbi