thebundle 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bundle/__init__.py +38 -0
- bundle/_version.py +16 -0
- bundle/data/README.md +133 -0
- bundle/data/__init__.py +39 -0
- bundle/data/data.py +147 -0
- bundle/data/json.py +273 -0
- bundle/entity/README.md +59 -0
- bundle/entity/__init__.py +21 -0
- bundle/entity/entity.py +69 -0
- bundle/logs/__init__.py +123 -0
- bundle/process/README.md +92 -0
- bundle/process/__init__.py +27 -0
- bundle/process/_abc.py +59 -0
- bundle/process/asynchronous.py +114 -0
- bundle/process/synchronous.py +119 -0
- bundle/tasks/README.md +51 -0
- bundle/tasks/__init__.py +29 -0
- bundle/tasks/_abc.py +49 -0
- bundle/tasks/asynchronous.py +41 -0
- bundle/tasks/synchronous.py +41 -0
- bundle/tests/__init__.py +31 -0
- bundle/tests/tools/__init__.py +34 -0
- bundle/tests/tools/assertions.py +47 -0
- bundle/tests/tools/cprofile.py +95 -0
- bundle/tests/tools/data.py +52 -0
- bundle/tests/tools/data_json.py +102 -0
- bundle/tests/tools/entity.py +26 -0
- bundle/tests/tools/process.py +91 -0
- bundle/tests/tools/tasks.py +87 -0
- references/windows/ref/JSONData.json +3 -0
- references/windows/ref/NestedDatajson.json +9 -0
- references/windows/ref/TestAsyncProcess.json +16 -0
- references/windows/ref/TestAsyncTask.json +12 -0
- references/windows/ref/TestEntity.json +10 -0
- references/windows/ref/TestProcess.json +16 -0
- references/windows/ref/TestStreamingAsyncProcess.json +16 -0
- references/windows/ref/TestStreamingProcess.json +16 -0
- references/windows/ref/TestTask.json +12 -0
- references/windows/schema/JSONData.json +13 -0
- references/windows/schema/NestedDatajson.json +39 -0
- references/windows/schema/TestAsyncProcess.json +70 -0
- references/windows/schema/TestAsyncTask.json +54 -0
- references/windows/schema/TestEntity.json +46 -0
- references/windows/schema/TestProcess.json +70 -0
- references/windows/schema/TestStreamingAsyncProcess.json +70 -0
- references/windows/schema/TestStreamingProcess.json +70 -0
- references/windows/schema/TestTask.json +54 -0
- thebundle-0.0.1.dist-info/LICENSE +18 -0
- thebundle-0.0.1.dist-info/METADATA +23 -0
- thebundle-0.0.1.dist-info/RECORD +52 -0
- thebundle-0.0.1.dist-info/WHEEL +5 -0
- thebundle-0.0.1.dist-info/top_level.txt +2 -0
bundle/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 2023 HorusElohim

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


# NOTE(review): time/datetime/typing/Path look unused here; at least `logging`
# is re-exported (bundle/data/__init__.py does `from .. import logging, LogEmoji`),
# so presumably the others are kept as package-level re-exports too — confirm.
import time
from datetime import datetime
import logging
import typing
from pathlib import Path

from ._version import version, version_tuple
from .logs import setup_logging, LOGGING_LEVEL, LogEmoji

# Package-wide logger, configured once at import time (JSON formatting enabled).
LOGGER = setup_logging(log_level=LOGGING_LEVEL, to_json=True)

# Subpackages are imported *after* logging is configured so their
# module-level loggers inherit the setup above.
from . import data
from . import entity
from . import tasks
from . import process
from . import tests

LOGGER.debug("bundle loaded")
|
bundle/_version.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# file generated by setuptools_scm
# don't change, don't track in version control
# TYPE_CHECKING is a cheap stand-in for typing.TYPE_CHECKING: the typing
# import (and the real tuple alias) only happens for static type checkers.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
    # At runtime the alias is just a placeholder object.
    VERSION_TUPLE = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

__version__ = version = '0.0.1'
__version_tuple__ = version_tuple = (0, 0, 1)
|
bundle/data/README.md
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# data
|
|
2
|
+
|
|
3
|
+
The data subpackage in BUNDLE introduces advanced data handling capabilities, focusing on the Dataclass and JSONData classes. These classes enhance Python's standard data handling methods, providing efficient and effective ways to manage complex data structures.
|
|
4
|
+
|
|
5
|
+
*data.py*
|
|
6
|
+
---
|
|
7
|
+
The data.py module defines the class `Dataclass`.
|
|
8
|
+
|
|
9
|
+
## Dataclass
|
|
10
|
+
Dataclass extends the standard Python dataclass to facilitate easy conversions between dataclass instances and dictionaries, addressing the need for streamlined data handling in Python.
|
|
11
|
+
|
|
12
|
+
### Features
|
|
13
|
+
|
|
14
|
+
* Python class -> dict
|
|
15
|
+
* dict -> Python class
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Definition
|
|
19
|
+
|
|
20
|
+
It's preferred to always initialize class attributes via `bundle.data.field` with a *default factory*, just as with standard dataclasses.
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
import bundle
|
|
24
|
+
|
|
25
|
+
# Define a custom data class
|
|
26
|
+
class MyData(bundle.data.Dataclass):
|
|
27
|
+
    name: str = bundle.data.field(default_factory=str)
|
|
28
|
+
    value: int = bundle.data.field(default_factory=int)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
__Hint__: if you need to provide default arguments to the constructor, use a lambda:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
str_example: str = bundle.data.field(default_factory=lambda: str("Nice"))
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Instantiation
|
|
38
|
+
|
|
39
|
+
Instantiation happens as with a normal dataclass.
|
|
40
|
+
|
|
41
|
+
### Dataclass()
|
|
42
|
+
Using the constructor.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
my_data = MyData(name="Example", value=42)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### from_dict: dict -> Python class
|
|
50
|
+
Use the class method `from_dict` to convert a dict into a class instance.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
new_data = MyData.from_dict(data_dict)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### as_dict: Python class -> dict
|
|
57
|
+
Convert a class instance to a dict.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
data_dict = my_data.as_dict()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
*json.py*
|
|
65
|
+
---
|
|
66
|
+
The json.py module defines the class `JSONData`.
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
## JSONData
|
|
70
|
+
The JSONData class in the data subpackage of BUNDLE extends the capabilities of the Dataclass by enabling serialization to and from JSON. This class is ideal for applications requiring robust data interchange, configuration management, or data persistence. JSONData provides a seamless way to handle JSON serialization and deserialization, including validation with jsonschema. It is designed to make working with JSON data in Python more intuitive, efficient, and reliable.
|
|
71
|
+
|
|
72
|
+
### Features
|
|
73
|
+
* JSON Serialization: Convert Python objects into JSON format, supporting both simple and complex data structures.
|
|
74
|
+
* JSON Deserialization: Reconstruct Python objects from JSON, preserving the data's structure and types.
|
|
75
|
+
* JSON Schema Validation: Validate JSON data against predefined schemas to ensure data integrity and format consistency.
|
|
76
|
+
* Custom Serialization: Handle non-JSON serializable objects using pickle, extending the range of data types that can be serialized.
|
|
77
|
+
|
|
78
|
+
### Definition
|
|
79
|
+
As Dataclass.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import bundle
|
|
83
|
+
|
|
84
|
+
@bundle.data.dataclass
|
|
85
|
+
class MyJsonData(bundle.data.JSONData):
|
|
86
|
+
name: str = "Default Name"
|
|
87
|
+
value: int = 0
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Instantiation
|
|
91
|
+
As Dataclass.
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
my_json_data = MyJsonData(name="Example", value=42)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Serialization
|
|
98
|
+
Convert a JSONData instance to a JSON string or file:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# Serialize to JSON string
|
|
102
|
+
json_str = my_json_data.as_json()
|
|
103
|
+
print(json_str)
|
|
104
|
+
|
|
105
|
+
# Serialize to JSON file
|
|
106
|
+
json_file_path = "data.json"
|
|
107
|
+
my_json_data.dump_json(json_file_path)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
__Hint__: During the serialization any Python object will be serialized with pickle.
|
|
111
|
+
|
|
112
|
+
### Deserialization
|
|
113
|
+
Load a JSONData instance from a JSON string or file:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
# Load from JSON file
|
|
117
|
+
loaded_data = MyJsonData.from_json(json_file_path)
|
|
118
|
+
print(loaded_data)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
### JSON Schema Validation
|
|
123
|
+
Generate and validate against a JSON schema:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
# Generate JSON schema
|
|
127
|
+
json_schema_path = "schema.json"
|
|
128
|
+
MyJsonData.to_jsonschema(json_schema_path)
|
|
129
|
+
# Validate the instance against the schema
|
|
130
|
+
is_valid = my_json_data.is_valid_by_jsonschema(json_schema_path)
|
|
131
|
+
print(f"Validation result: {is_valid}")
|
|
132
|
+
```
|
|
133
|
+
|
bundle/data/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright 2023 HorusElohim
|
|
2
|
+
|
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
|
5
|
+
# distributed with this work for additional information
|
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
|
8
|
+
# "License"); you may not use this file except in compliance
|
|
9
|
+
# with the License. You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
|
14
|
+
# software distributed under the License is distributed on an
|
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
16
|
+
# KIND, either express or implied. See the License for the
|
|
17
|
+
# specific language governing permissions and limitations
|
|
18
|
+
# under the License.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
from dataclasses import asdict, dataclass, field, fields
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from .. import logging, LogEmoji
|
|
25
|
+
|
|
26
|
+
LOGGER = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def check_file_exist(path: str | Path, not_exist_raise=False) -> Path:
|
|
30
|
+
if not isinstance(path, Path | str):
|
|
31
|
+
raise ValueError(f"{type(path)=}, instead of [Path | str]")
|
|
32
|
+
path = Path(path)
|
|
33
|
+
if not_exist_raise and not path.exists():
|
|
34
|
+
raise ValueError(f"{path=}: ❌ NOT EXIST")
|
|
35
|
+
return path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
from .data import Dataclass
|
|
39
|
+
from .json import JSONData
|
bundle/data/data.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Copyright 2023 HorusElohim
|
|
2
|
+
|
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
|
5
|
+
# distributed with this work for additional information
|
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
|
8
|
+
# "License"); you may not use this file except in compliance
|
|
9
|
+
# with the License. You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
|
14
|
+
# software distributed under the License is distributed on an
|
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
16
|
+
# KIND, either express or implied. See the License for the
|
|
17
|
+
# specific language governing permissions and limitations
|
|
18
|
+
# under the License.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from typing import List
|
|
24
|
+
from typing import Any, Dict, List, Type, TypeVar, Union
|
|
25
|
+
from pprint import pformat
|
|
26
|
+
|
|
27
|
+
from . import dataclass, field, asdict, LOGGER, LogEmoji
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Dataclass:
    """
    A utility wrapper around the standard Python dataclass to facilitate easy
    conversions between dataclass instances and dictionaries.

    Attributes:
        class_type: Name of the concrete class, filled in by __post_init__.
        dataclass: Reference to the dataclass decorator (re-exported helper).
        field: Helper function to define fields with default values or other attributes.
    """

    class_type: str = field(default_factory=str)
    dataclass = dataclass
    field = field

    def __post_init__(self):
        # Set the class_type attribute to the name of the class.
        # This will be set for every instance of Dataclass or its subclasses.
        self.class_type = self.__class__.__name__

    @staticmethod
    def _is_annotation_present_in_mro(cls, field_name):
        """Return True when *field_name* is annotated anywhere in cls's MRO."""
        for base_class in cls.__mro__:
            if field_name in getattr(base_class, "__annotations__", {}):
                return True
        return False

    @staticmethod
    def _recursive_dataclass_from_dict(target_class: Dataclass, source_dict: Union[Dict, List]) -> Dataclass:
        """
        Recursively convert a dictionary to a dataclass instance.

        Raises:
            RuntimeError: If *target_class* is not a class.
            ValueError: If an annotated field in the MRO is missing from
                *source_dict*, or a key lookup fails during conversion.
        """
        try:
            # Collect the expected fields from annotations across the MRO.
            expected_fields = set()
            if not hasattr(target_class, "__mro__"):
                raise RuntimeError(f"{target_class=} not a class!")
            for base_class in target_class.__mro__:
                expected_fields.update(getattr(base_class, "__annotations__", {}).keys())

            actual_keys = set(source_dict.keys())
            LOGGER.debug(f"Expected fields for {target_class} from MRO: {sorted(expected_fields)}")
            LOGGER.debug(f"Actual keys in source_dict: {sorted(actual_keys)}")

            initialized_fields = {}
            # FIX: loop variable renamed from `field` to `field_name` — it used
            # to shadow the imported dataclasses.field helper.
            for field_name in source_dict:
                LOGGER.debug(f"working on field {field_name}")
                if Dataclass._is_annotation_present_in_mro(target_class, field_name):
                    # Resolve the annotated type from the first class in the
                    # MRO that declares this field.
                    field_type = next(
                        (
                            getattr(base_class, "__annotations__", {}).get(field_name)
                            for base_class in target_class.__mro__
                            if field_name in getattr(base_class, "__annotations__", {})
                        ),
                        None,
                    )
                    if isinstance(source_dict[field_name], dict):
                        # Nested dicts recurse into the annotated dataclass type.
                        initialized_fields[field_name] = Dataclass._recursive_dataclass_from_dict(
                            field_type, source_dict[field_name]
                        )
                    else:
                        initialized_fields[field_name] = source_dict[field_name]
                else:
                    # FIX: dropped the redundant f-string prefix — the message
                    # is a %-style template interpolated lazily by logging.
                    LOGGER.warning(
                        "Field '%s' is not expected according to the annotations in the MRO of '%s'",
                        field_name,
                        target_class.__name__,
                    )

            # Any annotated field absent from the input dict is an error.
            missing_fields = expected_fields - actual_keys
            if missing_fields:
                msg = f"Missing fields in source_dict that are expected in MRO of target_class: {missing_fields}"
                LOGGER.error(msg)
                raise ValueError(msg)

            return target_class(**initialized_fields)
        except KeyError as e:
            # Key is missing in the dictionary, log an error before raising.
            LOGGER.error(f"Key missing in source_dict for the expected field in MRO of target_class: {e}")
            raise ValueError(f"Key missing in source_dict for the expected field in MRO of target_class: {target_class}: {e}")

    @classmethod
    def from_dict(cls: Dataclass, d: Dict[str, Any]) -> Dataclass:
        """
        Create an instance of the dataclass from a dictionary.

        Args:
            d: The dictionary containing data to be converted to a dataclass instance.

        Returns:
            An instance of the Dataclass initialized with data from the dictionary.

        Raises:
            ValueError: If a required key is missing in the dictionary.
            TypeError: If there's a type mismatch or unexpected structure in the dictionary.
        """
        data_class = Dataclass._recursive_dataclass_from_dict(cls, d)
        LOGGER.debug(LogEmoji.success)
        return data_class

    def as_dict(self) -> Dict[str, Any]:
        """
        Convert the dataclass instance to a dictionary.

        Returns:
            A dictionary representation of the dataclass instance.
        """
        data_dict = asdict(self)
        LOGGER.debug(LogEmoji.success)
        return data_dict

    def __str__(self) -> str:
        class_header = f"--------\n{self.class_type}\n--------"
        formatted_data = pformat(self.as_dict(), indent=4, width=80, depth=None)
        return f"{class_header}\n{formatted_data}\n--------"

    def __hash__(self) -> int:
        # FIX: the original returned hash(self.as_dict()), but dicts are
        # unhashable, so every call raised TypeError. Hash a deterministic
        # string rendering of the dict form instead.
        return hash(str(self.as_dict()))
|
bundle/data/json.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# Copyright 2023 HorusElohim
|
|
2
|
+
|
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
|
5
|
+
# distributed with this work for additional information
|
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
|
8
|
+
# "License"); you may not use this file except in compliance
|
|
9
|
+
# with the License. You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
|
14
|
+
# software distributed under the License is distributed on an
|
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
16
|
+
# KIND, either express or implied. See the License for the
|
|
17
|
+
# specific language governing permissions and limitations
|
|
18
|
+
# under the License.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json as js
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
import logging
|
|
26
|
+
import abc
|
|
27
|
+
import jsonschema
|
|
28
|
+
from typing import Any, Dict, Union, Type, Set, get_type_hints
|
|
29
|
+
import pickle
|
|
30
|
+
import traceback
|
|
31
|
+
import dataclasses
|
|
32
|
+
from . import Dataclass, dataclass, fields, check_file_exist, LogEmoji
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def is_json_serializable(value: Any) -> bool:
    """Check if a value is JSON serializable.

    Attempts a round of encoding with the stock encoder; any value the
    standard library cannot represent yields False.
    """
    try:
        js.dumps(value, cls=js.JSONEncoder)
    except (TypeError, OverflowError):
        return False
    return True
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
LOGGER = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
DEFAULT_SCHEMA = "http://json-schema.org/draft-07/schema#"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class CustomJSONEncoder(js.JSONEncoder):
    """Extended JSON encoder to handle special data types using pickle for non-JSON serializable objects."""

    def default(self, obj: Any) -> Any:
        """Encode Dataclass instances, Paths, and (via pickle) everything else."""
        LOGGER.debug(f"CustomJSONEncoder for {type(obj)}")
        # Paths become a small tagged dict so CustomJSONDecoder can rebuild them.
        # (Path and Dataclass are disjoint types, so check order is irrelevant.)
        if isinstance(obj, Path):
            return {"_path_": str(obj)}
        # Project dataclasses serialize through their dict form.
        if isinstance(obj, Dataclass):
            return obj.as_dict()
        try:
            # Defer to the stock encoder for anything it natively supports.
            return super().default(obj)
        except TypeError:
            # Fallback: embed a hex-encoded pickle payload for opaque objects.
            return {"_pickle_": pickle.dumps(obj).hex()}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class CustomJSONDecoder(js.JSONDecoder):
    """Custom JSON decoder to handle pickled data types."""

    def __init__(self, *args, **kwargs):
        # Route every decoded JSON object through our hook so tagged payloads
        # ("_pickle_", "_path_") are rebuilt into Python objects.
        super().__init__(object_hook=self.object_hook, *args, **kwargs)

    def object_hook(self, dct: Dict) -> Any:
        """Rebuild tagged dicts produced by CustomJSONEncoder."""
        pickled = dct.get("_pickle_")
        if pickled:
            # NOTE(review): pickle.loads executes arbitrary code — only safe
            # on trusted input; flagging rather than changing behavior.
            return pickle.loads(bytes.fromhex(pickled))
        raw_path = dct.get("_path_")
        if raw_path:
            return Path(raw_path)
        return dct
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class JSONDataABC(abc.ABC):
    """Abstract base class representing objects that can be serialized to and from JSON."""

    # FIX: abc.abstractclassmethod has been deprecated since Python 3.3;
    # the supported spelling is @classmethod stacked over @abc.abstractmethod.
    @classmethod
    @abc.abstractmethod
    def from_json(cls: Type[JSONData], path: Union[str, Path]) -> JSONData:
        """Deserialize from a JSON file."""

    @abc.abstractmethod
    def dump_json(self, path: Union[str, Path]) -> None:
        """Serialize to a JSON file."""

    @property
    @abc.abstractmethod
    def json_encoder(self) -> CustomJSONEncoder:
        """Get the JSON encoder for this class."""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass(unsafe_hash=True)
class JSONData(JSONDataABC, Dataclass):
    """A dataclass that can be serialized to and from JSON."""

    # Encoder/decoder hooks; subclasses may override with their own classes.
    json_decoder = CustomJSONDecoder
    json_encoder = CustomJSONEncoder

    @staticmethod
    def load_json_file(json_path: Union[Path, str], decoder: js.JSONDecoder = CustomJSONDecoder) -> Union[dict, None]:
        """Load *json_path* and return the decoded dict, or None on failure.

        Raises:
            ValueError: If the file does not exist (via check_file_exist).
        """
        json_path = check_file_exist(json_path, not_exist_raise=True)
        try:
            with Path(json_path).open("r") as file:
                obj_dict = js.load(file, cls=decoder)
            LOGGER.debug(f"{json_path=} {LogEmoji.success}")
            return obj_dict
        except (js.JSONDecodeError, OSError):
            # FIX: the original caught jsonschema.ValidationError, which
            # json.load never raises — decode/IO errors escaped this handler.
            LOGGER.error(f"{json_path=} {LogEmoji.failed} \n{traceback.format_exc()}")
            return None

    @staticmethod
    def dump_json_file(
        obj_dict: Dict,
        path: Union[Path, str],
        encoder: js.JSONEncoder = CustomJSONEncoder,
    ) -> Union[dict, None]:
        """Write *obj_dict* to *path* as indented JSON; return it, or None on failure."""
        path = check_file_exist(path)
        try:
            with Path(path).open("w") as file:
                js.dump(obj_dict, file, indent=4, cls=encoder)
            LOGGER.debug(f"{path=} {LogEmoji.success}")
            return obj_dict
        except (TypeError, OSError):
            # FIX: was jsonschema.ValidationError, which json.dump never raises.
            LOGGER.error(f"{path=} {LogEmoji.failed} \n{traceback.format_exc()}")
            return None

    @classmethod
    def from_json(cls: Type[JSONData], path: Union[str, Path]) -> JSONData:
        """Load the dataclass from a JSON file."""
        obj_dict = JSONData.load_json_file(json_path=path, decoder=cls.json_decoder)
        LOGGER.debug(f"{path=}{LogEmoji.success}{obj_dict}")
        return cls.from_dict(obj_dict)

    def dump_json(self, path: Union[str, Path]) -> None:
        """Save the dataclass to a JSON file."""
        self.dump_json_file(obj_dict=self.as_dict(), path=path, encoder=self.json_encoder)
        LOGGER.debug(f"{path=}{LogEmoji.success}")

    def as_json(self) -> str:
        """Convert the dataclass to a JSON string."""
        # FIX: return annotation corrected from None to str.
        return js.dumps(self.as_dict(), indent=4, cls=self.json_encoder)

    @classmethod
    def generate_jsonschema(
        cls,
        target_cls: Union[Type[JSONData], None] = None,
        processed_classes: Union[Set[Type], None] = None,
    ) -> Dict[str, Any]:
        """Generate a JSON schema for this or a given dataclass.

        Args:
            target_cls: Dataclass to describe; defaults to *cls* itself.
            processed_classes: Classes already visited (cycle guard).

        Raises:
            TypeError: If *target_cls* is not a dataclass type.
        """
        if processed_classes is None:
            processed_classes = set()

        target_cls = target_cls or cls
        # Cycle guard: emit a bare object reference for already-visited classes.
        if target_cls in processed_classes:
            return {"type": "object", "title": target_cls.__name__}

        processed_classes.add(target_cls)

        if not dataclasses.is_dataclass(target_cls):
            raise TypeError(f"Expected a dataclass type, got {target_cls}")

        type_to_schema = {
            int: "integer",
            float: "number",
            str: "string",
            bool: "boolean",
            list: "array",
            dict: "object",
        }

        def get_pickled_schema() -> Dict[str, Any]:
            # Schema shape for values serialized via the pickle fallback.
            return {
                "type": "object",
                "properties": {
                    "_pickle_": {"type": "boolean", "const": True},
                    "data": {"type": "string"},
                },
                "required": ["_pickle_", "data"],
            }

        def get_schema_for_type(type_hint: Type) -> Dict[str, Any]:
            LOGGER.debug(f"Working on {type_hint=}")
            if type_hint in type_to_schema:
                return {"type": type_to_schema[type_hint]}
            elif getattr(type_hint, "__origin__", None) == list:
                item_type = type_hint.__args__[0]
                return {"type": "array", "items": get_schema_for_type(item_type)}
            elif dataclasses.is_dataclass(type_hint):
                LOGGER.debug("Recursive call on dataclass")
                return cls.generate_jsonschema(type_hint, processed_classes)
            else:
                # Fallback for other non-dataclass types
                return get_pickled_schema()

        properties = {key: get_schema_for_type(type_hint) for key, type_hint in get_type_hints(target_cls).items()}

        required_fields = [f.name for f in fields(target_cls)]

        return {
            "$schema": DEFAULT_SCHEMA,
            "title": target_cls.__name__,
            "type": "object",
            "properties": properties,
            "required": required_fields,
        }

    @classmethod
    def to_jsonschema(cls, path: Union[str, Path]):
        """Save a JSON schema for this dataclass."""
        report = f"to_schema({path})"
        try:
            with Path(path).open("w") as file:
                js.dump(cls.generate_jsonschema(), file, indent=4)
            LOGGER.debug(f"{report}{LogEmoji.success} ")
        except Exception:
            LOGGER.error(f"{report} {LogEmoji.failed} \n{traceback.format_exc()}")

    @classmethod
    def schema_validation(
        cls: Type[JSONData],
        json_path: Union[Path, str, None] = None,
        jsonschema_path: Union[Path, str, None] = None,
    ) -> bool:
        """Validate JSON data against the schema.

        Args:
            json_path: JSON file to validate; required for this classmethod.
            jsonschema_path: Optional schema file; defaults to the generated schema.

        Raises:
            TypeError: If *json_path* is omitted (no instance data available).
        """
        report = f"schema_validation{json_path=}"
        if json_path:
            with Path(json_path).open("r") as file:
                data = js.load(file)
        else:
            # FIX: the original called cls.as_dict() here — an *instance*
            # method invoked on the class — which always raised an opaque
            # TypeError. Raise the same exception type with a clear message.
            raise TypeError("schema_validation() without json_path needs an instance; use is_valid_by_jsonschema() instead")

        # Get the schema for the current dataclass
        if jsonschema_path:
            with Path(jsonschema_path).open("r") as file:
                schema = js.load(file)
        else:
            schema = cls.generate_jsonschema()

        # Validate the data against the schema
        try:
            jsonschema.validate(data, schema)
            LOGGER.debug(f"{report} {LogEmoji.success}")
            return True
        except jsonschema.ValidationError:
            LOGGER.error(f"{report} {LogEmoji.failed} \n{traceback.format_exc()}")
            return False

    def is_valid_by_jsonschema(self, jsonschema_path: Union[Path, str, None] = None) -> bool:
        """Validate this instance's data against a schema (file or generated)."""
        try:
            # Load the data from the instance itself
            data = self.as_dict()
            # Get the schema for the current dataclass
            jsonschema_dict = JSONData.load_json_file(jsonschema_path) if jsonschema_path else self.generate_jsonschema()
            # Validate the data against the schema
            jsonschema.validate(data, jsonschema_dict)
            LOGGER.debug(f"{jsonschema_path=} {LogEmoji.success}")
            return True
        except jsonschema.ValidationError:
            LOGGER.error(f"{jsonschema_path=} {LogEmoji.failed} \n{traceback.format_exc()}")
            return False
|