xmlgenerator 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xmlgenerator/__init__.py +0 -0
- xmlgenerator/arguments.py +138 -0
- xmlgenerator/bootstrap.py +73 -0
- xmlgenerator/configuration.py +135 -0
- xmlgenerator/generator.py +387 -0
- xmlgenerator/randomization.py +40 -0
- xmlgenerator/substitution.py +118 -0
- xmlgenerator/validation.py +52 -0
- xmlgenerator-0.1.0.dist-info/METADATA +314 -0
- xmlgenerator-0.1.0.dist-info/RECORD +14 -0
- xmlgenerator-0.1.0.dist-info/WHEEL +5 -0
- xmlgenerator-0.1.0.dist-info/entry_points.txt +2 -0
- xmlgenerator-0.1.0.dist-info/licenses/LICENSE +21 -0
- xmlgenerator-0.1.0.dist-info/top_level.txt +1 -0
xmlgenerator/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,138 @@
|
|
1
|
+
import sys
|
2
|
+
from argparse import ArgumentParser, HelpFormatter
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
import shtab
|
6
|
+
|
7
|
+
|
8
|
+
class MyParser(ArgumentParser):
    """Argument parser that prints the full help text after a usage error."""

    def error(self, message):
        """Report a command-line usage error and terminate the process.

        Both the error line and the help text are written to standard error,
        so that standard output (which may carry generated XML) is never
        polluted by diagnostics.

        Args:
            message: Human-readable description of the problem.

        Raises:
            SystemExit: Always, with exit status 2.
        """
        sys.stderr.write(f'error: {message}\n')
        self.print_help(sys.stderr)
        sys.exit(2)
|
13
|
+
|
14
|
+
|
15
|
+
class CustomHelpFormatter(HelpFormatter):
    """Help formatter with a wider layout than the argparse default."""

    def __init__(self, prog):
        """Create the formatter for *prog*: 120 columns wide, help column at 36."""
        layout = {'max_help_position': 36, 'width': 120}
        super().__init__(prog, **layout)
|
18
|
+
|
19
|
+
|
20
|
+
def _get_parser():
    """Build the command-line parser of the ``xmlgenerator`` program.

    Returns:
        The configured parser, including the shell-completion option added
        through ``shtab``.
    """
    parser = MyParser(
        prog='xmlgenerator',
        description='Generates XML documents from XSD schemas',
        formatter_class=CustomHelpFormatter
    )

    source_arg = parser.add_argument(
        nargs='+',
        metavar="xsd",
        dest="source_paths",
        help="paths to xsd schema(s) or directory with xsd schemas"
    )
    parser.add_argument(
        "-c", "--config",
        metavar="<config.yml>",
        dest="config_yaml",
        help="pass yaml configuration file"
    )
    output_arg = parser.add_argument(
        "-o", "--output",
        metavar="<output.xml>",
        dest="output_path",
        help="save output to dir or file"
    )
    parser.add_argument(
        "-p", "--pretty",
        action="store_true",
        help="prettify output XML"
    )
    parser.add_argument(
        "-v", "--validation",
        metavar="<validation>",
        choices=["none", "schema", "schematron"],
        default="schema",
        help="validate generated XML document (none, schema, schematron, default is schema)"
    )
    # The flag defaults to a real boolean (it used to be the string "true").
    # "-ff/--fail-fast" is still accepted; "--no-fail-fast" is the only way
    # to actually switch the behaviour off.
    parser.add_argument(
        "-ff", "--fail-fast",
        action="store_true",
        default=True,
        help="terminate execution on validation error (default is true)"
    )
    parser.add_argument(
        "--no-fail-fast",
        action="store_false",
        dest="fail_fast",
        help="continue execution after a validation error"
    )
    parser.add_argument(
        "-e", "--encoding",
        metavar="<encoding>",
        choices=["utf-8", "windows-1251"],
        default="utf-8",
        help="output XML encoding (utf-8, windows-1251, default is utf-8)"
    )
    parser.add_argument(
        "--seed",
        metavar="<seed>",
        help="set randomization seed"
    )
    parser.add_argument(
        "-d", "--debug",
        action="store_true",
        help="enable debug mode"
    )
    parser.add_argument(
        "-V", "--version",
        action='version',
        version='%(prog)s 0.1.0',
        help="shows current version"
    )

    # add shell completions
    source_arg.complete = shtab.FILE
    output_arg.complete = shtab.FILE
    shtab.add_argument_to(parser, ["-C", "--completion"], "print shell completion script (bash, zsh, tcsh)")
    # Give the completion option added by shtab a readable metavar in the help output.
    completion_act = [a for a in parser._actions if a.dest == 'completion']
    if completion_act:
        completion_act[0].metavar = '<shell>'

    return parser
|
96
|
+
|
97
|
+
|
98
|
+
def parse_args():
    """Parse and validate the command line.

    Returns:
        A tuple ``(args, xsd_files, output_path)``: the parsed namespace, the
        sorted list of schema paths, and the output location as a ``Path``
        (``None`` when no output option was given).

    The parser's ``error`` method is invoked when the configuration file is
    missing or when several schemas are combined with a non-directory output.
    """
    parser = _get_parser()
    args = parser.parse_args()

    if args.config_yaml:
        config_path = Path(args.config_yaml)
        if not (config_path.exists() and config_path.is_file()):
            parser.error(f"configuration file {config_path} does not exist.")

    # Collect all .xsd files
    xsd_files = _collect_xsd_files(args.source_paths, parser)

    # No output option: nothing else to validate or prepare
    if not args.output_path:
        return args, xsd_files, None

    output_path = Path(args.output_path)
    # An existing directory, or any path spelled with a trailing separator, counts as a directory
    targets_directory = output_path.is_dir() or args.output_path.endswith(('/', '\\'))

    # Several schemas can only be written into a directory
    if len(xsd_files) > 1 and not targets_directory:
        parser.error("option -o/--output must be a directory when multiple source xsd schemas are provided.")

    # Create the directory when the output was given as a directory
    if targets_directory:
        output_path.mkdir(parents=True, exist_ok=True)

    return args, xsd_files, output_path
|
122
|
+
|
123
|
+
|
124
|
+
def _collect_xsd_files(source_paths, parser):
    """Resolve the positional arguments into a sorted list of schema files.

    Directories contribute every ``*.xsd`` file directly inside them (the
    extension is matched case-insensitively); files are accepted when they
    carry an ``.xsd`` suffix. Existing files with another suffix are ignored.

    Args:
        source_paths: Paths given on the command line.
        parser: Object whose ``error(message)`` method reports usage errors.

    Returns:
        De-duplicated, sorted list of resolved ``Path`` objects.
    """
    found = set()
    for source_path in source_paths:
        path = Path(source_path).resolve()
        if path.is_dir():
            found.update(path.glob('*.[xX][sS][dD]'))
        elif not path.exists():
            # Any missing path is reported, not only those ending in ".xsd",
            # so a mistyped directory name is not silently ignored.
            parser.error(f"file {source_path} doesn't exist.")
        elif path.is_file() and path.suffix.lower() == '.xsd':
            found.add(path)
    if not found:
        parser.error("no source xsd schemas provided.")
    return sorted(found)
|
@@ -0,0 +1,73 @@
|
|
1
|
+
from lxml import etree
|
2
|
+
from xmlgenerator.arguments import parse_args
|
3
|
+
from xmlgenerator.configuration import load_config
|
4
|
+
from xmlgenerator.generator import XmlGenerator
|
5
|
+
from xmlgenerator.randomization import Randomizer
|
6
|
+
from xmlgenerator.substitution import Substitutor
|
7
|
+
from xmlgenerator.validation import XmlValidator
|
8
|
+
from xmlschema import XMLSchema
|
9
|
+
|
10
|
+
|
11
|
+
# TODO Generator - handle the standard XSD types
|
12
|
+
# TODO custom variables for the local context
|
13
|
+
# TODO Schematron validation
|
14
|
+
# TODO debug logging
|
15
|
+
# TODO add type annotations
|
16
|
+
# TODO clean up and translate the comments
|
17
|
+
# TODO finish writing the tests
|
18
|
+
# TODO native build
|
19
|
+
# TODO publish to GitHub releases
|
20
|
+
# TODO publish to https://pypi.org/
|
21
|
+
|
22
|
+
|
23
|
+
def main():
    """Command-line entry point: generate one XML document per schema.

    For every schema the document is generated, serialized, optionally
    printed (when no output path was given), validated, and finally written
    to the output file or directory.
    """
    args, xsd_files, output_path = parse_args()
    config = load_config(args.config_yaml)

    print(f"Найдено схем: {len(xsd_files)}")

    randomizer = Randomizer(args.seed)
    substitutor = Substitutor(randomizer)
    generator = XmlGenerator(randomizer, substitutor)
    validator = XmlValidator(args.validation, args.fail_fast)

    # The serializer receives the encoding as given; decoding uses the Python codec name
    codec = 'cp1251' if args.encoding == 'windows-1251' else args.encoding
    to_console = not output_path

    for schema_path in xsd_files:
        schema_name = schema_path.name
        print(f"Processing schema: {schema_name}")

        # Configuration override for the current schema
        local_config = config.get_for_file(schema_name)

        # Reset the substitution context for the current schema
        substitutor.reset_context(schema_name, local_config)

        # Load the schema and generate the document
        xsd_schema = XMLSchema(schema_path)  # loglevel='DEBUG'
        document = generator.generate_xml(xsd_schema, local_config)

        # Serialize to bytes and to text
        payload = etree.tostring(document, encoding=args.encoding, pretty_print=args.pretty)
        text = payload.decode(codec)

        if to_console:
            print(text)

        # Validation (if enabled)
        validator.validate(xsd_schema, text)

        # Output file name for the current schema (without extension);
        # requested even when printing to the console, as before
        base_name = substitutor.get_output_filename()

        if to_console:
            continue

        # Export the document to a file
        destination = output_path / f'{base_name}.xml' if output_path.is_dir() else output_path
        with open(destination, 'wb') as stream:
            stream.write(payload)
        print(f"Saved document: {destination.name}")
|
71
|
+
|
72
|
+
if __name__ == "__main__":
|
73
|
+
main()
|
@@ -0,0 +1,135 @@
|
|
1
|
+
import dataclasses
|
2
|
+
import re
|
3
|
+
import sys
|
4
|
+
from dataclasses import dataclass, field, Field
|
5
|
+
from typing import Dict, get_args, get_origin, Any
|
6
|
+
|
7
|
+
import yaml
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class RandomizationConfig:
    """Randomization settings; every field defaults to ``None``."""

    probability: float = None
    max_occurs: int = None
    min_length: int = None
    max_length: int = None
    min_inclusive: int = None
    max_inclusive: int = None
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass
class GlobalRandomizationConfig(RandomizationConfig):
    """Global randomization settings: same fields, but ``probability`` defaults to 0.5."""

    probability: float = 0.5
|
23
|
+
|
24
|
+
|
25
|
+
@dataclass
class GeneratorConfig:
    """Generator settings for one schema; unset scalar fields are ``None``."""

    source_filename: str = None
    output_filename: str = None
    # A fresh RandomizationConfig / dict is created for every instance
    randomization: RandomizationConfig = field(default_factory=RandomizationConfig)
    value_override: Dict[str, str] = field(default_factory=dict)
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass
class GlobalGeneratorConfig(GeneratorConfig):
    """Global generator settings, providing defaults for the file-name templates."""

    # The dot before the extension is escaped so that it only matches a
    # literal '.', not an arbitrary character.
    source_filename: str = field(default=r'(?P<extracted>.*)\.(xsd|XSD)')
    output_filename: str = field(default='{{ source_filename }}_{{ uuid }}')
    randomization: GlobalRandomizationConfig = field(default_factory=lambda: GlobalRandomizationConfig())
|
38
|
+
|
39
|
+
|
40
|
+
@dataclass
class Config:
    """Whole configuration: global settings plus per-schema overrides.

    ``specific`` maps a regular expression (matched against the schema file
    name) to the settings overriding the global ones for matching schemas.
    """

    global_: GlobalGeneratorConfig = field(default_factory=GlobalGeneratorConfig)
    specific: Dict[str, GeneratorConfig] = field(default_factory=dict)

    def get_for_file(self, xsd_name):
        """Return the effective settings for the schema file *xsd_name*.

        The first entry of ``specific`` whose pattern matches wins; its
        non-``None`` values are layered over the global settings. Without a
        match the global settings object itself is returned.
        """

        def without_none(pairs):
            # Drop unset values so they do not mask the global ones
            return {key: value for (key, value) in pairs if value is not None}

        for pattern, conf in self.specific.items():
            if not re.match(pattern, xsd_name):
                continue

            defaults = dataclasses.asdict(self.global_)
            overrides = dataclasses.asdict(conf, dict_factory=without_none)
            combined = _recursive_update(defaults, overrides)
            merged_config = _map_to_class(combined, GeneratorConfig, "")
            # Local overrides come first, global ones are appended for keys not yet present
            merged_config.value_override = _merge_dicts(conf.value_override, self.global_.value_override)
            return merged_config

        return self.global_
|
58
|
+
|
59
|
+
|
60
|
+
def load_config(file_path: str | None) -> "Config":
    """Load the YAML configuration file.

    Args:
        file_path: Path of the YAML file; a falsy value yields the defaults.

    Returns:
        The populated ``Config`` (a default one for an empty file).
    """
    if not file_path:
        return Config()
    # Read the file as bytes so the YAML parser determines the encoding
    # itself instead of depending on the platform's default text encoding.
    with open(file_path, 'rb') as file:
        config_data: dict[str, Any] = yaml.safe_load(file) or {}
    return _map_to_class(config_data, Config, "")
|
66
|
+
|
67
|
+
|
68
|
+
def _map_to_class(data_dict: dict, cls, parent_path: str):
    """Convert parsed YAML data into an instance of the annotated type *cls*.

    Dataclasses, ``Dict[...]`` and ``list[...]`` annotations are mapped
    recursively; any other type returns the data unchanged. The YAML key
    ``global`` is mapped to the dataclass field ``global_``.

    Args:
        data_dict: Parsed YAML node (may be ``None`` for an empty node).
        cls: Target type taken from the dataclass field annotation.
        parent_path: Dotted path of the node, used in error messages.

    Returns:
        The converted value.

    Raises:
        SystemExit: With status 1 when the YAML contains an unexpected
            property, lacks a required one, or has a node of the wrong kind.
    """
    # Dataclasses
    if dataclasses.is_dataclass(cls):
        class_fields: dict[str, Field] = cls.__dataclass_fields__

        # Falsy nodes (None, empty) are treated as "no properties given"
        if data_dict and not isinstance(data_dict, dict):
            print(f"YAML parse error: expected a mapping at {parent_path or '<root>'}", file=sys.stderr)
            sys.exit(1)

        # Fields without a default value and without a default factory are required
        required_fields = {
            name
            for name, class_field in class_fields.items()
            if class_field.default is dataclasses.MISSING
            and class_field.default_factory is dataclasses.MISSING
        }

        yaml_items: dict[str, Any] = {}
        for yaml_name, value in (data_dict or {}).items():
            class_field_name = yaml_name if yaml_name != "global" else "global_"
            if class_field_name not in class_fields:
                print(f"YAML parse error: unexpected property: {parent_path}.{yaml_name}", file=sys.stderr)
                sys.exit(1)

            # The field annotation decides how the value is mapped
            field_type = class_fields[class_field_name].type
            yaml_items[class_field_name] = _map_to_class(value, field_type, f"{parent_path}.{yaml_name}")

        # Report required fields that are absent
        missing_fields = sorted(required_fields - yaml_items.keys())
        if missing_fields:
            print(f"YAML parse error: missing required properties in {parent_path}:", file=sys.stderr)
            for missing_field in missing_fields:
                yaml_field_name = missing_field if missing_field != "global_" else "global"
                print(yaml_field_name, file=sys.stderr)
            sys.exit(1)

        return cls(**yaml_items)

    # Dictionaries
    if get_origin(cls) is dict:
        key_type, value_type = get_args(cls)
        if not data_dict:
            return {}
        if not isinstance(data_dict, dict):
            print(f"YAML parse error: expected a mapping at {parent_path or '<root>'}", file=sys.stderr)
            sys.exit(1)
        return {
            k: _map_to_class(v, value_type, f"{parent_path}.{k}")
            for k, v in data_dict.items()
        }

    # Lists
    if get_origin(cls) is list:
        item_type = get_args(cls)[0]
        if not data_dict:
            # An empty YAML node yields an empty list instead of a TypeError
            return []
        if not isinstance(data_dict, (list, tuple)):
            print(f"YAML parse error: expected a list at {parent_path or '<root>'}", file=sys.stderr)
            sys.exit(1)
        return [_map_to_class(item, item_type, f"{parent_path}[{i}]") for i, item in enumerate(data_dict)]

    # Basic types (int, str, bool, ...)
    return data_dict
|
119
|
+
|
120
|
+
|
121
|
+
def _recursive_update(original, updates):
    """Merge *updates* into *original* in place and return *original*.

    When both sides hold a dict under the same key the dicts are merged
    recursively; in every other case the value from *updates* replaces the
    existing one.
    """
    for key, incoming in updates.items():
        mergeable = key in original and isinstance(original[key], dict) and isinstance(incoming, dict)
        if not mergeable:
            original[key] = incoming
            continue
        _recursive_update(original[key], incoming)
    return original
|
128
|
+
|
129
|
+
|
130
|
+
def _merge_dicts(base_dict, extra_dict):
    """Return a new dict with *base_dict*'s entries plus the missing ones from *extra_dict*.

    Entries of *base_dict* take precedence and keep their position; keys
    only present in *extra_dict* are appended in their own order. Neither
    argument is modified.
    """
    combined = dict(base_dict)
    for key, value in extra_dict.items():
        combined.setdefault(key, value)
    return combined
|
@@ -0,0 +1,387 @@
|
|
1
|
+
import re
|
2
|
+
import sys
|
3
|
+
|
4
|
+
import rstr
|
5
|
+
import xmlschema
|
6
|
+
from lxml import etree
|
7
|
+
from xmlschema.validators import XsdComplexType, XsdAtomicRestriction, XsdTotalDigitsFacet, XsdElement, \
|
8
|
+
XsdGroup, XsdFractionDigitsFacet, XsdLengthFacet, XsdMaxLengthFacet, XsdMinExclusiveFacet, XsdMinInclusiveFacet, \
|
9
|
+
XsdMinLengthFacet, XsdAnyElement, XsdAtomicBuiltin, XsdEnumerationFacets, XsdMaxExclusiveFacet, XsdMaxInclusiveFacet
|
10
|
+
|
11
|
+
from xmlgenerator.configuration import GeneratorConfig
|
12
|
+
from xmlgenerator.randomization import Randomizer
|
13
|
+
from xmlgenerator.substitution import Substitutor
|
14
|
+
|
15
|
+
|
16
|
+
class XmlGenerator:
|
17
|
+
def __init__(self, randomizer: Randomizer, substitutor: Substitutor):
    """Keep the random source and the value substitutor used during generation."""
    self.randomizer, self.substitutor = randomizer, substitutor
|
20
|
+
|
21
|
+
def generate_xml(self, xsd_schema: xmlschema.XMLSchema, local_config: GeneratorConfig) -> etree.Element:
    """Generate a document for the first root element declared in *xsd_schema*.

    Args:
        xsd_schema: Loaded schema; its first root element is used.
        local_config: Generator settings effective for this schema.

    Returns:
        The root element of the generated document.
    """
    root_declaration = xsd_schema.root_elements[0]
    document_root = etree.Element(root_declaration.name)
    self._add_elements(document_root, root_declaration, local_config)
    return document_root
|
26
|
+
|
27
|
+
def _add_elements(self, xml_element: etree.Element, xsd_element, local_config: GeneratorConfig) -> None:
    """Fill *xml_element* with attributes, text and children described by *xsd_element*.

    Args:
        xml_element: Element of the document being built.
        xsd_element: Schema component describing it: an element declaration,
            a model group, or a wildcard.
        local_config: Generator settings effective for the current schema.

    Raises:
        RuntimeError: For schema constructs that are not supported.
    """
    rnd = self.randomizer.rnd

    xsd_element_type = getattr(xsd_element, 'type', None)

    # Add attributes, if there are any
    attributes = getattr(xsd_element, 'attributes', dict())
    if len(attributes) > 0 and xsd_element_type.local_name != 'anyType':
        for attr_name, attr in attributes.items():
            use = attr.use  # optional | required | prohibited
            if use == 'prohibited':
                continue
            if use == 'optional' and rnd.random() > local_config.randomization.probability:
                continue  # skip optional attribute

            attr_value = self._generate_value(attr.type, attr_name, local_config)
            if attr_value is not None:
                xml_element.set(attr_name, str(attr_value))

    # Process child elements --------------------------------------------------------------------------------------
    if isinstance(xsd_element, XsdElement):
        if isinstance(xsd_element_type, (XsdAtomicRestriction, XsdAtomicBuiltin)):
            text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
            # Element text must be a string, exactly like attribute values above
            xml_element.text = None if text is None else str(text)
        elif isinstance(xsd_element_type, XsdComplexType):
            xsd_element_type_content = xsd_element_type.content
            if not isinstance(xsd_element_type_content, XsdGroup):
                raise RuntimeError(
                    f"Unsupported complex type content for element {xsd_element.name}: {xsd_element_type_content!r}")
            self._add_elements(xml_element, xsd_element_type_content, local_config)
        else:
            raise RuntimeError(f"Unsupported type for element {xsd_element.name}: {xsd_element_type!r}")

    elif isinstance(xsd_element, XsdGroup):
        model = xsd_element.model
        if model not in ('all', 'sequence', 'choice'):
            raise RuntimeError(f"Unsupported group model: {model!r}")

        for _ in range(self._random_occurs(xsd_element)):
            if model == 'choice':
                # One randomly selected alternative per repetition of the group
                self._add_child(xml_element, rnd.choice(xsd_element), local_config)
            else:
                # 'all' and 'sequence': every particle, in declaration order
                for xsd_child in xsd_element.content:
                    self._add_child(xml_element, xsd_child, local_config)

    elif isinstance(xsd_element, XsdAnyElement):
        # no child tags or attributes are added for a wildcard
        pass

    else:
        raise RuntimeError(f"Unsupported schema component: {xsd_element!r}")


def _random_occurs(self, xsd_component) -> int:
    """Draw how many times *xsd_component* is emitted, within its occurrence bounds."""
    min_occurs = getattr(xsd_component, 'min_occurs', None)
    max_occurs = getattr(xsd_component, 'max_occurs', None)
    if min_occurs is None:
        min_occurs = 0
    if max_occurs is None:
        # No upper bound given (presumably "unbounded"): cap at 10, but never below the minimum
        max_occurs = max(min_occurs, 10)  # TODO externalize
    return self.randomizer.rnd.randint(min_occurs, max_occurs)


def _add_child(self, xml_element: etree.Element, xsd_child, local_config: GeneratorConfig) -> None:
    """Emit one particle of a model group (element, nested group or wildcard) into *xml_element*."""
    # The count is drawn for every kind of particle so that the sequence of
    # random numbers does not depend on the particle kind.
    occurs = self._random_occurs(xsd_child)

    if isinstance(xsd_child, XsdElement):
        for _ in range(occurs):
            xml_child_element = etree.SubElement(xml_element, xsd_child.name)
            self._add_elements(xml_child_element, xsd_child, local_config)

    elif isinstance(xsd_child, XsdGroup):
        # A nested group adds its children to the same parent; the
        # recursive call draws the group's own repetition count.
        self._add_elements(xml_element, xsd_child, local_config)

    elif isinstance(xsd_child, XsdAnyElement):
        # NOTE: a single placeholder element is emitted regardless of the drawn count
        xml_child_element = etree.SubElement(xml_element, "Any")
        self._add_elements(xml_child_element, xsd_child, local_config)

    else:
        raise RuntimeError(xsd_child)
|
143
|
+
|
144
|
+
def _generate_value(self, xsd_type, target_name, local_config: GeneratorConfig) -> str | None:
    """Produce a value for the attribute or element *target_name* of type *xsd_type*.

    A value configured in ``local_config.value_override`` has priority, then
    a random member of the type's enumeration, then a value generated from
    the type and its facets.

    Args:
        xsd_type: Schema type of the target.
        target_name: Name of the attribute or element being generated.
        local_config: Generator settings effective for the current schema.

    Returns:
        The generated value, or ``None`` for complex types.

    Raises:
        RuntimeError: If *xsd_type* is ``None``, carries an unknown facet,
            or cannot be handled.
    """
    if xsd_type is None:
        raise RuntimeError(f"xsd_type is None. Target name: {target_name}")

    if isinstance(xsd_type, XsdComplexType):
        return None

    rnd = self.randomizer.rnd

    # -------------------------------------------------------------------------------------------------------------
    # Collect the restrictions
    min_length = getattr(xsd_type, 'min_length', None)  # None | int
    max_length = getattr(xsd_type, 'max_length', None)  # None | int

    min_value = getattr(xsd_type, 'min_value', None)  # None | int
    max_value = getattr(xsd_type, 'max_value', None)  # None

    total_digits = None
    fraction_digits = None
    enumeration = getattr(xsd_type, 'enumeration', None)
    patterns = getattr(xsd_type, 'patterns', None)

    # A type without validators is treated as having no facets
    validators = getattr(xsd_type, 'validators', None) or ()
    for validator in validators:
        # NOTE(review): exclusive bounds are used as if they were inclusive
        if isinstance(validator, XsdMinExclusiveFacet):
            min_value = validator.value
        elif isinstance(validator, XsdMinInclusiveFacet):
            min_value = validator.value
        elif isinstance(validator, XsdMaxExclusiveFacet):
            max_value = validator.value
        elif isinstance(validator, XsdMaxInclusiveFacet):
            max_value = validator.value
        elif isinstance(validator, XsdLengthFacet):
            min_length = validator.value
            max_length = validator.value
        elif isinstance(validator, XsdMinLengthFacet):
            min_length = validator.value
        elif isinstance(validator, XsdMaxLengthFacet):
            max_length = validator.value
        elif isinstance(validator, XsdTotalDigitsFacet):
            total_digits = validator.value
        elif isinstance(validator, XsdFractionDigitsFacet):
            fraction_digits = validator.value
        elif isinstance(validator, XsdEnumerationFacets):
            enumeration = validator.enumeration
        elif callable(validator):
            pass
        else:
            raise RuntimeError(f"Unhandled validator: {validator}")

    # -1 marks "no length restriction"
    min_length = min_length or -1
    max_length = max_length or -1

    min_value = min_value or 0
    # Only a missing upper bound gets the default; an explicit bound of 0 must be kept
    if max_value is None:
        max_value = 100000

    # -------------------------------------------------------------------------------------------------------------
    # Look for a value override in the configuration

    value_override = local_config.value_override
    is_found, overridden_value = self.substitutor.substitute_value(target_name, value_override.items())
    if is_found:
        return overridden_value

    # -------------------------------------------------------------------------------------------------------------
    # If there is an enumeration, select a random value from it

    if enumeration is not None:
        return rnd.choice(enumeration)

    # -------------------------------------------------------------------------------------------------------------
    # Generate values for the built-in types and for restricted types
    if isinstance(xsd_type, XsdAtomicBuiltin) or isinstance(xsd_type, XsdAtomicRestriction):
        return self._generate_value_by_type(
            xsd_type, target_name,
            patterns,
            min_length, max_length,
            min_value, max_value,
            total_digits, fraction_digits
        )

    # -------------------------------------------------------------------------------------------------------------
    # Check the base type
    base_type = getattr(xsd_type, 'base_type', None)

    # should not happen (only if a complex type slipped through)
    if base_type is None:
        raise RuntimeError(f"base_type is None. Target name: {target_name}")

    raise RuntimeError(f"Can't generate value - unhandled type. Target name: {target_name}")
|
234
|
+
|
235
|
+
|
236
|
+
def _generate_value_by_type(self, xsd_type, target_name, patterns, min_length, max_length, min_value, max_value,
|
237
|
+
total_digits, fraction_digits) -> str | None:
|
238
|
+
|
239
|
+
type_id = xsd_type.id
|
240
|
+
base_type = xsd_type.base_type
|
241
|
+
if not type_id:
|
242
|
+
type_id = base_type.id
|
243
|
+
if not type_id:
|
244
|
+
type_id = xsd_type.root_type.id
|
245
|
+
|
246
|
+
match type_id:
|
247
|
+
case 'string':
|
248
|
+
return self._generate_string(target_name, patterns, min_length, max_length)
|
249
|
+
case 'boolean':
|
250
|
+
return self._generate_boolean()
|
251
|
+
case 'integer':
|
252
|
+
return self._generate_integer(total_digits, min_value, max_value)
|
253
|
+
case 'decimal':
|
254
|
+
return self._generate_decimal(total_digits, fraction_digits, min_value, max_value)
|
255
|
+
case 'float':
|
256
|
+
return self._generate_float(min_value, max_value)
|
257
|
+
case 'double':
|
258
|
+
return self._generate_double(min_value, max_value)
|
259
|
+
case 'duration':
|
260
|
+
return self._generate_duration()
|
261
|
+
case 'dateTime':
|
262
|
+
return self._generate_datetime()
|
263
|
+
case 'date':
|
264
|
+
return self._generate_date()
|
265
|
+
case 'time':
|
266
|
+
return self._generate_time()
|
267
|
+
case 'gYearMonth':
|
268
|
+
return self._generate_gyearmonth()
|
269
|
+
case 'gYear':
|
270
|
+
return self._generate_gyear()
|
271
|
+
case 'gMonthDay':
|
272
|
+
return self._generate_gmonthday()
|
273
|
+
case 'gDay':
|
274
|
+
return self._generate_gday()
|
275
|
+
case 'gMonth':
|
276
|
+
return self._generate_gmonth()
|
277
|
+
case 'hexBinary':
|
278
|
+
return self._generate_hex_binary()
|
279
|
+
case 'base64Binary':
|
280
|
+
return self._generate_base64_binary()
|
281
|
+
case 'anyURI':
|
282
|
+
return self._generate_any_uri()
|
283
|
+
case 'QName':
|
284
|
+
return self._generate_qname()
|
285
|
+
case 'NOTATION':
|
286
|
+
return self._generate_notation()
|
287
|
+
case _:
|
288
|
+
raise RuntimeError(type_id)
|
289
|
+
|
290
|
+
def _generate_string(self, target_name, patterns, min_length, max_length):
    """Generate a string, from a pattern facet when there is one.

    Args:
        target_name: Name of the attribute or element (used in warnings).
        patterns: Pattern facets of the type, or ``None``.
        min_length: Minimum length, ``-1`` when unrestricted.
        max_length: Maximum length, ``-1`` when unrestricted.

    Returns:
        A string produced from a randomly chosen pattern, or a random ASCII
        string when there are no patterns.
    """
    rnd = self.randomizer.rnd
    if patterns is not None:
        # Generate the string from a regular expression. The generator is
        # bound to the shared random source so that a fixed seed also makes
        # pattern-based values reproducible.
        random_pattern = rnd.choice(patterns)
        xeger = rstr.Rstr(rnd).xeger(random_pattern.attrib['value'])
        # Any whitespace character produced by the pattern becomes a plain space
        xeger = re.sub(r'\s', ' ', xeger)
        if min_length > -1 and len(xeger) < min_length:
            print(
                f"Possible mistake in schema: {target_name} generated value '{xeger}' can't be shorter than {min_length}",
                file=sys.stderr)
        if -1 < max_length < len(xeger):
            print(f"Possible mistake in schema: {target_name} generated value '{xeger}' can't be longer than {max_length}", file=sys.stderr)
        return xeger

    # Otherwise generate a random string
    return self.randomizer.ascii_string(min_length, max_length)
|
307
|
+
|
308
|
+
def _generate_boolean(self):
    """Return 'true' or 'false', picked with the shared random source."""
    options = ['true', 'false']
    return self.randomizer.rnd.choice(options)
|
311
|
+
|
312
|
+
def _generate_integer(self, total_digits, min_value, max_value):
    """Return a random integer as a string.

    With a truthy *total_digits* the number has exactly that many digits and
    the given bounds are ignored; otherwise it lies in
    ``[min_value, max_value]``.
    """
    if total_digits:
        lower, upper = 10 ** (total_digits - 1), (10 ** total_digits) - 1
    else:
        lower, upper = min_value, max_value
    return str(self.randomizer.rnd.randint(lower, upper))
|
319
|
+
|
320
|
+
def _generate_decimal(self, total_digits, fraction_digits, min_value, max_value):
    """Return a random decimal number as a string.

    With a truthy *total_digits* the digit counts drive the result and the
    bounds are ignored. Otherwise the value lies within
    ``[min_value, max_value]`` and has *fraction_digits* decimal places
    (two when *fraction_digits* is ``None``).
    """
    import math

    rnd = self.randomizer.rnd
    if total_digits:
        if fraction_digits and fraction_digits > 0:
            # The fraction can never take more digits than the total allows
            fraction_digits = min(fraction_digits, total_digits)
            integer_digits = total_digits - fraction_digits
            if integer_digits > 0:
                integer_part = rnd.randint(10 ** (integer_digits - 1), (10 ** integer_digits) - 1)
            else:
                # All digits belong to the fraction: the integer part is zero
                integer_part = 0
            fractional_part = rnd.randint(0, (10 ** fraction_digits) - 1)
            return f"{integer_part}.{fractional_part:0{fraction_digits}}"

        min_value = 10 ** (total_digits - 1)
        max_value = (10 ** total_digits) - 1
        return str(rnd.randint(min_value, max_value))

    # Draw an integer number of smallest units inside the scaled bounds, so
    # the formatted value stays within [min_value, max_value].
    places = 2 if fraction_digits is None else fraction_digits
    scale = 10 ** places
    scaled = rnd.randint(math.ceil(min_value * scale), math.floor(max_value * scale))
    sign = '-' if scaled < 0 else ''
    whole, fraction = divmod(abs(scaled), scale)
    if places == 0:
        return f"{sign}{whole}"
    return f"{sign}{whole}.{fraction:0{places}}"
|
336
|
+
|
337
|
+
def _generate_float(self, min_value, max_value):
    """Return a random number between the bounds, rounded to two decimals, as a string."""
    sample = self.randomizer.rnd.uniform(min_value, max_value)
    return str(round(sample, 2))
|
342
|
+
|
343
|
+
def _generate_double(self, min_value, max_value):
    """Generate a double value; delegates to the float generator with the same bounds."""
    return self._generate_float(min_value, max_value)
|
345
|
+
|
346
|
+
def _generate_duration(self):
    """Generate a duration value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
348
|
+
|
349
|
+
def _generate_datetime(self):
    """Generate a dateTime value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
351
|
+
|
352
|
+
def _generate_date(self):
    """Generate a date value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
354
|
+
|
355
|
+
def _generate_time(self):
    """Generate a time value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
357
|
+
|
358
|
+
def _generate_gyearmonth(self):
    """Generate a gYearMonth value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
360
|
+
|
361
|
+
def _generate_gyear(self):
    """Return a random year between 2000 and 2050 (inclusive) as a string.

    The value is converted to ``str`` like the results of the other
    generators, because it ends up as XML text.
    """
    rnd = self.randomizer.rnd
    return str(rnd.randint(2000, 2050))
|
364
|
+
|
365
|
+
def _generate_gmonthday(self):
    """Generate a gMonthDay value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
367
|
+
|
368
|
+
def _generate_gday(self):
    """Generate a gDay value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
370
|
+
|
371
|
+
def _generate_gmonth(self):
    """Generate a gMonth value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
373
|
+
|
374
|
+
def _generate_hex_binary(self):
    """Generate a hexBinary value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
376
|
+
|
377
|
+
def _generate_base64_binary(self):
    """Generate a base64Binary value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
379
|
+
|
380
|
+
def _generate_any_uri(self):
    """Generate an anyURI value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
382
|
+
|
383
|
+
def _generate_qname(self):
    """Generate a QName value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
385
|
+
|
386
|
+
def _generate_notation(self):
    """Generate a NOTATION value (not implemented yet).

    Raises:
        NotImplementedError: Always; it is a ``RuntimeError`` subclass, so
            existing ``RuntimeError`` handlers keep working.
    """
    raise NotImplementedError("not yet implemented")
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import random
|
2
|
+
import string
|
3
|
+
from datetime import datetime, timedelta
|
4
|
+
|
5
|
+
from faker import Faker
|
6
|
+
|
7
|
+
|
8
|
+
class Randomizer:
    """Bundle of the random sources used by the generator."""

    def __init__(self, seed=None):
        """Create the random sources, both seeded with *seed*.

        Args:
            seed: Seed for reproducible output; ``None`` for a random seed.
        """
        self.rnd = random.Random(seed)
        self.fake = Faker(locale='ru_RU')
        self.fake.seed_instance(seed)

    def ascii_string(self, min_length=-1, max_length=-1):
        """Return a random string of ASCII letters.

        Args:
            min_length: Minimum length; values that are falsy or below 0
                mean 1.
            max_length: Maximum length; values that are falsy or below the
                minimum mean 20. The maximum is capped at 50.

        The maximum is never allowed to drop below the minimum, so a
        minimum above 20 (without a maximum) or above the cap of 50 yields
        a string of exactly the minimum length instead of an error.
        """
        min_length = min_length if min_length and min_length > -1 else 1
        if not max_length or max_length < min_length:
            max_length = max(min_length, 20)
        max_length = max(min_length, min(max_length, 50))
        length = self.rnd.randint(min_length, max_length)
        # Random string made of Latin letters (a-z, A-Z)
        letters = string.ascii_letters
        return ''.join(self.rnd.choice(letters) for _ in range(length))

    def random_date(self, start_date: str, end_date: str) -> datetime:
        """Return a random date between two ``YYYY-MM-DD`` dates (inclusive).

        Args:
            start_date: First possible day.
            end_date: Last possible day; must not precede *start_date*.
        """
        # Convert the strings into datetime objects
        start = datetime.strptime(start_date, "%Y-%m-%d")
        end = datetime.strptime(end_date, "%Y-%m-%d")

        # Number of days between the two dates
        delta = (end - start).days

        # Random number of days within that span
        random_days = self.rnd.randint(0, delta)

        # Offset the start date by the random number of days
        return start + timedelta(days=random_days)

    def snils_formatted(self):
        """Return a generated SNILS number formatted as ``XXX-XXX-XXX XX``."""
        snils = self.fake.snils()
        return f"{snils[:3]}-{snils[3:6]}-{snils[6:9]} {snils[9:]}"
|
@@ -0,0 +1,118 @@
|
|
1
|
+
import re
|
2
|
+
import uuid
|
3
|
+
|
4
|
+
import rstr
|
5
|
+
|
6
|
+
from xmlgenerator.randomization import Randomizer
|
7
|
+
|
8
|
+
__all__ = ['Substitutor']
|
9
|
+
|
10
|
+
# Placeholder syntax recognised in configuration values:
#   {{ function }}, {{ function(argument) }}, {{ function | modifier }}
# The named groups `function`, `argument` and `modifier` are all optional.
_pattern = re.compile(pattern=r'\{\{\s*(?:(?P<function>\S*?)(?:\(\s*(?P<argument>[^)]*)\s*\))?\s*(?:\|\s*(?P<modifier>.*?))?)?\s*}}')


class Substitutor:
    """Expand ``{{ ... }}`` placeholders using a table of provider functions.

    Providers are looked up by name in ``providers_dict``. Values resolved
    with the ``global`` modifier are cached in a context that is never
    cleared by this class; values resolved with ``local`` are cached in a
    context that ``reset_context`` clears.
    """

    def __init__(self, randomizer: "Randomizer"):
        """Build the provider table.

        Args:
            randomizer: Object exposing ``rnd`` (a ``random.Random``), ``fake``
                (a Faker instance), ``random_date`` and ``snils_formatted``.
        """
        fake = randomizer.fake
        self.randomizer = randomizer
        self._local_context = {}
        self._global_context = {}
        self.providers_dict = {
            # Local-context functions
            "source_filename": lambda: self._local_context["source_filename"],
            "source_extracted": lambda: self._local_context["source_extracted"],
            "output_filename": lambda: self.get_output_filename(),

            'uuid': self._rand_uuid,
            "regex": self._rand_regex,
            "number": self._rand_int,
            "date": self._rand_date,

            "last_name": fake.last_name_male,
            "first_name": fake.first_name_male,
            "middle_name": fake.middle_name_male,
            'address_text': fake.address,
            'administrative_unit': fake.administrative_unit,
            'house_number': fake.building_number,
            'city_name': fake.city_name,
            'postcode': fake.postcode,
            'company_name': fake.company,
            'bank_name': fake.bank,
            'phone_number': fake.phone_number,
            'inn_fl': fake.individuals_inn,
            'inn_ul': fake.businesses_inn,
            'ogrn_ip': fake.individuals_ogrn,
            # 'ogrn_fl' maps to the *businesses* OGRN; the name is kept for
            # backward compatibility and 'ogrn_ul' is the accurate alias.
            'ogrn_fl': fake.businesses_ogrn,
            'ogrn_ul': fake.businesses_ogrn,
            'kpp': fake.kpp,
            'snils_formatted': randomizer.snils_formatted,
        }

    def _rand_uuid(self):
        """Return a version-4 UUID string drawn from the seeded random source.

        ``uuid.uuid4()`` reads OS entropy and would ignore the seed, making
        seeded runs non-reproducible.
        """
        random_bits = self.randomizer.rnd.getrandbits(128)
        return str(uuid.UUID(int=random_bits, version=4))

    def _rand_regex(self, a):
        """Return a random string matching the regular expression ``a``.

        A single pair of matching surrounding quotes (as in the documented
        ``regex("pattern")`` form) is removed; unquoted patterns are passed
        through unchanged. Generation uses the seeded random source.
        """
        pattern = str(a)
        trimmed = pattern.strip()
        if len(trimmed) >= 2 and trimmed[0] == trimmed[-1] and trimmed[0] in "'\"":
            pattern = trimmed[1:-1]
        return rstr.Rstr(self.randomizer.rnd).xeger(pattern)

    def _rand_int(self, a):
        """Return a random integer as text; ``a`` is ``"low,high"`` (inclusive)."""
        args = str(a).split(sep=",")
        return str(self.randomizer.rnd.randint(int(args[0]), int(args[1])))

    def _rand_date(self, a):
        """Return a random date formatted as ``%Y%m%d``.

        ``a`` is ``"from,until"``; each part may be wrapped in spaces and
        single or double quotes and is handed to ``randomizer.random_date``.
        """
        args = str(a).split(sep=",")
        date_from = args[0].strip(' ').strip("'").strip('"')
        date_until = args[1].strip(' ').strip("'").strip('"')
        random_date = self.randomizer.random_date(date_from, date_until)
        return random_date.strftime('%Y%m%d')  # TODO externalize pattern

    def reset_context(self, xsd_filename, config_local):
        """Reset the local context for a new source schema file.

        Stores ``source_filename``, the ``extracted`` group obtained by
        searching ``xsd_filename`` with ``config_local.source_filename``, and
        the resolved ``config_local.output_filename`` template.

        Raises:
            RuntimeError: If the pattern does not match ``xsd_filename`` or
                has no ``extracted`` group.
        """
        self._local_context.clear()
        self._local_context["source_filename"] = xsd_filename

        source_filename = config_local.source_filename
        found = re.search(source_filename, xsd_filename)
        if found is None:
            # Previously surfaced as AttributeError on None.groupdict().
            raise RuntimeError(
                f"Pattern '{source_filename}' does not match file name '{xsd_filename}'"
            )
        groups = found.groupdict()
        if 'extracted' not in groups:
            raise RuntimeError(
                f"Pattern '{source_filename}' must contain the group 'extracted'"
            )
        self._local_context["source_extracted"] = groups['extracted']

        output_filename = config_local.output_filename
        resolved_value = self._process_expression(output_filename)
        self._local_context['output_filename'] = resolved_value

    def get_output_filename(self):
        """Return the resolved output file name, or ``None`` if not yet set."""
        return self._local_context.get("output_filename")

    def substitute_value(self, target_name, items):
        """Resolve the override for ``target_name``, if any.

        ``items`` is iterated as ``(name_pattern, expression)`` pairs; the
        first pattern found in ``target_name`` (case-insensitive search) wins.

        Returns:
            tuple: ``(True, value)`` when the winning expression is truthy,
            otherwise ``(False, None)`` (no match, or an empty expression).
        """
        for target_name_pattern, expression in items:
            if re.search(target_name_pattern, target_name, re.IGNORECASE):
                if expression:
                    return True, self._process_expression(expression)
                return False, None
        return False, None

    def _process_expression(self, expression):
        """Return ``expression`` with every placeholder replaced by its value.

        Placeholders are resolved left to right.

        Raises:
            RuntimeError: On an unknown function name or modifier.
        """
        return _pattern.sub(self._resolve_placeholder, expression)

    def _resolve_placeholder(self, match):
        """Compute the replacement text for one placeholder match."""
        func_name = match['function']
        func_args = match['argument']
        func_mod = match['modifier']

        # .get() so that an unknown name reaches the explicit error below
        # instead of escaping as a bare KeyError.
        provider = self.providers_dict.get(func_name)
        if not provider:
            raise RuntimeError(f"Unknown function {func_name}")

        def provide():
            return provider(func_args) if func_args else provider()

        if func_mod is None:
            return provide()
        if func_mod == 'global':
            context = self._global_context
        elif func_mod == 'local':
            context = self._local_context
        else:
            raise RuntimeError(f"Unknown modifier: {func_mod}")

        # Cached per function name; a falsy cached value is regenerated.
        resolved_value = context.get(func_name) or provide()
        context[func_name] = resolved_value
        return resolved_value
|
@@ -0,0 +1,52 @@
|
|
1
|
+
import sys
|
2
|
+
|
3
|
+
from xmlschema import XMLSchemaValidationError
|
4
|
+
|
5
|
+
|
6
|
+
class XmlValidator:
    """Validate generated documents according to the selected mode."""

    def __init__(self, post_validate: str, fail_fast: bool):
        """Select the validation strategy.

        Args:
            post_validate: ``'schema'`` or ``'schematron'``. Any other value
                (such as ``'none'``) selects a no-op, so ``validate`` is always
                safe to call.
            fail_fast: When true, a schema validation error terminates the
                process with exit status 1.
        """
        self.fail_fast = fail_fast
        if post_validate == 'schema':
            self.validation_func = self._validate_with_schema
        elif post_validate == 'schematron':
            self.validation_func = self._validate_with_schematron
        else:
            # Previously the attribute was left unset for other modes and
            # validate() failed with AttributeError.
            self.validation_func = self._skip_validation

    def validate(self, xsd_schema, document):
        """Run the strategy chosen in the constructor on ``document``."""
        self.validation_func(xsd_schema, document)

    def _skip_validation(self, xsd_schema, document):
        """Do nothing; used when validation is disabled."""
        return None

    def _validate_with_schema(self, xsd_schema, document):
        """Validate via ``xsd_schema.validate(document)``.

        An ``XMLSchemaValidationError`` is printed to stderr; the process
        exits with status 1 when ``fail_fast`` is set, otherwise execution
        continues.
        """
        try:
            xsd_schema.validate(document)
        except XMLSchemaValidationError as err:
            print(err, file=sys.stderr)
            if self.fail_fast:
                sys.exit(1)

    def _validate_with_schematron(self, xsd_schema, document):
        """Placeholder for Schematron validation; always raises.

        Raises:
            NotImplementedError: Always. This is a ``RuntimeError`` subclass,
                so callers that catch ``RuntimeError`` keep working.
        """
        raise NotImplementedError("not yet implemented")
|
28
|
+
|
29
|
+
# TODO
|
30
|
+
# def validate_xml_with_schematron(xml_file, schematron_file):
|
31
|
+
# # Загрузка Schematron-схемы
|
32
|
+
# with open(schematron_file, 'rb') as f:
|
33
|
+
# schematron_doc = etree.parse(f)
|
34
|
+
#
|
35
|
+
# # Преобразование Schematron в XSLT
|
36
|
+
# schematron = etree.Schematron(schematron_doc)
|
37
|
+
#
|
38
|
+
# # Загрузка XML-документа
|
39
|
+
# with open(xml_file, 'rb') as f:
|
40
|
+
# xml_doc = etree.parse(f)
|
41
|
+
#
|
42
|
+
# # Валидация XML-документа
|
43
|
+
# is_valid = schematron.validate(xml_doc)
|
44
|
+
#
|
45
|
+
# if is_valid:
|
46
|
+
# print("XML документ валиден по Schematron-схеме.")
|
47
|
+
# else:
|
48
|
+
# print("XML документ не валиден по Schematron-схеме.")
|
49
|
+
# print(schematron.error_log)
|
50
|
+
|
51
|
+
# Пример использования
|
52
|
+
# validate_xml_with_schematron('example.xml', 'schema.sch')
|
@@ -0,0 +1,314 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: xmlgenerator
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Generates XML documents from XSD schemas
|
5
|
+
Home-page: https://github.com/lexakimov/xmlgenerator
|
6
|
+
Author: Alexey Akimov
|
7
|
+
Author-email: lex.akimov23@gmail.com
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Requires-Python: >=3.6
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
License-File: LICENSE
|
14
|
+
Requires-Dist: lxml
|
15
|
+
Requires-Dist: xmlschema
|
16
|
+
Requires-Dist: Faker
|
17
|
+
Requires-Dist: rstr
|
18
|
+
Requires-Dist: PyYAML
|
19
|
+
Dynamic: author
|
20
|
+
Dynamic: author-email
|
21
|
+
Dynamic: classifier
|
22
|
+
Dynamic: description
|
23
|
+
Dynamic: description-content-type
|
24
|
+
Dynamic: home-page
|
25
|
+
Dynamic: license-file
|
26
|
+
Dynamic: requires-dist
|
27
|
+
Dynamic: requires-python
|
28
|
+
Dynamic: summary
|
29
|
+
|
30
|
+
# XML Generator
|
31
|
+
|
32
|
+
- [Русский 🇷🇺](README_RU.md)
|
33
|
+
- [English 🇺🇸](README.md)
|
34
|
+
|
35
|
+
Generates XML documents based on XSD schemas with the ability to customize data through a YAML configuration file.
|
36
|
+
Simplifies the creation of test or demonstration XML data for complex schemas.
|
37
|
+
|
38
|
+
## Features
|
39
|
+
|
40
|
+
- Generation of XML documents based on XSD schemas
|
41
|
+
- Customization of generated values via a YAML configuration file
|
42
|
+
- Validation of generated documents
|
43
|
+
- Command-line interface for convenient use
|
44
|
+
|
45
|
+
## Installation
|
46
|
+
|
47
|
+
### Installation via pip
|
48
|
+
|
49
|
+
```bash
|
50
|
+
pip install xmlgenerator
|
51
|
+
```
|
52
|
+
|
53
|
+
### Build from source
|
54
|
+
|
55
|
+
1. **Clone the repository:**
|
56
|
+
```bash
|
57
|
+
git clone https://github.com/lexakimov/xmlgenerator.git
|
58
|
+
cd xmlgenerator
|
59
|
+
```
|
60
|
+
|
61
|
+
2. **Create and activate a virtual environment (recommended):**
|
62
|
+
```bash
|
63
|
+
python -m venv .venv
|
64
|
+
```
|
65
|
+
* **For Linux/macOS:**
|
66
|
+
```bash
|
67
|
+
source .venv/bin/activate
|
68
|
+
```
|
69
|
+
* **For Windows (Command Prompt/PowerShell):**
|
70
|
+
```bash
|
71
|
+
.\.venv\Scripts\activate
|
72
|
+
```
|
73
|
+
|
74
|
+
3. **Install dependencies:**
|
75
|
+
```bash
|
76
|
+
pip install -r requirements.txt
|
77
|
+
```
|
78
|
+
|
79
|
+
4.1. **Install the package:**
|
80
|
+
```bash
|
81
|
+
pip install .
|
82
|
+
# or for development mode (code changes will be immediately reflected)
|
83
|
+
# pip install -e .
|
84
|
+
```
|
85
|
+
|
86
|
+
4.2. **Otherwise, build a single executable:**
|
87
|
+
```bash
|
88
|
+
python build_native.py
|
89
|
+
```
|
90
|
+
|
91
|
+
## CLI Usage
|
92
|
+
|
93
|
+
The main command to run the generator is `xmlgenerator`.
|
94
|
+
|
95
|
+
**Examples:**
|
96
|
+
|
97
|
+
- Generate XML from a single schema and print to console:
|
98
|
+
```bash
|
99
|
+
xmlgenerator path/to/your/schema.xsd
|
100
|
+
```
|
101
|
+
|
102
|
+
- Generate XML from all schemas in a directory and save to the `output` folder using a configuration file:
|
103
|
+
```bash
|
104
|
+
xmlgenerator -c config.yml -o output/ path/to/schemas/
|
105
|
+
```
|
106
|
+
|
107
|
+
- Generate XML from a specific schema, save to a file with pretty formatting and windows-1251 encoding:
|
108
|
+
```bash
|
109
|
+
xmlgenerator -o output.xml -p -e windows-1251 path/to/your/schema.xsd
|
110
|
+
```
|
111
|
+
|
112
|
+
- Generate XML with validation disabled:
|
113
|
+
```bash
|
114
|
+
xmlgenerator -v none path/to/your/schema.xsd
|
115
|
+
```
|
116
|
+
|
117
|
+
**Install shell completions:**
|
118
|
+
|
119
|
+
```shell
|
120
|
+
# also available: zsh, tcsh
|
121
|
+
xmlgenerator -C bash | sudo tee /etc/bash_completion.d/xmlgenerator
|
122
|
+
```
|
123
|
+
|
124
|
+
**Detailed CLI Usage:**
|
125
|
+
|
126
|
+
```
|
127
|
+
usage: xmlgenerator [-h] [-c <config.yml>] [-o <output.xml>] [-p] [-v <validation>] [-ff] [-e <encoding>]
|
128
|
+
[--seed <seed>] [-d] [-V]
|
129
|
+
xsd [xsd ...]
|
130
|
+
|
131
|
+
Generates XML documents from XSD schemas
|
132
|
+
|
133
|
+
positional arguments:
|
134
|
+
xsd paths to xsd schema(s) or directory with xsd schemas
|
135
|
+
|
136
|
+
options:
|
137
|
+
-h, --help show this help message and exit
|
138
|
+
-c, --config <config.yml> pass yaml configuration file
|
139
|
+
-o, --output <output.xml> save output to dir or file
|
140
|
+
-p, --pretty prettify output XML
|
141
|
+
-v, --validation <validation> validate generated XML document (none, schema, schematron, default is schema)
|
142
|
+
-ff, --fail-fast terminate execution on validation error (default is true)
|
143
|
+
-e, --encoding <encoding> output XML encoding (utf-8, windows-1251, default is utf-8)
|
144
|
+
--seed <seed> set randomization seed
|
145
|
+
-d, --debug enable debug mode
|
146
|
+
-V, --version shows current version
|
147
|
+
-C, --completion <shell> print shell completion script (bash, zsh, tcsh)
|
148
|
+
```
|
149
|
+
|
150
|
+
## Configuration
|
151
|
+
|
152
|
+
The generator can be configured using a YAML file passed via the `-c` or `--config` option.
|
153
|
+
|
154
|
+
**Configuration File Structure:**
|
155
|
+
|
156
|
+
```yaml
|
157
|
+
# Global settings (apply to all schemas)
|
158
|
+
global:
|
159
|
+
|
160
|
+
# Regular expression to extract a substring from the source xsd schema filename.
|
161
|
+
# The extracted substring can be used via the `source_extracted` function.
|
162
|
+
# The regular expression must contain the group `extracted`.
|
163
|
+
# Default value: `(?P<extracted>.*).(xsd|XSD)` (extracts the filename without extension).
|
164
|
+
source_filename: ...
|
165
|
+
|
166
|
+
# Filename template for saving the generated document.
|
167
|
+
# Default value: `{{ source_filename }}_{{ uuid }}` (xsd schema filename + random UUID)
|
168
|
+
output_filename: ...
|
169
|
+
|
170
|
+
# Random value generator settings
|
171
|
+
randomization:
|
172
|
+
# Probability of adding optional elements (0.0-1.0)
|
173
|
+
# Default value: 0.5
|
174
|
+
probability: 1
|
175
|
+
# Limit for the maximum number of elements
|
176
|
+
max_occurs: 5
|
177
|
+
# Minimum string length
|
178
|
+
min_length: 5
|
179
|
+
# Maximum string length
|
180
|
+
max_length: 20
|
181
|
+
# Minimum numeric value
|
182
|
+
min_inclusive: 10
|
183
|
+
# Maximum numeric value
|
184
|
+
max_inclusive: 1000000
|
185
|
+
|
186
|
+
# Override generated values for tags and attributes.
|
187
|
+
# Key - string or regular expression to match the tag/attribute name.
|
188
|
+
# Value - string with optional use of placeholders:
|
189
|
+
# `{{ function }}` - substitutes the value provided by the predefined function.
|
190
|
+
# `{{ function | modifier }}` - same, but with a modifier [ global | local ].
|
191
|
+
# - `global` - a single value will be used throughout the entire generation process.
|
192
|
+
# - `local` - a single value will be used within the context of the current document.
|
193
|
+
#
|
194
|
+
# The list of available functions is below.
|
195
|
+
# The order of entries matters; the first matching override will be selected.
|
196
|
+
# Key matching is case-insensitive.
|
197
|
+
value_override:
|
198
|
+
name_regexp_1: "static value"
|
199
|
+
name_regexp_2: "{{ function_call }}"
|
200
|
+
"name_regexp_\d": "static-text-and-{{ function_call }}"
|
201
|
+
name: "static-text-and-{{ function_call }}-{{ another_function_call }}"
|
202
|
+
|
203
|
+
# Extend/override global settings for specific files.
|
204
|
+
# Key - string or regular expression to match the xsd filename(s).
|
205
|
+
# The order of entries matters; the first matching override will be selected.
|
206
|
+
# Key matching is case-insensitive.
|
207
|
+
specific:
|
208
|
+
# Each value can have the same set of parameters as the global section
|
209
|
+
"SCHEM.*":
|
210
|
+
# for schemas named "SCHEM.*", xml document names will only contain UUIDv4 + '.xml'
|
211
|
+
output_filename: "{{ uuid }}"
|
212
|
+
# Random value generator settings for schemas named "SCHEM.*"
|
213
|
+
randomization:
|
214
|
+
# for schemas named "SCHEM.*", the probability of adding optional elements will be 30%
|
215
|
+
probability: 0.3
|
216
|
+
value_override:
|
217
|
+
# override the value set by the global configuration
|
218
|
+
name_regexp_1: "static value"
|
219
|
+
# reset overrides for tags/attributes containing 'name' set by the global configuration
|
220
|
+
name:
|
221
|
+
```
|
222
|
+
|
223
|
+
Configuration Priority:
|
224
|
+
|
225
|
+
- specific settings
|
226
|
+
- global settings
|
227
|
+
- default settings
|
228
|
+
|
229
|
+
### Placeholder Functions
|
230
|
+
|
231
|
+
In the `value_override` sections, you can specify either a string value or special placeholders:
|
232
|
+
|
233
|
+
- `{{ function }}` - Substitutes the value provided by the predefined function.
|
234
|
+
- `{{ function | modifier }}` - Same, but with a modifier `[ global | local ]`, where:
|
235
|
+
- `global`: The function will generate and use *the same single value* throughout the *entire generation process*
|
236
|
+
for all documents.
|
237
|
+
- `local`: The function will generate and use *the same single value* within the scope of *a single generated
|
238
|
+
document*.
|
239
|
+
- No modifier: A new value is generated each time the function is called.
|
240
|
+
|
241
|
+
**List of Placeholder Functions:**
|
242
|
+
|
243
|
+
| Function | Description |
|
244
|
+
|------------------------------------|------------------------------------------------------------------------------------------------------------|
|
245
|
+
| `source_filename` | Filename of the source xsd schema with extension (e.g., `schema.xsd`) |
|
246
|
+
| `source_extracted` | String extracted from the source xsd filename using the regex specified in the `source_filename` configuration parameter |
|
247
|
+
| `output_filename` | String described by the `output_filename` configuration parameter |
|
248
|
+
| `uuid` | Random UUIDv4 |
|
249
|
+
| `regex("pattern")` | Random string value matching the specified regular expression |
|
250
|
+
| `number(A, B)` | Random number between A and B |
|
251
|
+
| `date("2010-01-01", "2025-01-01")` | Random date within the specified range |
|
252
|
+
| `last_name` | Last Name |
|
253
|
+
| `first_name` | First Name |
|
254
|
+
| `middle_name` | Middle Name |
|
255
|
+
| `address_text` | Address |
|
256
|
+
| `administrative_unit` | Administrative Unit (e.g., District) |
|
257
|
+
| `house_number` | House Number |
|
258
|
+
| `city_name` | City Name |
|
259
|
+
| `postcode` | Postal Code |
|
260
|
+
| `company_name` | Company Name |
|
261
|
+
| `bank_name` | Bank Name |
|
262
|
+
| `phone_number` | Phone Number |
|
263
|
+
| `inn_fl` | Individual Taxpayer Number (Physical Person) |
|
264
|
+
| `inn_ul` | Taxpayer Identification Number (Legal Entity) |
|
265
|
+
| `ogrn_ip` | Primary State Registration Number (Individual Entrepreneur) |
|
266
|
+
| `ogrn_fl` | Primary State Registration Number (Legal Entity) |
|
267
|
+
| `kpp` | Reason Code for Registration |
|
268
|
+
| `snils_formatted` | SNILS (Personal Insurance Account Number) in the format `123-456-789 90` |
|
269
|
+
|
270
|
+
**Configuration Examples:**
|
271
|
+
|
272
|
+
```yaml
|
273
|
+
# TODO Add configuration examples.
|
274
|
+
```
|
275
|
+
|
276
|
+
---
|
277
|
+
|
278
|
+
## Validation
|
279
|
+
|
280
|
+
Generated XML documents are checked for conformance against the schema used for generation.
|
281
|
+
By default, validation against the source XSD schema is used.
|
282
|
+
|
283
|
+
If a document does not conform to the schema, execution stops immediately.
|
284
|
+
This behavior can be disabled using the flag `-ff false` or `--fail-fast false`.
|
285
|
+
|
286
|
+
To disable validation, use the flag `-v none` or `--validation none`.
|
287
|
+
|
288
|
+
## Contribution
|
289
|
+
|
290
|
+
Contributions are welcome! Please open an issue or submit a pull request on GitHub.
|
291
|
+
|
292
|
+
### Project Structure
|
293
|
+
|
294
|
+
- `xmlgenerator/` - main project code
|
295
|
+
- `tests/` - tests
|
296
|
+
|
297
|
+
### Running Tests
|
298
|
+
|
299
|
+
```bash
|
300
|
+
pytest
|
301
|
+
```
|
302
|
+
|
303
|
+
---
|
304
|
+
|
305
|
+
## License
|
306
|
+
|
307
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
308
|
+
|
309
|
+
## Contacts
|
310
|
+
|
311
|
+
For any questions or issues, please contact [lex.akimov23@gmail.com](mailto:lex.akimov23@gmail.com).
|
312
|
+
|
313
|
+
You can also create an [Issue on GitHub](https://github.com/lexakimov/xmlgenerator/issues) to report bugs or suggest
|
314
|
+
improvements.
|
@@ -0,0 +1,14 @@
|
|
1
|
+
xmlgenerator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
xmlgenerator/arguments.py,sha256=0hbWgXOTj5GhXL3wHms8kHk5QAEzzN-1fBAc-lKAkBk,4446
|
3
|
+
xmlgenerator/bootstrap.py,sha256=X5bN3jn6iWBtigzeGGgSzbVG0MGrHzdFrNOEf3MhHuQ,2598
|
4
|
+
xmlgenerator/configuration.py,sha256=z3qbIL_2Lafp2bGTVWYPAAXzp-FuU_whQ7cz0RmbBKk,5101
|
5
|
+
xmlgenerator/generator.py,sha256=QAKI36zqIYMjlrphKA8LlC0WWnF8M_6G-ZPWeZn31W4,17768
|
6
|
+
xmlgenerator/randomization.py,sha256=_-PoMwSxAxI18x0_arQz0QkyZNUHtH8MB1tlgI3iOAE,1710
|
7
|
+
xmlgenerator/substitution.py,sha256=Al4XFXdx8RI6V7nYDjfc2JEYFPbfmw5Mwet4sMCWdCU,4765
|
8
|
+
xmlgenerator/validation.py,sha256=Q6sbIPqCargQ0YOJPuvcNdIohcR4yo8tJBLbuceQaag,1755
|
9
|
+
xmlgenerator-0.1.0.dist-info/licenses/LICENSE,sha256=QlXK8O3UcoAYUYwVJNgB9MSM7O94ogNo_1hd9GzznUQ,1070
|
10
|
+
xmlgenerator-0.1.0.dist-info/METADATA,sha256=6PqKFVwvR3NAOQUEQ8YRk1NN5n5ltHkxgIZp2RMRr8M,12653
|
11
|
+
xmlgenerator-0.1.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
12
|
+
xmlgenerator-0.1.0.dist-info/entry_points.txt,sha256=ly9hKr3o4AzFUkelBZNRzyKYf-Ld4kfcffvBu1oHq54,61
|
13
|
+
xmlgenerator-0.1.0.dist-info/top_level.txt,sha256=jr7FbMBm8MQ6j8I_-nWzQQEseXzwSCZNXgrkWuk9P4E,13
|
14
|
+
xmlgenerator-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Alexey Akimov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
xmlgenerator
|