xmlgenerator 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {xmlgenerator-0.2.1/xmlgenerator.egg-info → xmlgenerator-0.3.0}/PKG-INFO +4 -3
  2. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/README.md +3 -2
  3. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/setup.py +1 -1
  4. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/arguments.py +7 -1
  5. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/bootstrap.py +6 -4
  6. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/configuration.py +2 -1
  7. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/generator.py +193 -149
  8. xmlgenerator-0.3.0/xmlgenerator/randomization.py +140 -0
  9. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/substitution.py +48 -51
  10. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0/xmlgenerator.egg-info}/PKG-INFO +4 -3
  11. xmlgenerator-0.2.1/xmlgenerator/randomization.py +0 -76
  12. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/LICENSE +0 -0
  13. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/setup.cfg +0 -0
  14. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/__init__.py +0 -0
  15. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator/validation.py +0 -0
  16. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator.egg-info/SOURCES.txt +0 -0
  17. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator.egg-info/dependency_links.txt +0 -0
  18. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator.egg-info/entry_points.txt +0 -0
  19. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator.egg-info/requires.txt +0 -0
  20. {xmlgenerator-0.2.1 → xmlgenerator-0.3.0}/xmlgenerator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmlgenerator
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Generates XML documents from XSD schemas
5
5
  Home-page: https://github.com/lexakimov/xmlgenerator
6
6
  Author: Alexey Akimov
@@ -136,12 +136,13 @@ positional arguments:
136
136
  options:
137
137
  -h, --help show this help message and exit
138
138
  -c, --config <config.yml> pass yaml configuration file
139
+ -l, --locale <locale> randomizer locale (default: en_US)
139
140
  -o, --output <output.xml> save output to dir or file
140
141
  -p, --pretty prettify output XML
141
142
  -v, --validation <validation> validate generated XML document (none, schema, schematron, default is schema)
142
143
  -ff, --fail-fast terminate execution on validation error (default is true)
143
144
  -e, --encoding <encoding> output XML encoding (utf-8, windows-1251, default is utf-8)
144
- --seed <seed> set randomization seed
145
+ -s, --seed <seed> set randomization seed
145
146
  -d, --debug enable debug mode
146
147
  -V, --version shows current version
147
148
  -C, --completion <shell> print shell completion script (bash, zsh, tcsh)
@@ -164,7 +165,7 @@ global:
164
165
  source_filename: ...
165
166
 
166
167
  # Filename template for saving the generated document.
167
- # Default value: `{{ source_filename }}_{{ uuid }}` (xsd schema filename + random UUID)
168
+ # Default value: `{{ source_extracted }}_{{ uuid }}` (xsd schema filename + random UUID)
168
169
  output_filename: ...
169
170
 
170
171
  # Random value generator settings
@@ -107,12 +107,13 @@ positional arguments:
107
107
  options:
108
108
  -h, --help show this help message and exit
109
109
  -c, --config <config.yml> pass yaml configuration file
110
+ -l, --locale <locale> randomizer locale (default: en_US)
110
111
  -o, --output <output.xml> save output to dir or file
111
112
  -p, --pretty prettify output XML
112
113
  -v, --validation <validation> validate generated XML document (none, schema, schematron, default is schema)
113
114
  -ff, --fail-fast terminate execution on validation error (default is true)
114
115
  -e, --encoding <encoding> output XML encoding (utf-8, windows-1251, default is utf-8)
115
- --seed <seed> set randomization seed
116
+ -s, --seed <seed> set randomization seed
116
117
  -d, --debug enable debug mode
117
118
  -V, --version shows current version
118
119
  -C, --completion <shell> print shell completion script (bash, zsh, tcsh)
@@ -135,7 +136,7 @@ global:
135
136
  source_filename: ...
136
137
 
137
138
  # Filename template for saving the generated document.
138
- # Default value: `{{ source_filename }}_{{ uuid }}` (xsd schema filename + random UUID)
139
+ # Default value: `{{ source_extracted }}_{{ uuid }}` (xsd schema filename + random UUID)
139
140
  output_filename: ...
140
141
 
141
142
  # Random value generator settings
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='xmlgenerator',
5
- version='0.2.1',
5
+ version='0.3.0',
6
6
  packages=find_packages(exclude=("tests", "tests.*")),
7
7
  entry_points={
8
8
  'console_scripts': [
@@ -39,6 +39,12 @@ def _get_parser():
39
39
  dest="config_yaml",
40
40
  help="pass yaml configuration file"
41
41
  )
42
+ parser.add_argument(
43
+ "-l", "--locale",
44
+ metavar="<locale>",
45
+ default="en_US",
46
+ help="randomizer locale (default: %(default)s)"
47
+ )
42
48
  output_arg = parser.add_argument(
43
49
  "-o", "--output",
44
50
  metavar="<output.xml>",
@@ -71,7 +77,7 @@ def _get_parser():
71
77
  help="output XML encoding (utf-8, windows-1251, default is utf-8)"
72
78
  )
73
79
  parser.add_argument(
74
- "--seed",
80
+ "-s", "--seed",
75
81
  metavar="<seed>",
76
82
  help="set randomization seed"
77
83
  )
@@ -12,6 +12,7 @@ from xmlgenerator.randomization import Randomizer
12
12
  from xmlgenerator.substitution import Substitutor
13
13
  from xmlgenerator.validation import XmlValidator
14
14
 
15
+ # TODO конфигурация ограничений - occurs
15
16
  # TODO Generator - обработка стандартных xsd типов
16
17
  # TODO кастомные переменные для локального контекста
17
18
  # TODO валидация по Schematron
@@ -42,14 +43,15 @@ def _main():
42
43
 
43
44
  config = load_config(args.config_yaml)
44
45
 
45
- randomizer = Randomizer(args.seed)
46
+ randomizer = Randomizer(args.seed, args.locale)
46
47
  substitutor = Substitutor(randomizer)
47
48
  generator = XmlGenerator(randomizer, substitutor)
48
49
  validator = XmlValidator(args.validation, args.fail_fast)
49
50
 
50
- logger.debug('found %s schemas', len(xsd_files))
51
- for xsd_file in xsd_files:
52
- logger.info('processing schema: %s', xsd_file.name)
51
+ total_count = len(xsd_files)
52
+ logger.debug('found %s schemas', total_count)
53
+ for index, xsd_file in enumerate(xsd_files):
54
+ logger.info('processing schema %s of %s: %s', index + 1, total_count, xsd_file.name)
53
55
 
54
56
  # get configuration override for current schema
55
57
  local_config = config.get_for_file(xsd_file.name)
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
13
13
  @dataclass
14
14
  class RandomizationConfig:
15
15
  probability: float = field(default=None)
16
+ min_occurs: int = field(default=None)
16
17
  max_occurs: int = field(default=None)
17
18
  min_length: int = field(default=None)
18
19
  max_length: int = field(default=None)
@@ -36,7 +37,7 @@ class GeneratorConfig:
36
37
  @dataclass
37
38
  class GlobalGeneratorConfig(GeneratorConfig):
38
39
  source_filename: str = field(default='(?P<extracted>.*).(xsd|XSD)')
39
- output_filename: str = field(default='{{ source_filename }}_{{ uuid }}')
40
+ output_filename: str = field(default='{{ source_extracted }}_{{ uuid }}')
40
41
  randomization: GlobalRandomizationConfig = field(default_factory=lambda: GlobalRandomizationConfig())
41
42
 
42
43
 
@@ -1,5 +1,4 @@
1
1
  import logging
2
- import re
3
2
 
4
3
  import xmlschema
5
4
  from lxml import etree
@@ -20,53 +19,56 @@ class XmlGenerator:
20
19
  self.substitutor = substitutor
21
20
 
22
21
  def generate_xml(self, xsd_schema: xmlschema.XMLSchema, local_config: GeneratorConfig) -> etree.Element:
22
+ ns_map = {None if k == '' else k: v for k, v in xsd_schema.namespaces.items() if v != ''}
23
23
  xsd_root_element = xsd_schema.root_elements[0]
24
- xml_root_element = etree.Element(xsd_root_element.name)
25
- self._add_elements(xml_root_element, xsd_root_element, local_config)
24
+ xml_root_element = etree.Element(xsd_root_element.name, nsmap=ns_map)
25
+ xml_tree = etree.ElementTree(xml_root_element)
26
+ self._add_elements(xml_tree, xml_root_element, xsd_root_element, local_config)
26
27
  return xml_root_element
27
28
 
28
- def _add_elements(self, xml_element: etree.Element, xsd_element, local_config: GeneratorConfig) -> None:
29
- rnd = self.randomizer.rnd
30
-
31
- xsd_element_type = getattr(xsd_element, 'type', None)
32
- logger.debug('fill down element "%s" with type %s', xsd_element.name, type(xsd_element_type).__name__)
33
-
34
- # Add attributes if they are
35
- attributes = getattr(xsd_element, 'attributes', dict())
36
- if len(attributes) > 0 and xsd_element_type.local_name != 'anyType':
37
- logger.debug('add attributes to element %s', xsd_element.name)
38
- for attr_name, attr in attributes.items():
39
- logger.debug('attribute: %s', attr_name)
40
- use = attr.use # optional | required | prohibited
41
- if use == 'prohibited':
42
- logger.debug('skipped')
43
- continue
44
- elif use == 'optional':
45
- if rnd.random() > local_config.randomization.probability:
46
- logger.debug('skipped')
47
- continue # skip optional attribute
48
-
49
- attr_value = self._generate_value(attr.type, attr_name, local_config)
50
- if attr_value is not None:
51
- xml_element.set(attr_name, str(attr_value))
52
- logger.debug(f'attribute %s set with value %s', attr_name, attr_value)
53
-
29
+ def _add_elements(self, xml_tree, xml_element: etree.Element, xsd_element, local_config: GeneratorConfig) -> None:
54
30
  # Process child elements --------------------------------------------------------------------------------------
55
31
  if isinstance(xsd_element, XsdElement):
56
- if isinstance(xsd_element_type, XsdAtomicRestriction):
32
+ element_xpath = xml_tree.getpath(xml_element)
33
+ logger.debug('element: %s [created]', element_xpath)
34
+
35
+ xsd_element_type = getattr(xsd_element, 'type', None)
36
+
37
+ # Add attributes if they are
38
+ attributes = getattr(xsd_element, 'attributes', dict())
39
+ if len(attributes) > 0 and xsd_element_type.local_name != 'anyType':
40
+ for attr_name, attr in attributes.items():
41
+ logger.debug('element: %s; attribute "%s" [processing]', element_xpath, attr_name)
42
+ use = attr.use # optional | required | prohibited
43
+ if use == 'prohibited':
44
+ logger.debug('element: %s; attribute: "%s" [skipped]', element_xpath, attr_name)
45
+ continue
46
+ elif use == 'optional':
47
+ if self.randomizer.random() > local_config.randomization.probability:
48
+ logger.debug('element: %s; attribute: "%s" [skipped]', element_xpath, attr_name)
49
+ continue
50
+
51
+ attr_value = self._generate_value(attr.type, attr_name, local_config)
52
+ if attr_value is not None:
53
+ xml_element.set(attr_name, str(attr_value))
54
+ logger.debug('element: %s; attribute: "%s" = "%s"', element_xpath, attr_name, attr_value)
55
+
56
+ if isinstance(xsd_element_type, XsdAtomicBuiltin):
57
+ text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
58
+ xml_element.text = text
59
+ logger.debug('element: %s = "%s"', element_xpath, text)
60
+ return
61
+ elif isinstance(xsd_element_type, XsdAtomicRestriction):
57
62
  text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
58
63
  xml_element.text = text
64
+ logger.debug('element: %s = "%s"', element_xpath, text)
59
65
  return
60
66
  elif isinstance(xsd_element_type, XsdComplexType):
61
67
  xsd_element_type_content = xsd_element_type.content
62
68
  if isinstance(xsd_element_type_content, XsdGroup):
63
- self._add_elements(xml_element, xsd_element_type_content, local_config)
69
+ self._add_elements(xml_tree, xml_element, xsd_element_type_content, local_config)
64
70
  else:
65
71
  raise RuntimeError()
66
- elif isinstance(xsd_element_type, XsdAtomicBuiltin):
67
- text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
68
- xml_element.text = text
69
- return
70
72
  else:
71
73
  raise RuntimeError()
72
74
 
@@ -75,9 +77,9 @@ class XmlGenerator:
75
77
 
76
78
  group_min_occurs = getattr(xsd_element, 'min_occurs', None)
77
79
  group_max_occurs = getattr(xsd_element, 'max_occurs', None)
78
- group_min_occurs = group_min_occurs if group_min_occurs is not None else 0
80
+ group_min_occurs = group_min_occurs if group_min_occurs is not None else 0 # TODO externalize
79
81
  group_max_occurs = group_max_occurs if group_max_occurs is not None else 10 # TODO externalize
80
- group_occurs = rnd.randint(group_min_occurs, group_max_occurs)
82
+ group_occurs = self.randomizer.integer(group_min_occurs, group_max_occurs)
81
83
 
82
84
  if model == 'all':
83
85
  for _ in range(group_occurs):
@@ -86,13 +88,13 @@ class XmlGenerator:
86
88
 
87
89
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
88
90
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
89
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
91
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
90
92
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
91
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
93
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
92
94
 
93
95
  for _ in range(element_occurs):
94
96
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
95
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
97
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
96
98
  return
97
99
 
98
100
  elif model == 'sequence':
@@ -102,22 +104,22 @@ class XmlGenerator:
102
104
 
103
105
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
104
106
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
105
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
107
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
106
108
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
107
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
109
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
108
110
 
109
111
  if isinstance(xsd_child_element_type, XsdElement):
110
112
  for _ in range(element_occurs):
111
113
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
112
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
114
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
113
115
 
114
116
  elif isinstance(xsd_child_element_type, XsdGroup):
115
117
  xml_child_element = xml_element
116
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
118
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
117
119
 
118
120
  elif isinstance(xsd_child_element_type, XsdAnyElement):
119
121
  xml_child_element = etree.SubElement(xml_element, "Any")
120
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
122
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
121
123
 
122
124
  else:
123
125
  raise RuntimeError(xsd_child_element_type)
@@ -125,17 +127,17 @@ class XmlGenerator:
125
127
 
126
128
  elif model == 'choice':
127
129
  for _ in range(group_occurs):
128
- xsd_child_element_type = rnd.choice(xsd_element)
130
+ xsd_child_element_type = self.randomizer.any(xsd_element)
129
131
 
130
132
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
131
133
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
132
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
134
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
133
135
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
134
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
136
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
135
137
 
136
138
  for _ in range(element_occurs):
137
139
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
138
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
140
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
139
141
  return
140
142
 
141
143
  else:
@@ -155,79 +157,92 @@ class XmlGenerator:
155
157
  if isinstance(xsd_type, XsdComplexType):
156
158
  return None
157
159
 
158
- rnd = self.randomizer.rnd
159
-
160
- # -------------------------------------------------------------------------------------------------------------
161
- # Выясняем ограничения
162
- min_length = getattr(xsd_type, 'min_length', None) # None | int
163
- max_length = getattr(xsd_type, 'max_length', None) # None | int
164
-
165
- min_value = getattr(xsd_type, 'min_value', None) # None | int
166
- max_value = getattr(xsd_type, 'max_value', None) # None
167
-
168
- total_digits = None
169
- fraction_digits = None
170
- enumeration = getattr(xsd_type, 'enumeration', None)
171
- patterns = getattr(xsd_type, 'patterns', None)
172
-
173
- validators = getattr(xsd_type, 'validators', None)
174
- for validator in validators:
175
- if isinstance(validator, XsdMinExclusiveFacet):
176
- min_value = validator.value
177
- elif isinstance(validator, XsdMinInclusiveFacet):
178
- min_value = validator.value
179
- elif isinstance(validator, XsdMaxExclusiveFacet):
180
- max_value = validator.value
181
- elif isinstance(validator, XsdMaxInclusiveFacet):
182
- max_value = validator.value
183
- elif isinstance(validator, XsdLengthFacet):
184
- min_length = validator.value
185
- max_length = validator.value
186
- elif isinstance(validator, XsdMinLengthFacet):
187
- min_length = validator.value
188
- elif isinstance(validator, XsdMaxLengthFacet):
189
- max_length = validator.value
190
- elif isinstance(validator, XsdTotalDigitsFacet):
191
- total_digits = validator.value
192
- elif isinstance(validator, XsdFractionDigitsFacet):
193
- fraction_digits = validator.value
194
- elif isinstance(validator, XsdEnumerationFacets):
195
- enumeration = validator.enumeration
196
- elif callable(validator):
197
- pass
198
- else:
199
- raise RuntimeError(f"Unhandled validator: {validator}")
200
-
201
- min_length = min_length or -1
202
- max_length = max_length or -1
203
-
204
- min_value = min_value or 0
205
- max_value = max_value or 100000
206
-
207
160
  # -------------------------------------------------------------------------------------------------------------
208
161
  # Ищем переопределение значения в конфигурации
209
-
210
162
  value_override = local_config.value_override
211
163
  is_found, overridden_value = self.substitutor.substitute_value(target_name, value_override.items())
212
164
  if is_found:
165
+ logger.debug('value resolved: "%s"', overridden_value)
213
166
  return overridden_value
214
167
 
215
168
  # -------------------------------------------------------------------------------------------------------------
216
169
  # If there is an enumeration, select a random value from it
217
-
170
+ enumeration = getattr(xsd_type, 'enumeration', None)
218
171
  if enumeration is not None:
219
- return rnd.choice(enumeration)
172
+ random_enum = self.randomizer.any(enumeration)
173
+ logger.debug('use random value from enumeration: "%s" %s', random_enum, enumeration)
174
+ return str(random_enum)
220
175
 
221
- # -------------------------------------------------------------------------------------------------------------\
176
+ # -------------------------------------------------------------------------------------------------------------
222
177
  # Генерируем значения для стандартных типов и типов с ограничениями
223
178
  if isinstance(xsd_type, XsdAtomicBuiltin) or isinstance(xsd_type, XsdAtomicRestriction):
224
- return self._generate_value_by_type(
225
- xsd_type, target_name,
226
- patterns,
179
+ # Выясняем ограничения
180
+ min_length = getattr(xsd_type, 'min_length', None) # None | int
181
+ max_length = getattr(xsd_type, 'max_length', None) # None | int
182
+
183
+ min_value = getattr(xsd_type, 'min_value', None) # None | int
184
+ max_value = getattr(xsd_type, 'max_value', None) # None
185
+
186
+ total_digits = None
187
+ fraction_digits = None
188
+ patterns = getattr(xsd_type, 'patterns', None)
189
+
190
+ validators = getattr(xsd_type, 'validators', None)
191
+ for validator in validators:
192
+ if isinstance(validator, XsdMinExclusiveFacet):
193
+ min_value = validator.value
194
+ elif isinstance(validator, XsdMinInclusiveFacet):
195
+ min_value = validator.value
196
+ elif isinstance(validator, XsdMaxExclusiveFacet):
197
+ max_value = validator.value
198
+ elif isinstance(validator, XsdMaxInclusiveFacet):
199
+ max_value = validator.value
200
+ elif isinstance(validator, XsdLengthFacet):
201
+ min_length = validator.value
202
+ max_length = validator.value
203
+ elif isinstance(validator, XsdMinLengthFacet):
204
+ min_length = validator.value
205
+ elif isinstance(validator, XsdMaxLengthFacet):
206
+ max_length = validator.value
207
+ elif isinstance(validator, XsdTotalDigitsFacet):
208
+ total_digits = validator.value
209
+ elif isinstance(validator, XsdFractionDigitsFacet):
210
+ fraction_digits = validator.value
211
+ elif isinstance(validator, XsdEnumerationFacets):
212
+ pass
213
+ elif callable(validator):
214
+ pass
215
+ else:
216
+ raise RuntimeError(f"Unhandled validator: {validator}")
217
+
218
+ rand_config = local_config.randomization
219
+
220
+ logger.debug(
221
+ 'restrictions before override: min_length: %4s; max_length: %4s; min_value: %4s; max_value: %4s',
222
+ min_length, max_length, min_value, max_value
223
+ )
224
+
225
+ min_length, max_length = calculate_bounds_1(
226
+ min_length, max_length, rand_config.min_length, rand_config.max_length
227
+ )
228
+
229
+ min_value, max_value = calculate_bounds_1(
230
+ min_value, max_value, rand_config.min_inclusive, rand_config.max_inclusive
231
+ )
232
+
233
+ logger.debug(
234
+ 'restrictions after override: min_length: %4s; max_length: %4s; min_value: %4s; max_value: %4s',
235
+ min_length, max_length, min_value, max_value
236
+ )
237
+
238
+ generated_value = self._generate_value_by_type(
239
+ xsd_type, patterns,
227
240
  min_length, max_length,
228
241
  min_value, max_value,
229
242
  total_digits, fraction_digits
230
243
  )
244
+ logger.debug('value generated: "%s"', generated_value)
245
+ return generated_value
231
246
 
232
247
  # -------------------------------------------------------------------------------------------------------------
233
248
  # Проверяем базовый тип
@@ -239,7 +254,7 @@ class XmlGenerator:
239
254
 
240
255
  raise RuntimeError(f"Can't generate value - unhandled type. Target name: {target_name}")
241
256
 
242
- def _generate_value_by_type(self, xsd_type, target_name, patterns, min_length, max_length, min_value, max_value,
257
+ def _generate_value_by_type(self, xsd_type, patterns, min_length, max_length, min_value, max_value,
243
258
  total_digits, fraction_digits) -> str | None:
244
259
 
245
260
  type_id = xsd_type.id
@@ -249,9 +264,11 @@ class XmlGenerator:
249
264
  if not type_id:
250
265
  type_id = xsd_type.root_type.id
251
266
 
267
+ logger.debug('generate value for type: "%s"', type_id)
268
+
252
269
  match type_id:
253
270
  case 'string':
254
- return self._generate_string(target_name, patterns, min_length, max_length)
271
+ return self._generate_string(patterns, min_length, max_length)
255
272
  case 'boolean':
256
273
  return self._generate_boolean()
257
274
  case 'integer':
@@ -293,66 +310,57 @@ class XmlGenerator:
293
310
  case _:
294
311
  raise RuntimeError(type_id)
295
312
 
296
- def _generate_string(self, target_name, patterns, min_length, max_length):
297
- rnd = self.randomizer.rnd
298
- re_gen = self.randomizer.re_gen
313
+ def _generate_string(self, patterns, min_length, max_length):
299
314
  if patterns is not None:
300
315
  # Генерация строки по regex
301
- random_pattern = rnd.choice(patterns)
302
- xeger = re_gen.xeger(random_pattern.attrib['value'])
303
- xeger = re.sub(r'\s', ' ', xeger)
304
- if min_length > -1 and len(xeger) < min_length:
305
- logger.warning(
306
- "Possible mistake in schema: %s generated value '%s' can't be shorter than %s",
307
- target_name, xeger, min_length
308
- )
309
- if -1 < max_length < len(xeger):
310
- logger.warning(
311
- "Possible mistake in schema: %s generated value '%s' can't be longer than %s",
312
- target_name, xeger, max_length
313
- )
314
- return xeger
316
+ random_enum = self.randomizer.any(patterns)
317
+ random_pattern = random_enum.attrib['value']
318
+ return self.randomizer.regex(random_pattern)
315
319
 
316
320
  # Иначе генерируем случайную строку
317
321
  return self.randomizer.ascii_string(min_length, max_length)
318
322
 
319
323
  def _generate_boolean(self):
320
- rnd = self.randomizer.rnd
321
- return rnd.choice(['true', 'false'])
324
+ return self.randomizer.any(['true', 'false'])
322
325
 
323
326
  def _generate_integer(self, total_digits, min_value, max_value):
324
- rnd = self.randomizer.rnd
325
327
  if total_digits:
326
328
  min_value = 10 ** (total_digits - 1)
327
329
  max_value = (10 ** total_digits) - 1
328
- rnd_int = rnd.randint(min_value, max_value)
330
+ rnd_int = self.randomizer.integer(min_value, max_value)
329
331
  return str(rnd_int)
330
332
 
331
333
  def _generate_decimal(self, total_digits, fraction_digits, min_value, max_value):
332
- rnd = self.randomizer.rnd
333
- if total_digits:
334
- if fraction_digits and fraction_digits > 0:
335
- integer_digits = total_digits - fraction_digits
336
- integer_part = rnd.randint(10 ** (integer_digits - 1), (10 ** integer_digits) - 1)
337
- fractional_part = rnd.randint(0, (10 ** fraction_digits) - 1)
338
- return f"{integer_part}.{fractional_part:0{fraction_digits}}"
339
- else:
340
- min_value = 10 ** (total_digits - 1)
341
- max_value = (10 ** total_digits) - 1
342
- rnd_int = rnd.randint(min_value, max_value)
343
- return str(rnd_int)
334
+ if fraction_digits is None:
335
+ fraction_digits = self.randomizer.integer(1, 3)
336
+
337
+ if fraction_digits > 4:
338
+ fraction_digits = self.randomizer.integer(1, 4)
339
+
340
+ if total_digits is None:
341
+ total_digits = 10 + fraction_digits
342
+
343
+ if total_digits > 10:
344
+ total_digits = self.randomizer.integer(6, total_digits - 2)
345
+
346
+ integer_digits = total_digits - fraction_digits
347
+
348
+ # negative
349
+ min_value_fact = -(10 ** integer_digits - 1)
350
+
351
+ # positive
352
+ max_value_fact = 10 ** integer_digits - 1
344
353
 
345
- rnd_int = rnd.randint(min_value, max_value)
346
- return f"{int(rnd_int / 100)}.{rnd_int % 100:02}"
354
+ min_value_fact, max_value_fact = calculate_bounds_2(min_value_fact, max_value_fact, min_value, max_value)
355
+
356
+ random_float = self.randomizer.float(min_value_fact, max_value_fact)
357
+ return f"{random_float:.{fraction_digits}f}"
347
358
 
348
359
  def _generate_float(self, min_value, max_value):
349
- rnd = self.randomizer.rnd
350
- rnd_int = rnd.uniform(min_value, max_value)
351
- rnd_int = round(rnd_int, 2)
352
- return str(rnd_int)
360
+ return self._generate_double(min_value, max_value)
353
361
 
354
362
  def _generate_double(self, min_value, max_value):
355
- return self._generate_float(min_value, max_value)
363
+ return self._generate_decimal(None, 2, min_value, max_value)
356
364
 
357
365
  def _generate_duration(self):
358
366
  raise RuntimeError("not yet implemented")
@@ -378,8 +386,7 @@ class XmlGenerator:
378
386
  return formatted
379
387
 
380
388
  def _generate_gyear(self):
381
- rnd = self.randomizer.rnd
382
- return str(rnd.randint(2000, 2050))
389
+ return str(self.randomizer.integer(2000, 2050))
383
390
 
384
391
  def _generate_gmonthday(self):
385
392
  random_date = self.randomizer.random_date()
@@ -410,3 +417,40 @@ class XmlGenerator:
410
417
 
411
418
  def _generate_notation(self):
412
419
  raise RuntimeError("not yet implemented")
420
+
421
+
422
+ def calculate_bounds_1(fact_min, fact_max, config_min, config_max):
423
+ if config_min:
424
+ if fact_min is None:
425
+ fact_min = config_min
426
+ else:
427
+ new_min = max(fact_min, config_min)
428
+ if fact_max and new_min <= fact_max:
429
+ fact_min = new_min
430
+
431
+ if config_max:
432
+ if fact_max is None:
433
+ fact_max = config_max
434
+ else:
435
+ new_max = min(fact_max, config_max)
436
+ if new_max >= fact_min:
437
+ fact_max = new_max
438
+
439
+ if fact_max and fact_min and fact_max < fact_min:
440
+ fact_max = fact_min = min(fact_max, fact_min)
441
+
442
+ return fact_min, fact_max
443
+
444
+
445
+ def calculate_bounds_2(fact_min, fact_max, config_min, config_max):
446
+ if config_min is not None:
447
+ new_min = max(fact_min, config_min)
448
+ if fact_max and new_min <= fact_max:
449
+ fact_min = new_min
450
+
451
+ if config_max is not None:
452
+ new_max = min(fact_max, config_max)
453
+ if new_max >= fact_min:
454
+ fact_max = new_max
455
+
456
+ return fact_min, fact_max
@@ -0,0 +1,140 @@
1
+ import logging
2
+ import random
3
+ import re
4
+ import string
5
+ import sys
6
+ from datetime import datetime, date, time, timedelta
7
+ from decimal import Decimal
8
+
9
+ import rstr
10
+ from faker import Faker
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class Randomizer:
16
+ def __init__(self, seed=None, locale='ru_RU'):
17
+ if not seed:
18
+ seed = random.randrange(sys.maxsize)
19
+ logger.debug('initialize with random seed: %s', seed)
20
+ else:
21
+ logger.debug('initialize with provided seed: %s', seed)
22
+
23
+ self._rnd = random.Random(seed)
24
+ self._fake = Faker(locale=locale)
25
+ self._fake.seed_instance(seed)
26
+ self._rstr = rstr.Rstr(self._rnd)
27
+
28
+ def random(self):
29
+ return self._rnd.random()
30
+
31
+ def any(self, options):
32
+ return self._rnd.choice(options)
33
+
34
+ def regex(self, pattern):
35
+ xeger = self._rstr.xeger(pattern)
36
+ return re.sub(r'\s', ' ', xeger)
37
+
38
+ def uuid(self):
39
+ return self._fake.uuid4()
40
+
41
+ def integer(self, min_value, max_value):
42
+ return self._rnd.randint(min_value, max_value)
43
+
44
+ def float(self, min_value, max_value):
45
+ if isinstance(min_value, Decimal):
46
+ min_value = float(min_value)
47
+ if isinstance(max_value, Decimal):
48
+ max_value = float(max_value)
49
+ return self._rnd.uniform(min_value, max_value)
50
+
51
+ def ascii_string(self, min_length, max_length):
52
+ if min_length is None:
53
+ min_length = 1
54
+ if max_length is None:
55
+ max_length = 20
56
+
57
+ length = self._rnd.randint(min_length, max_length)
58
+ letters = string.ascii_lowercase
59
+ return ''.join(self._rnd.choice(letters) for _ in range(length)).capitalize()
60
+
61
+ def random_date(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> date:
62
+ start = date.fromisoformat(start_date)
63
+ end = date.fromisoformat(end_date)
64
+
65
+ delta = (end - start).days
66
+ random_days = self._rnd.randint(0, delta)
67
+ return start + timedelta(days=random_days)
68
+
69
+ def random_time(self, start_time: str = '00:00:00', end_time: str = '23:59:59') -> time:
70
+ start = time.fromisoformat(start_time)
71
+ end = time.fromisoformat(end_time)
72
+
73
+ random_h = self._rnd.randint(start.hour, end.hour)
74
+ random_m = self._rnd.randint(start.minute, end.minute)
75
+ random_s = self._rnd.randint(start.second, end.second)
76
+
77
+ return time(hour=random_h, minute=random_m, second=random_s)
78
+
79
+ def random_datetime(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> datetime:
80
+ start = datetime.strptime(start_date, "%Y-%m-%d")
81
+ end = datetime.strptime(end_date, "%Y-%m-%d")
82
+
83
+ delta = (end - start).days
84
+ random_days = self._rnd.randint(0, delta)
85
+ return start + timedelta(days=random_days)
86
+
87
+ def last_name(self):
88
+ return self._fake.last_name_male()
89
+
90
+ def first_name(self):
91
+ return self._fake.first_name_male()
92
+
93
+ def middle_name(self):
94
+ return self._fake.middle_name_male()
95
+
96
+ def address_text(self):
97
+ return self._fake.address()
98
+
99
+ def administrative_unit(self):
100
+ return self._fake.administrative_unit()
101
+
102
+ def house_number(self):
103
+ return self._fake.building_number()
104
+
105
+ def city_name(self):
106
+ return self._fake.city_name() if hasattr(self._fake, 'city_name') else self._fake.city()
107
+
108
+ def country(self):
109
+ return self._fake.country()
110
+
111
+ def postcode(self):
112
+ return self._fake.postcode()
113
+
114
+ def company_name(self):
115
+ return self._fake.company()
116
+
117
+ def bank_name(self):
118
+ return self._fake.bank()
119
+
120
+ def phone_number(self):
121
+ return self._fake.phone_number()
122
+
123
+ def inn_fl(self):
124
+ return self._fake.individuals_inn()
125
+
126
+ def inn_ul(self):
127
+ return self._fake.businesses_inn()
128
+
129
+ def ogrn_ip(self):
130
+ return self._fake.individuals_ogrn()
131
+
132
+ def ogrn_fl(self):
133
+ return self._fake.businesses_ogrn()
134
+
135
+ def kpp(self):
136
+ return self._fake.kpp()
137
+
138
+ def snils_formatted(self):
139
+ snils = self._fake.snils()
140
+ return f"{snils[:3]}-{snils[3:6]}-{snils[6:9]} {snils[9:]}"
@@ -13,62 +13,41 @@ logger = logging.getLogger(__name__)
13
13
 
14
14
  class Substitutor:
15
15
  def __init__(self, randomizer: Randomizer):
16
- fake = randomizer.fake
17
16
  self.randomizer = randomizer
18
17
  self._local_context = {}
19
18
  self._global_context = {}
20
19
  self.providers_dict = {
21
- # Функции локального контекста
22
- 'source_filename': lambda: self._local_context["source_filename"],
23
- 'source_extracted': lambda: self._local_context["source_extracted"],
24
- 'output_filename': lambda: self.get_output_filename(),
25
-
26
- 'uuid': lambda: fake.uuid4(),
27
- 'regex': self._rand_regex,
28
- 'any': self._rand_any,
29
- 'number': self._rand_int,
30
- 'date': self._rand_date,
31
-
32
- 'last_name': fake.last_name_male,
33
- 'first_name': fake.first_name_male,
34
- 'middle_name': fake.middle_name_male,
35
- 'address_text': fake.address,
36
- 'administrative_unit': fake.administrative_unit,
37
- 'house_number': fake.building_number,
38
- 'city_name': fake.city_name,
39
- 'postcode': fake.postcode,
40
- 'company_name': fake.company,
41
- 'bank_name': fake.bank,
42
- 'phone_number': fake.phone_number,
43
- 'inn_fl': fake.individuals_inn,
44
- 'inn_ul': fake.businesses_inn,
45
- 'ogrn_ip': fake.individuals_ogrn,
46
- 'ogrn_fl': fake.businesses_ogrn,
47
- 'kpp': fake.kpp,
48
- 'snils_formatted': randomizer.snils_formatted,
20
+ # local scope functions
21
+ 'source_filename': lambda args: self._local_context["source_filename"],
22
+ 'source_extracted': lambda args: self._local_context["source_extracted"],
23
+ 'output_filename': lambda args: self.get_output_filename(),
24
+
25
+ 'any': lambda args: self._any(args),
26
+ 'regex': lambda args: self._regex(args),
27
+ 'uuid': lambda args: self.randomizer.uuid(),
28
+ 'number': lambda args: self._number(args),
29
+ 'date': lambda args: self._date_formatted(args),
30
+
31
+ 'last_name': lambda args: self.randomizer.last_name(),
32
+ 'first_name': lambda args: self.randomizer.first_name(),
33
+ 'middle_name': lambda args: self.randomizer.middle_name(),
34
+ 'address_text': lambda args: self.randomizer.address_text(),
35
+ 'administrative_unit': lambda args: self.randomizer.administrative_unit(),
36
+ 'house_number': lambda args: self.randomizer.house_number(),
37
+ 'city_name': lambda args: self.randomizer.city_name(),
38
+ 'country': lambda args: self.randomizer.country(),
39
+ 'postcode': lambda args: self.randomizer.postcode(),
40
+ 'company_name': lambda args: self.randomizer.company_name(),
41
+ 'bank_name': lambda args: self.randomizer.bank_name(),
42
+ 'phone_number': lambda args: self.randomizer.phone_number(),
43
+ 'inn_fl': lambda args: self.randomizer.inn_fl(),
44
+ 'inn_ul': lambda args: self.randomizer.inn_ul(),
45
+ 'ogrn_ip': lambda args: self.randomizer.ogrn_ip(),
46
+ 'ogrn_fl': lambda args: self.randomizer.ogrn_fl(),
47
+ 'kpp': lambda args: self.randomizer.kpp(),
48
+ 'snils_formatted': lambda args: self.randomizer.snils_formatted(),
49
49
  }
50
50
 
51
- def _rand_regex(self, a):
52
- pattern = a.strip("'").strip('"')
53
- return self.randomizer.re_gen.xeger(pattern)
54
-
55
- def _rand_any(self, a):
56
- args = str(a).split(sep=",")
57
- value = self.randomizer.rnd.choice(args)
58
- value = value.strip(' ').strip("'").strip('"')
59
- return value
60
-
61
- def _rand_int(self, a):
62
- args = str(a).split(sep=",")
63
- return str(self.randomizer.rnd.randint(int(args[0]), int(args[1])))
64
-
65
- def _rand_date(self, a):
66
- args = str(a).split(sep=",")
67
- date_from = args[0].strip(' ').strip("'").strip('"')
68
- date_until = args[1].strip(' ').strip("'").strip('"')
69
- random_date = self.randomizer.random_datetime(date_from, date_until)
70
- return random_date.strftime('%Y%m%d') # TODO externalize pattern
71
-
72
51
  def reset_context(self, xsd_filename, config_local):
73
52
  self._local_context.clear()
74
53
  self._local_context["source_filename"] = xsd_filename
@@ -115,7 +94,7 @@ class Substitutor:
115
94
  if not func_lambda:
116
95
  raise RuntimeError(f"Unknown function {func_name}")
117
96
 
118
- provider_func = lambda: func_lambda() if not func_args else func_lambda(func_args)
97
+ provider_func = lambda: func_lambda(func_args)
119
98
 
120
99
  match func_mod:
121
100
  case None:
@@ -136,3 +115,21 @@ class Substitutor:
136
115
 
137
116
  logger.debug('expression resolved to value: %s', result_value)
138
117
  return result_value
118
+
119
+ def _any(self, args):
120
+ separated_args = str(args).split(sep=",")
121
+ options = [i.strip(' ').strip("'").strip('"') for i in separated_args]
122
+ return self.randomizer.any(options)
123
+
124
+ def _regex(self, args):
125
+ pattern = args.strip("'").strip('"')
126
+ return self.randomizer.regex(pattern)
127
+
128
+ def _number(self, args):
129
+ left_bound, right_bound = (int(i) for i in str(args).split(sep=","))
130
+ return str(self.randomizer.integer(left_bound, right_bound))
131
+
132
+ def _date_formatted(self, args):
133
+ date_from, date_until = (i.strip(' ').strip("'").strip('"') for i in str(args).split(sep=","))
134
+ random_date = self.randomizer.random_datetime(date_from, date_until)
135
+ return random_date.strftime("%Y%m%d")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmlgenerator
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Generates XML documents from XSD schemas
5
5
  Home-page: https://github.com/lexakimov/xmlgenerator
6
6
  Author: Alexey Akimov
@@ -136,12 +136,13 @@ positional arguments:
136
136
  options:
137
137
  -h, --help show this help message and exit
138
138
  -c, --config <config.yml> pass yaml configuration file
139
+ -l, --locale <locale> randomizer locale (default: en_US)
139
140
  -o, --output <output.xml> save output to dir or file
140
141
  -p, --pretty prettify output XML
141
142
  -v, --validation <validation> validate generated XML document (none, schema, schematron, default is schema)
142
143
  -ff, --fail-fast terminate execution on validation error (default is true)
143
144
  -e, --encoding <encoding> output XML encoding (utf-8, windows-1251, default is utf-8)
144
- --seed <seed> set randomization seed
145
+ -s, --seed <seed> set randomization seed
145
146
  -d, --debug enable debug mode
146
147
  -V, --version shows current version
147
148
  -C, --completion <shell> print shell completion script (bash, zsh, tcsh)
@@ -164,7 +165,7 @@ global:
164
165
  source_filename: ...
165
166
 
166
167
  # Filename template for saving the generated document.
167
- # Default value: `{{ source_filename }}_{{ uuid }}` (xsd schema filename + random UUID)
168
+ # Default value: `{{ source_extracted }}_{{ uuid }}` (xsd schema filename + random UUID)
168
169
  output_filename: ...
169
170
 
170
171
  # Random value generator settings
@@ -1,76 +0,0 @@
1
- import logging
2
- import random
3
- import string
4
- import sys
5
- from datetime import datetime, date, time, timedelta
6
-
7
- import rstr
8
- from faker import Faker
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class Randomizer:
14
- def __init__(self, seed=None):
15
- if not seed:
16
- seed = random.randrange(sys.maxsize)
17
- logger.debug('initialize with random seed: %s', seed)
18
- else:
19
- logger.debug('initialize with provided seed: %s', seed)
20
-
21
- self.rnd = random.Random(seed)
22
- self.fake = Faker(locale='ru_RU')
23
- self.fake.seed_instance(seed)
24
- self.re_gen = rstr.Rstr(self.rnd)
25
-
26
- def ascii_string(self, min_length=-1, max_length=-1):
27
- min_length = min_length if min_length and min_length > -1 else 1
28
- max_length = max_length if max_length and max_length >= min_length else 20
29
- if max_length > 50:
30
- max_length = 50
31
- length = self.rnd.randint(min_length, max_length)
32
- # Генерация случайной строки из букв латиницы
33
- letters = string.ascii_letters # Все буквы латиницы (a-z, A-Z)
34
- return ''.join(self.rnd.choice(letters) for _ in range(length))
35
-
36
- def random_date(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> date:
37
- # Преобразуем строки в объекты datetime
38
- start = date.fromisoformat(start_date)
39
- end = date.fromisoformat(end_date)
40
-
41
- # Вычисляем разницу в днях между начальной и конечной датой
42
- delta = (end - start).days
43
-
44
- # Генерируем случайное количество дней в пределах delta
45
- random_days = self.rnd.randint(0, delta)
46
-
47
- # Добавляем случайное количество дней к начальной дате
48
- return start + timedelta(days=random_days)
49
-
50
- def random_time(self, start_time: str = '00:00:00', end_time: str = '23:59:59') -> time:
51
- start = time.fromisoformat(start_time)
52
- end = time.fromisoformat(end_time)
53
-
54
- random_h = self.rnd.randint(start.hour, end.hour)
55
- random_m = self.rnd.randint(start.minute, end.minute)
56
- random_s = self.rnd.randint(start.second, end.second)
57
-
58
- return time(hour=random_h, minute=random_m, second=random_s)
59
-
60
- def random_datetime(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> datetime:
61
- # Преобразуем строки в объекты datetime
62
- start = datetime.strptime(start_date, "%Y-%m-%d")
63
- end = datetime.strptime(end_date, "%Y-%m-%d")
64
-
65
- # Вычисляем разницу в днях между начальной и конечной датой
66
- delta = (end - start).days
67
-
68
- # Генерируем случайное количество дней в пределах delta
69
- random_days = self.rnd.randint(0, delta)
70
-
71
- # Добавляем случайное количество дней к начальной дате
72
- return start + timedelta(days=random_days)
73
-
74
- def snils_formatted(self):
75
- snils = self.fake.snils()
76
- return f"{snils[:3]}-{snils[3:6]}-{snils[6:9]} {snils[9:]}"
File without changes
File without changes