xmlgenerator 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
xmlgenerator/arguments.py CHANGED
@@ -39,6 +39,12 @@ def _get_parser():
39
39
  dest="config_yaml",
40
40
  help="pass yaml configuration file"
41
41
  )
42
+ parser.add_argument(
43
+ "-l", "--locale",
44
+ metavar="<locale>",
45
+ default="en_US",
46
+ help="randomizer locale (default: %(default)s)"
47
+ )
42
48
  output_arg = parser.add_argument(
43
49
  "-o", "--output",
44
50
  metavar="<output.xml>",
@@ -71,7 +77,7 @@ def _get_parser():
71
77
  help="output XML encoding (utf-8, windows-1251, default is utf-8)"
72
78
  )
73
79
  parser.add_argument(
74
- "--seed",
80
+ "-s", "--seed",
75
81
  metavar="<seed>",
76
82
  help="set randomization seed"
77
83
  )
xmlgenerator/bootstrap.py CHANGED
@@ -12,6 +12,7 @@ from xmlgenerator.randomization import Randomizer
12
12
  from xmlgenerator.substitution import Substitutor
13
13
  from xmlgenerator.validation import XmlValidator
14
14
 
15
+ # TODO конфигурация ограничений - occurs
15
16
  # TODO Generator - обработка стандартных xsd типов
16
17
  # TODO кастомные переменные для локального контекста
17
18
  # TODO валидация по Schematron
@@ -42,14 +43,15 @@ def _main():
42
43
 
43
44
  config = load_config(args.config_yaml)
44
45
 
45
- randomizer = Randomizer(args.seed)
46
+ randomizer = Randomizer(args.seed, args.locale)
46
47
  substitutor = Substitutor(randomizer)
47
48
  generator = XmlGenerator(randomizer, substitutor)
48
49
  validator = XmlValidator(args.validation, args.fail_fast)
49
50
 
50
- logger.debug('found %s schemas', len(xsd_files))
51
- for xsd_file in xsd_files:
52
- logger.debug('processing schema: %s', xsd_file.name)
51
+ total_count = len(xsd_files)
52
+ logger.debug('found %s schemas', total_count)
53
+ for index, xsd_file in enumerate(xsd_files):
54
+ logger.info('processing schema %s of %s: %s', index + 1, total_count, xsd_file.name)
53
55
 
54
56
  # get configuration override for current schema
55
57
  local_config = config.get_for_file(xsd_file.name)
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
13
13
  @dataclass
14
14
  class RandomizationConfig:
15
15
  probability: float = field(default=None)
16
+ min_occurs: int = field(default=None)
16
17
  max_occurs: int = field(default=None)
17
18
  min_length: int = field(default=None)
18
19
  max_length: int = field(default=None)
@@ -36,7 +37,7 @@ class GeneratorConfig:
36
37
  @dataclass
37
38
  class GlobalGeneratorConfig(GeneratorConfig):
38
39
  source_filename: str = field(default='(?P<extracted>.*).(xsd|XSD)')
39
- output_filename: str = field(default='{{ source_filename }}_{{ uuid }}')
40
+ output_filename: str = field(default='{{ source_extracted }}_{{ uuid }}')
40
41
  randomization: GlobalRandomizationConfig = field(default_factory=lambda: GlobalRandomizationConfig())
41
42
 
42
43
 
xmlgenerator/generator.py CHANGED
@@ -1,7 +1,5 @@
1
1
  import logging
2
- import re
3
2
 
4
- import rstr
5
3
  import xmlschema
6
4
  from lxml import etree
7
5
  from xmlschema.validators import XsdComplexType, XsdAtomicRestriction, XsdTotalDigitsFacet, XsdElement, \
@@ -21,53 +19,56 @@ class XmlGenerator:
21
19
  self.substitutor = substitutor
22
20
 
23
21
  def generate_xml(self, xsd_schema: xmlschema.XMLSchema, local_config: GeneratorConfig) -> etree.Element:
22
+ ns_map = {None if k == '' else k: v for k, v in xsd_schema.namespaces.items() if v != ''}
24
23
  xsd_root_element = xsd_schema.root_elements[0]
25
- xml_root_element = etree.Element(xsd_root_element.name)
26
- self._add_elements(xml_root_element, xsd_root_element, local_config)
24
+ xml_root_element = etree.Element(xsd_root_element.name, nsmap=ns_map)
25
+ xml_tree = etree.ElementTree(xml_root_element)
26
+ self._add_elements(xml_tree, xml_root_element, xsd_root_element, local_config)
27
27
  return xml_root_element
28
28
 
29
- def _add_elements(self, xml_element: etree.Element, xsd_element, local_config: GeneratorConfig) -> None:
30
- rnd = self.randomizer.rnd
31
-
32
- xsd_element_type = getattr(xsd_element, 'type', None)
33
- logger.debug('fill down element "%s" with type %s', xsd_element.name, type(xsd_element_type).__name__)
34
-
35
- # Add attributes if they are
36
- attributes = getattr(xsd_element, 'attributes', dict())
37
- if len(attributes) > 0 and xsd_element_type.local_name != 'anyType':
38
- logger.debug('add attributes to element %s', xsd_element.name)
39
- for attr_name, attr in attributes.items():
40
- logger.debug('attribute: %s', attr_name)
41
- use = attr.use # optional | required | prohibited
42
- if use == 'prohibited':
43
- logger.debug('skipped')
44
- continue
45
- elif use == 'optional':
46
- if rnd.random() > local_config.randomization.probability:
47
- logger.debug('skipped')
48
- continue # skip optional attribute
49
-
50
- attr_value = self._generate_value(attr.type, attr_name, local_config)
51
- if attr_value is not None:
52
- xml_element.set(attr_name, str(attr_value))
53
- logger.debug(f'attribute %s set with value %s', attr_name, attr_value)
54
-
29
+ def _add_elements(self, xml_tree, xml_element: etree.Element, xsd_element, local_config: GeneratorConfig) -> None:
55
30
  # Process child elements --------------------------------------------------------------------------------------
56
31
  if isinstance(xsd_element, XsdElement):
57
- if isinstance(xsd_element_type, XsdAtomicRestriction):
32
+ element_xpath = xml_tree.getpath(xml_element)
33
+ logger.debug('element: %s [created]', element_xpath)
34
+
35
+ xsd_element_type = getattr(xsd_element, 'type', None)
36
+
37
+ # Add attributes if they are
38
+ attributes = getattr(xsd_element, 'attributes', dict())
39
+ if len(attributes) > 0 and xsd_element_type.local_name != 'anyType':
40
+ for attr_name, attr in attributes.items():
41
+ logger.debug('element: %s; attribute "%s" [processing]', element_xpath, attr_name)
42
+ use = attr.use # optional | required | prohibited
43
+ if use == 'prohibited':
44
+ logger.debug('element: %s; attribute: "%s" [skipped]', element_xpath, attr_name)
45
+ continue
46
+ elif use == 'optional':
47
+ if self.randomizer.random() > local_config.randomization.probability:
48
+ logger.debug('element: %s; attribute: "%s" [skipped]', element_xpath, attr_name)
49
+ continue
50
+
51
+ attr_value = self._generate_value(attr.type, attr_name, local_config)
52
+ if attr_value is not None:
53
+ xml_element.set(attr_name, str(attr_value))
54
+ logger.debug('element: %s; attribute: "%s" = "%s"', element_xpath, attr_name, attr_value)
55
+
56
+ if isinstance(xsd_element_type, XsdAtomicBuiltin):
57
+ text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
58
+ xml_element.text = text
59
+ logger.debug('element: %s = "%s"', element_xpath, text)
60
+ return
61
+ elif isinstance(xsd_element_type, XsdAtomicRestriction):
58
62
  text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
59
63
  xml_element.text = text
64
+ logger.debug('element: %s = "%s"', element_xpath, text)
60
65
  return
61
66
  elif isinstance(xsd_element_type, XsdComplexType):
62
67
  xsd_element_type_content = xsd_element_type.content
63
68
  if isinstance(xsd_element_type_content, XsdGroup):
64
- self._add_elements(xml_element, xsd_element_type_content, local_config)
69
+ self._add_elements(xml_tree, xml_element, xsd_element_type_content, local_config)
65
70
  else:
66
71
  raise RuntimeError()
67
- elif isinstance(xsd_element_type, XsdAtomicBuiltin):
68
- text = self._generate_value(xsd_element_type, xsd_element.name, local_config)
69
- xml_element.text = text
70
- return
71
72
  else:
72
73
  raise RuntimeError()
73
74
 
@@ -76,9 +77,9 @@ class XmlGenerator:
76
77
 
77
78
  group_min_occurs = getattr(xsd_element, 'min_occurs', None)
78
79
  group_max_occurs = getattr(xsd_element, 'max_occurs', None)
79
- group_min_occurs = group_min_occurs if group_min_occurs is not None else 0
80
+ group_min_occurs = group_min_occurs if group_min_occurs is not None else 0 # TODO externalize
80
81
  group_max_occurs = group_max_occurs if group_max_occurs is not None else 10 # TODO externalize
81
- group_occurs = rnd.randint(group_min_occurs, group_max_occurs)
82
+ group_occurs = self.randomizer.integer(group_min_occurs, group_max_occurs)
82
83
 
83
84
  if model == 'all':
84
85
  for _ in range(group_occurs):
@@ -87,13 +88,13 @@ class XmlGenerator:
87
88
 
88
89
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
89
90
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
90
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
91
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
91
92
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
92
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
93
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
93
94
 
94
95
  for _ in range(element_occurs):
95
96
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
96
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
97
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
97
98
  return
98
99
 
99
100
  elif model == 'sequence':
@@ -103,22 +104,22 @@ class XmlGenerator:
103
104
 
104
105
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
105
106
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
106
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
107
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
107
108
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
108
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
109
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
109
110
 
110
111
  if isinstance(xsd_child_element_type, XsdElement):
111
112
  for _ in range(element_occurs):
112
113
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
113
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
114
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
114
115
 
115
116
  elif isinstance(xsd_child_element_type, XsdGroup):
116
117
  xml_child_element = xml_element
117
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
118
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
118
119
 
119
120
  elif isinstance(xsd_child_element_type, XsdAnyElement):
120
121
  xml_child_element = etree.SubElement(xml_element, "Any")
121
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
122
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
122
123
 
123
124
  else:
124
125
  raise RuntimeError(xsd_child_element_type)
@@ -126,17 +127,17 @@ class XmlGenerator:
126
127
 
127
128
  elif model == 'choice':
128
129
  for _ in range(group_occurs):
129
- xsd_child_element_type = rnd.choice(xsd_element)
130
+ xsd_child_element_type = self.randomizer.any(xsd_element)
130
131
 
131
132
  element_min_occurs = getattr(xsd_child_element_type, 'min_occurs', None)
132
133
  element_max_occurs = getattr(xsd_child_element_type, 'max_occurs', None)
133
- element_min_occurs = element_min_occurs if element_min_occurs is not None else 0
134
+ element_min_occurs = element_min_occurs if element_min_occurs is not None else 0 # TODO externalize
134
135
  element_max_occurs = element_max_occurs if element_max_occurs is not None else 10 # TODO externalize
135
- element_occurs = rnd.randint(element_min_occurs, element_max_occurs)
136
+ element_occurs = self.randomizer.integer(element_min_occurs, element_max_occurs)
136
137
 
137
138
  for _ in range(element_occurs):
138
139
  xml_child_element = etree.SubElement(xml_element, xsd_child_element_type.name)
139
- self._add_elements(xml_child_element, xsd_child_element_type, local_config)
140
+ self._add_elements(xml_tree, xml_child_element, xsd_child_element_type, local_config)
140
141
  return
141
142
 
142
143
  else:
@@ -156,79 +157,92 @@ class XmlGenerator:
156
157
  if isinstance(xsd_type, XsdComplexType):
157
158
  return None
158
159
 
159
- rnd = self.randomizer.rnd
160
-
161
- # -------------------------------------------------------------------------------------------------------------
162
- # Выясняем ограничения
163
- min_length = getattr(xsd_type, 'min_length', None) # None | int
164
- max_length = getattr(xsd_type, 'max_length', None) # None | int
165
-
166
- min_value = getattr(xsd_type, 'min_value', None) # None | int
167
- max_value = getattr(xsd_type, 'max_value', None) # None
168
-
169
- total_digits = None
170
- fraction_digits = None
171
- enumeration = getattr(xsd_type, 'enumeration', None)
172
- patterns = getattr(xsd_type, 'patterns', None)
173
-
174
- validators = getattr(xsd_type, 'validators', None)
175
- for validator in validators:
176
- if isinstance(validator, XsdMinExclusiveFacet):
177
- min_value = validator.value
178
- elif isinstance(validator, XsdMinInclusiveFacet):
179
- min_value = validator.value
180
- elif isinstance(validator, XsdMaxExclusiveFacet):
181
- max_value = validator.value
182
- elif isinstance(validator, XsdMaxInclusiveFacet):
183
- max_value = validator.value
184
- elif isinstance(validator, XsdLengthFacet):
185
- min_length = validator.value
186
- max_length = validator.value
187
- elif isinstance(validator, XsdMinLengthFacet):
188
- min_length = validator.value
189
- elif isinstance(validator, XsdMaxLengthFacet):
190
- max_length = validator.value
191
- elif isinstance(validator, XsdTotalDigitsFacet):
192
- total_digits = validator.value
193
- elif isinstance(validator, XsdFractionDigitsFacet):
194
- fraction_digits = validator.value
195
- elif isinstance(validator, XsdEnumerationFacets):
196
- enumeration = validator.enumeration
197
- elif callable(validator):
198
- pass
199
- else:
200
- raise RuntimeError(f"Unhandled validator: {validator}")
201
-
202
- min_length = min_length or -1
203
- max_length = max_length or -1
204
-
205
- min_value = min_value or 0
206
- max_value = max_value or 100000
207
-
208
160
  # -------------------------------------------------------------------------------------------------------------
209
161
  # Ищем переопределение значения в конфигурации
210
-
211
162
  value_override = local_config.value_override
212
163
  is_found, overridden_value = self.substitutor.substitute_value(target_name, value_override.items())
213
164
  if is_found:
165
+ logger.debug('value resolved: "%s"', overridden_value)
214
166
  return overridden_value
215
167
 
216
168
  # -------------------------------------------------------------------------------------------------------------
217
169
  # If there is an enumeration, select a random value from it
218
-
170
+ enumeration = getattr(xsd_type, 'enumeration', None)
219
171
  if enumeration is not None:
220
- return rnd.choice(enumeration)
172
+ random_enum = self.randomizer.any(enumeration)
173
+ logger.debug('use random value from enumeration: "%s" %s', random_enum, enumeration)
174
+ return str(random_enum)
221
175
 
222
- # -------------------------------------------------------------------------------------------------------------\
176
+ # -------------------------------------------------------------------------------------------------------------
223
177
  # Генерируем значения для стандартных типов и типов с ограничениями
224
178
  if isinstance(xsd_type, XsdAtomicBuiltin) or isinstance(xsd_type, XsdAtomicRestriction):
225
- return self._generate_value_by_type(
226
- xsd_type, target_name,
227
- patterns,
179
+ # Выясняем ограничения
180
+ min_length = getattr(xsd_type, 'min_length', None) # None | int
181
+ max_length = getattr(xsd_type, 'max_length', None) # None | int
182
+
183
+ min_value = getattr(xsd_type, 'min_value', None) # None | int
184
+ max_value = getattr(xsd_type, 'max_value', None) # None
185
+
186
+ total_digits = None
187
+ fraction_digits = None
188
+ patterns = getattr(xsd_type, 'patterns', None)
189
+
190
+ validators = getattr(xsd_type, 'validators', None)
191
+ for validator in validators:
192
+ if isinstance(validator, XsdMinExclusiveFacet):
193
+ min_value = validator.value
194
+ elif isinstance(validator, XsdMinInclusiveFacet):
195
+ min_value = validator.value
196
+ elif isinstance(validator, XsdMaxExclusiveFacet):
197
+ max_value = validator.value
198
+ elif isinstance(validator, XsdMaxInclusiveFacet):
199
+ max_value = validator.value
200
+ elif isinstance(validator, XsdLengthFacet):
201
+ min_length = validator.value
202
+ max_length = validator.value
203
+ elif isinstance(validator, XsdMinLengthFacet):
204
+ min_length = validator.value
205
+ elif isinstance(validator, XsdMaxLengthFacet):
206
+ max_length = validator.value
207
+ elif isinstance(validator, XsdTotalDigitsFacet):
208
+ total_digits = validator.value
209
+ elif isinstance(validator, XsdFractionDigitsFacet):
210
+ fraction_digits = validator.value
211
+ elif isinstance(validator, XsdEnumerationFacets):
212
+ pass
213
+ elif callable(validator):
214
+ pass
215
+ else:
216
+ raise RuntimeError(f"Unhandled validator: {validator}")
217
+
218
+ rand_config = local_config.randomization
219
+
220
+ logger.debug(
221
+ 'restrictions before override: min_length: %4s; max_length: %4s; min_value: %4s; max_value: %4s',
222
+ min_length, max_length, min_value, max_value
223
+ )
224
+
225
+ min_length, max_length = calculate_bounds_1(
226
+ min_length, max_length, rand_config.min_length, rand_config.max_length
227
+ )
228
+
229
+ min_value, max_value = calculate_bounds_1(
230
+ min_value, max_value, rand_config.min_inclusive, rand_config.max_inclusive
231
+ )
232
+
233
+ logger.debug(
234
+ 'restrictions after override: min_length: %4s; max_length: %4s; min_value: %4s; max_value: %4s',
235
+ min_length, max_length, min_value, max_value
236
+ )
237
+
238
+ generated_value = self._generate_value_by_type(
239
+ xsd_type, patterns,
228
240
  min_length, max_length,
229
241
  min_value, max_value,
230
242
  total_digits, fraction_digits
231
243
  )
244
+ logger.debug('value generated: "%s"', generated_value)
245
+ return generated_value
232
246
 
233
247
  # -------------------------------------------------------------------------------------------------------------
234
248
  # Проверяем базовый тип
@@ -240,7 +254,7 @@ class XmlGenerator:
240
254
 
241
255
  raise RuntimeError(f"Can't generate value - unhandled type. Target name: {target_name}")
242
256
 
243
- def _generate_value_by_type(self, xsd_type, target_name, patterns, min_length, max_length, min_value, max_value,
257
+ def _generate_value_by_type(self, xsd_type, patterns, min_length, max_length, min_value, max_value,
244
258
  total_digits, fraction_digits) -> str | None:
245
259
 
246
260
  type_id = xsd_type.id
@@ -250,9 +264,11 @@ class XmlGenerator:
250
264
  if not type_id:
251
265
  type_id = xsd_type.root_type.id
252
266
 
267
+ logger.debug('generate value for type: "%s"', type_id)
268
+
253
269
  match type_id:
254
270
  case 'string':
255
- return self._generate_string(target_name, patterns, min_length, max_length)
271
+ return self._generate_string(patterns, min_length, max_length)
256
272
  case 'boolean':
257
273
  return self._generate_boolean()
258
274
  case 'integer':
@@ -294,93 +310,98 @@ class XmlGenerator:
294
310
  case _:
295
311
  raise RuntimeError(type_id)
296
312
 
297
- def _generate_string(self, target_name, patterns, min_length, max_length):
298
- rnd = self.randomizer.rnd
313
+ def _generate_string(self, patterns, min_length, max_length):
299
314
  if patterns is not None:
300
315
  # Генерация строки по regex
301
- random_pattern = rnd.choice(patterns)
302
- xeger = rstr.xeger(random_pattern.attrib['value'])
303
- xeger = re.sub(r'\s', ' ', xeger)
304
- if min_length > -1 and len(xeger) < min_length:
305
- logger.warning(
306
- "Possible mistake in schema: %s generated value '%s' can't be shorter than %s",
307
- target_name, xeger, min_length
308
- )
309
- if -1 < max_length < len(xeger):
310
- logger.warning(
311
- "Possible mistake in schema: %s generated value '%s' can't be longer than %s",
312
- target_name, xeger, max_length
313
- )
314
- return xeger
316
+ random_enum = self.randomizer.any(patterns)
317
+ random_pattern = random_enum.attrib['value']
318
+ return self.randomizer.regex(random_pattern)
315
319
 
316
320
  # Иначе генерируем случайную строку
317
321
  return self.randomizer.ascii_string(min_length, max_length)
318
322
 
319
323
  def _generate_boolean(self):
320
- rnd = self.randomizer.rnd
321
- return rnd.choice(['true', 'false'])
324
+ return self.randomizer.any(['true', 'false'])
322
325
 
323
326
  def _generate_integer(self, total_digits, min_value, max_value):
324
- rnd = self.randomizer.rnd
325
327
  if total_digits:
326
328
  min_value = 10 ** (total_digits - 1)
327
329
  max_value = (10 ** total_digits) - 1
328
- rnd_int = rnd.randint(min_value, max_value)
330
+ rnd_int = self.randomizer.integer(min_value, max_value)
329
331
  return str(rnd_int)
330
332
 
331
333
  def _generate_decimal(self, total_digits, fraction_digits, min_value, max_value):
332
- rnd = self.randomizer.rnd
333
- if total_digits:
334
- if fraction_digits and fraction_digits > 0:
335
- integer_digits = total_digits - fraction_digits
336
- integer_part = rnd.randint(10 ** (integer_digits - 1), (10 ** integer_digits) - 1)
337
- fractional_part = rnd.randint(0, (10 ** fraction_digits) - 1)
338
- return f"{integer_part}.{fractional_part:0{fraction_digits}}"
339
- else:
340
- min_value = 10 ** (total_digits - 1)
341
- max_value = (10 ** total_digits) - 1
342
- rnd_int = rnd.randint(min_value, max_value)
343
- return str(rnd_int)
334
+ if fraction_digits is None:
335
+ fraction_digits = self.randomizer.integer(1, 3)
336
+
337
+ if fraction_digits > 4:
338
+ fraction_digits = self.randomizer.integer(1, 4)
339
+
340
+ if total_digits is None:
341
+ total_digits = 10 + fraction_digits
342
+
343
+ if total_digits > 10:
344
+ total_digits = self.randomizer.integer(6, total_digits - 2)
345
+
346
+ integer_digits = total_digits - fraction_digits
344
347
 
345
- rnd_int = rnd.randint(min_value, max_value)
346
- return f"{int(rnd_int / 100)}.{rnd_int % 100:02}"
348
+ # negative
349
+ min_value_fact = -(10 ** integer_digits - 1)
350
+
351
+ # positive
352
+ max_value_fact = 10 ** integer_digits - 1
353
+
354
+ min_value_fact, max_value_fact = calculate_bounds_2(min_value_fact, max_value_fact, min_value, max_value)
355
+
356
+ random_float = self.randomizer.float(min_value_fact, max_value_fact)
357
+ return f"{random_float:.{fraction_digits}f}"
347
358
 
348
359
  def _generate_float(self, min_value, max_value):
349
- rnd = self.randomizer.rnd
350
- rnd_int = rnd.uniform(min_value, max_value)
351
- rnd_int = round(rnd_int, 2)
352
- return str(rnd_int)
360
+ return self._generate_double(min_value, max_value)
353
361
 
354
362
  def _generate_double(self, min_value, max_value):
355
- return self._generate_float(min_value, max_value)
363
+ return self._generate_decimal(None, 2, min_value, max_value)
356
364
 
357
365
  def _generate_duration(self):
358
366
  raise RuntimeError("not yet implemented")
359
367
 
360
368
  def _generate_datetime(self):
361
- raise RuntimeError("not yet implemented")
369
+ random_datetime = self.randomizer.random_datetime()
370
+ formatted = random_datetime.isoformat()
371
+ return formatted
362
372
 
363
373
  def _generate_date(self):
364
- raise RuntimeError("not yet implemented")
374
+ random_date = self.randomizer.random_date()
375
+ formatted = random_date.isoformat()
376
+ return formatted
365
377
 
366
378
  def _generate_time(self):
367
- raise RuntimeError("not yet implemented")
379
+ random_time = self.randomizer.random_time()
380
+ formatted = random_time.isoformat()
381
+ return formatted
368
382
 
369
383
  def _generate_gyearmonth(self):
370
- raise RuntimeError("not yet implemented")
384
+ random_date = self.randomizer.random_date()
385
+ formatted = random_date.strftime('%Y-%m')
386
+ return formatted
371
387
 
372
388
  def _generate_gyear(self):
373
- rnd = self.randomizer.rnd
374
- return rnd.randint(2000, 2050)
389
+ return str(self.randomizer.integer(2000, 2050))
375
390
 
376
391
  def _generate_gmonthday(self):
377
- raise RuntimeError("not yet implemented")
392
+ random_date = self.randomizer.random_date()
393
+ formatted = random_date.strftime('--%m-%d')
394
+ return formatted
378
395
 
379
396
  def _generate_gday(self):
380
- raise RuntimeError("not yet implemented")
397
+ random_date = self.randomizer.random_date()
398
+ formatted = random_date.strftime('---%d')
399
+ return formatted
381
400
 
382
401
  def _generate_gmonth(self):
383
- raise RuntimeError("not yet implemented")
402
+ random_date = self.randomizer.random_date()
403
+ formatted = random_date.strftime('--%m--')
404
+ return formatted
384
405
 
385
406
  def _generate_hex_binary(self):
386
407
  raise RuntimeError("not yet implemented")
@@ -396,3 +417,40 @@ class XmlGenerator:
396
417
 
397
418
  def _generate_notation(self):
398
419
  raise RuntimeError("not yet implemented")
420
+
421
+
422
+ def calculate_bounds_1(fact_min, fact_max, config_min, config_max):
423
+ if config_min:
424
+ if fact_min is None:
425
+ fact_min = config_min
426
+ else:
427
+ new_min = max(fact_min, config_min)
428
+ if fact_max and new_min <= fact_max:
429
+ fact_min = new_min
430
+
431
+ if config_max:
432
+ if fact_max is None:
433
+ fact_max = config_max
434
+ else:
435
+ new_max = min(fact_max, config_max)
436
+ if new_max >= fact_min:
437
+ fact_max = new_max
438
+
439
+ if fact_max and fact_min and fact_max < fact_min:
440
+ fact_max = fact_min = min(fact_max, fact_min)
441
+
442
+ return fact_min, fact_max
443
+
444
+
445
+ def calculate_bounds_2(fact_min, fact_max, config_min, config_max):
446
+ if config_min is not None:
447
+ new_min = max(fact_min, config_min)
448
+ if fact_max and new_min <= fact_max:
449
+ fact_min = new_min
450
+
451
+ if config_max is not None:
452
+ new_max = min(fact_max, config_max)
453
+ if new_max >= fact_min:
454
+ fact_max = new_max
455
+
456
+ return fact_min, fact_max
xmlgenerator/randomization.py CHANGED
@@ -1,50 +1,140 @@
1
1
  import logging
2
2
  import random
3
+ import re
3
4
  import string
4
5
  import sys
5
- from datetime import datetime, timedelta
6
+ from datetime import datetime, date, time, timedelta
7
+ from decimal import Decimal
6
8
 
9
+ import rstr
7
10
  from faker import Faker
8
11
 
9
12
  logger = logging.getLogger(__name__)
10
13
 
11
14
 
12
15
  class Randomizer:
13
- def __init__(self, seed=None):
16
+ def __init__(self, seed=None, locale='ru_RU'):
14
17
  if not seed:
15
18
  seed = random.randrange(sys.maxsize)
16
19
  logger.debug('initialize with random seed: %s', seed)
17
20
  else:
18
21
  logger.debug('initialize with provided seed: %s', seed)
19
22
 
20
- self.rnd = random.Random(seed)
21
- self.fake = Faker(locale='ru_RU')
22
- self.fake.seed_instance(seed)
23
-
24
- def ascii_string(self, min_length=-1, max_length=-1):
25
- min_length = min_length if min_length and min_length > -1 else 1
26
- max_length = max_length if max_length and max_length >= min_length else 20
27
- if max_length > 50:
28
- max_length = 50
29
- length = self.rnd.randint(min_length, max_length)
30
- # Генерация случайной строки из букв латиницы
31
- letters = string.ascii_letters # Все буквы латиницы (a-z, A-Z)
32
- return ''.join(self.rnd.choice(letters) for _ in range(length))
33
-
34
- def random_date(self, start_date: str, end_date: str) -> datetime:
35
- # Преобразуем строки в объекты datetime
23
+ self._rnd = random.Random(seed)
24
+ self._fake = Faker(locale=locale)
25
+ self._fake.seed_instance(seed)
26
+ self._rstr = rstr.Rstr(self._rnd)
27
+
28
+ def random(self):
29
+ return self._rnd.random()
30
+
31
+ def any(self, options):
32
+ return self._rnd.choice(options)
33
+
34
+ def regex(self, pattern):
35
+ xeger = self._rstr.xeger(pattern)
36
+ return re.sub(r'\s', ' ', xeger)
37
+
38
+ def uuid(self):
39
+ return self._fake.uuid4()
40
+
41
+ def integer(self, min_value, max_value):
42
+ return self._rnd.randint(min_value, max_value)
43
+
44
+ def float(self, min_value, max_value):
45
+ if isinstance(min_value, Decimal):
46
+ min_value = float(min_value)
47
+ if isinstance(max_value, Decimal):
48
+ max_value = float(max_value)
49
+ return self._rnd.uniform(min_value, max_value)
50
+
51
+ def ascii_string(self, min_length, max_length):
52
+ if min_length is None:
53
+ min_length = 1
54
+ if max_length is None:
55
+ max_length = 20
56
+
57
+ length = self._rnd.randint(min_length, max_length)
58
+ letters = string.ascii_lowercase
59
+ return ''.join(self._rnd.choice(letters) for _ in range(length)).capitalize()
60
+
61
+ def random_date(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> date:
62
+ start = date.fromisoformat(start_date)
63
+ end = date.fromisoformat(end_date)
64
+
65
+ delta = (end - start).days
66
+ random_days = self._rnd.randint(0, delta)
67
+ return start + timedelta(days=random_days)
68
+
69
+ def random_time(self, start_time: str = '00:00:00', end_time: str = '23:59:59') -> time:
70
+ start = time.fromisoformat(start_time)
71
+ end = time.fromisoformat(end_time)
72
+
73
+ random_h = self._rnd.randint(start.hour, end.hour)
74
+ random_m = self._rnd.randint(start.minute, end.minute)
75
+ random_s = self._rnd.randint(start.second, end.second)
76
+
77
+ return time(hour=random_h, minute=random_m, second=random_s)
78
+
79
+ def random_datetime(self, start_date: str = '1990-01-01', end_date: str = '2025-12-31') -> datetime:
36
80
  start = datetime.strptime(start_date, "%Y-%m-%d")
37
81
  end = datetime.strptime(end_date, "%Y-%m-%d")
38
82
 
39
- # Вычисляем разницу в днях между начальной и конечной датой
40
83
  delta = (end - start).days
84
+ random_days = self._rnd.randint(0, delta)
85
+ return start + timedelta(days=random_days)
41
86
 
42
- # Генерируем случайное количество дней в пределах delta
43
- random_days = self.rnd.randint(0, delta)
87
+ def last_name(self):
88
+ return self._fake.last_name_male()
89
+
90
+ def first_name(self):
91
+ return self._fake.first_name_male()
92
+
93
+ def middle_name(self):
94
+ return self._fake.middle_name_male()
95
+
96
+ def address_text(self):
97
+ return self._fake.address()
98
+
99
+ def administrative_unit(self):
100
+ return self._fake.administrative_unit()
101
+
102
+ def house_number(self):
103
+ return self._fake.building_number()
104
+
105
+ def city_name(self):
106
+ return self._fake.city_name() if hasattr(self._fake, 'city_name') else self._fake.city()
44
107
 
45
- # Добавляем случайное количество дней к начальной дате
46
- return start + timedelta(days=random_days)
108
+ def country(self):
109
+ return self._fake.country()
110
+
111
+ def postcode(self):
112
+ return self._fake.postcode()
113
+
114
+ def company_name(self):
115
+ return self._fake.company()
116
+
117
+ def bank_name(self):
118
+ return self._fake.bank()
119
+
120
+ def phone_number(self):
121
+ return self._fake.phone_number()
122
+
123
+ def inn_fl(self):
124
+ return self._fake.individuals_inn()
125
+
126
+ def inn_ul(self):
127
+ return self._fake.businesses_inn()
128
+
129
+ def ogrn_ip(self):
130
+ return self._fake.individuals_ogrn()
131
+
132
+ def ogrn_fl(self):
133
+ return self._fake.businesses_ogrn()
134
+
135
+ def kpp(self):
136
+ return self._fake.kpp()
47
137
 
48
138
  def snils_formatted(self):
49
- snils = self.fake.snils()
139
+ snils = self._fake.snils()
50
140
  return f"{snils[:3]}-{snils[3:6]}-{snils[6:9]} {snils[9:]}"
@@ -1,64 +1,53 @@
1
1
  import logging
2
2
  import re
3
- import uuid
4
-
5
- import rstr
6
3
 
7
4
  from xmlgenerator.randomization import Randomizer
8
5
 
9
6
  __all__ = ['Substitutor']
10
7
 
11
- _pattern = re.compile(pattern=r'\{\{\s*(?:(?P<function>\S*?)(?:\(\s*(?P<argument>[^)]*)\s*\))?\s*(?:\|\s*(?P<modifier>.*?))?)?\s*}}')
8
+ _pattern = re.compile(
9
+ r'\{\{\s*(?:(?P<function>\S*?)(?:\(\s*(?P<argument>[^)]*)\s*\))?\s*(?:\|\s*(?P<modifier>.*?))?)?\s*}}')
12
10
 
13
11
  logger = logging.getLogger(__name__)
14
12
 
13
+
15
14
  class Substitutor:
16
15
  def __init__(self, randomizer: Randomizer):
17
- fake = randomizer.fake
18
16
  self.randomizer = randomizer
19
17
  self._local_context = {}
20
18
  self._global_context = {}
21
19
  self.providers_dict = {
22
- # Функции локального контекста
23
- "source_filename": lambda: self._local_context["source_filename"],
24
- "source_extracted": lambda: self._local_context["source_extracted"],
25
- "output_filename": lambda: self.get_output_filename(),
26
-
27
- 'uuid': lambda: str(uuid.uuid4()),
28
- "regex": lambda a: rstr.xeger(a),
29
- "number": self._rand_int,
30
- "date": self._rand_date,
31
-
32
- "last_name": fake.last_name_male,
33
- "first_name": fake.first_name_male,
34
- "middle_name": fake.middle_name_male,
35
- 'address_text': fake.address,
36
- 'administrative_unit': fake.administrative_unit,
37
- 'house_number': fake.building_number,
38
- 'city_name': fake.city_name,
39
- 'postcode': fake.postcode,
40
- 'company_name': fake.company,
41
- 'bank_name': fake.bank,
42
- 'phone_number': fake.phone_number,
43
- 'inn_fl': fake.individuals_inn,
44
- 'inn_ul': fake.businesses_inn,
45
- 'ogrn_ip': fake.individuals_ogrn,
46
- 'ogrn_fl': fake.businesses_ogrn,
47
- 'kpp': fake.kpp,
48
- 'snils_formatted': randomizer.snils_formatted,
20
+ # local scope functions
21
+ 'source_filename': lambda args: self._local_context["source_filename"],
22
+ 'source_extracted': lambda args: self._local_context["source_extracted"],
23
+ 'output_filename': lambda args: self.get_output_filename(),
24
+
25
+ 'any': lambda args: self._any(args),
26
+ 'regex': lambda args: self._regex(args),
27
+ 'uuid': lambda args: self.randomizer.uuid(),
28
+ 'number': lambda args: self._number(args),
29
+ 'date': lambda args: self._date_formatted(args),
30
+
31
+ 'last_name': lambda args: self.randomizer.last_name(),
32
+ 'first_name': lambda args: self.randomizer.first_name(),
33
+ 'middle_name': lambda args: self.randomizer.middle_name(),
34
+ 'address_text': lambda args: self.randomizer.address_text(),
35
+ 'administrative_unit': lambda args: self.randomizer.administrative_unit(),
36
+ 'house_number': lambda args: self.randomizer.house_number(),
37
+ 'city_name': lambda args: self.randomizer.city_name(),
38
+ 'country': lambda args: self.randomizer.country(),
39
+ 'postcode': lambda args: self.randomizer.postcode(),
40
+ 'company_name': lambda args: self.randomizer.company_name(),
41
+ 'bank_name': lambda args: self.randomizer.bank_name(),
42
+ 'phone_number': lambda args: self.randomizer.phone_number(),
43
+ 'inn_fl': lambda args: self.randomizer.inn_fl(),
44
+ 'inn_ul': lambda args: self.randomizer.inn_ul(),
45
+ 'ogrn_ip': lambda args: self.randomizer.ogrn_ip(),
46
+ 'ogrn_fl': lambda args: self.randomizer.ogrn_fl(),
47
+ 'kpp': lambda args: self.randomizer.kpp(),
48
+ 'snils_formatted': lambda args: self.randomizer.snils_formatted(),
49
49
  }
50
50
 
51
- def _rand_int(self, a):
52
- args = str(a).split(sep=",")
53
- return str(self.randomizer.rnd.randint(int(args[0]), int(args[1])))
54
-
55
- def _rand_date(self, a):
56
- args = str(a).split(sep=",")
57
- date_from = args[0].strip(' ').strip("'").strip('"')
58
- date_until = args[1].strip(' ').strip("'").strip('"')
59
- random_date = self.randomizer.random_date(date_from, date_until)
60
- return random_date.strftime('%Y%m%d') # TODO externalize pattern
61
-
62
51
  def reset_context(self, xsd_filename, config_local):
63
52
  self._local_context.clear()
64
53
  self._local_context["source_filename"] = xsd_filename
@@ -105,7 +94,7 @@ class Substitutor:
105
94
  if not func_lambda:
106
95
  raise RuntimeError(f"Unknown function {func_name}")
107
96
 
108
- provider_func = lambda: func_lambda() if not func_args else func_lambda(func_args)
97
+ provider_func = lambda: func_lambda(func_args)
109
98
 
110
99
  match func_mod:
111
100
  case None:
@@ -126,3 +115,21 @@ class Substitutor:
126
115
 
127
116
  logger.debug('expression resolved to value: %s', result_value)
128
117
  return result_value
118
+
119
+ def _any(self, args):
120
+ separated_args = str(args).split(sep=",")
121
+ options = [i.strip(' ').strip("'").strip('"') for i in separated_args]
122
+ return self.randomizer.any(options)
123
+
124
+ def _regex(self, args):
125
+ pattern = args.strip("'").strip('"')
126
+ return self.randomizer.regex(pattern)
127
+
128
+ def _number(self, args):
129
+ left_bound, right_bound = (int(i) for i in str(args).split(sep=","))
130
+ return str(self.randomizer.integer(left_bound, right_bound))
131
+
132
+ def _date_formatted(self, args):
133
+ date_from, date_until = (i.strip(' ').strip("'").strip('"') for i in str(args).split(sep=","))
134
+ random_date = self.randomizer.random_datetime(date_from, date_until)
135
+ return random_date.strftime("%Y%m%d")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmlgenerator
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Generates XML documents from XSD schemas
5
5
  Home-page: https://github.com/lexakimov/xmlgenerator
6
6
  Author: Alexey Akimov
@@ -136,12 +136,13 @@ positional arguments:
136
136
  options:
137
137
  -h, --help show this help message and exit
138
138
  -c, --config <config.yml> pass yaml configuration file
139
+ -l, --locale <locale> randomizer locale (default: en_US)
139
140
  -o, --output <output.xml> save output to dir or file
140
141
  -p, --pretty prettify output XML
141
142
  -v, --validation <validation> validate generated XML document (none, schema, schematron, default is schema)
142
143
  -ff, --fail-fast terminate execution on validation error (default is true)
143
144
  -e, --encoding <encoding> output XML encoding (utf-8, windows-1251, default is utf-8)
144
- --seed <seed> set randomization seed
145
+ -s, --seed <seed> set randomization seed
145
146
  -d, --debug enable debug mode
146
147
  -V, --version shows current version
147
148
  -C, --completion <shell> print shell completion script (bash, zsh, tcsh)
@@ -164,7 +165,7 @@ global:
164
165
  source_filename: ...
165
166
 
166
167
  # Filename template for saving the generated document.
167
- # Default value: `{{ source_filename }}_{{ uuid }}` (xsd schema filename + random UUID)
168
+ # Default value: `{{ source_extracted }}_{{ uuid }}` (xsd schema filename + random UUID)
168
169
  output_filename: ...
169
170
 
170
171
  # Random value generator settings
@@ -247,6 +248,7 @@ In the `value_override` sections, you can specify either a string value or speci
247
248
  | `output_filename` | String described by the `output_filename_template` configuration parameter |
248
249
  | `uuid` | Random UUIDv4 |
249
250
  | `regex("pattern")` | Random string value matching the specified regular expression |
251
+ | `any('A', "B", C)` | Random value from enumeration |
250
252
  | `number(A, B)` | Random number between A and B |
251
253
  | `date("2010-01-01", "2025-01-01")` | Random date within the specified range |
252
254
  | `last_name` | Last Name |
@@ -0,0 +1,14 @@
1
+ xmlgenerator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ xmlgenerator/arguments.py,sha256=0WHKt7eOS7M3_R-rYdp_52Q8rgArCF9VlIDkPVP_8dk,4784
3
+ xmlgenerator/bootstrap.py,sha256=7ONv9Eh46z6xd8w_V7FO1156FonAX-LHYpRrQ44PrBU,3668
4
+ xmlgenerator/configuration.py,sha256=JYhz_lONxd0faUiZHG-TVEs6yocn0s__Ulwtcvq9eDs,5946
5
+ xmlgenerator/generator.py,sha256=eBW8UlY8Bu8xh4oo5jp4_yo6OKz9T3xvQeZYC66lui4,20814
6
+ xmlgenerator/randomization.py,sha256=ekNQJYgcmDCf6uCYiZnWat7u_9kO6TAQQ8qZFIpiB7o,4205
7
+ xmlgenerator/substitution.py,sha256=1nvjQLSUS9Yo8r2T3f420Upbwm6iikUQG3lG5TQUSDU,6016
8
+ xmlgenerator/validation.py,sha256=uCJjS5YmRDlAp9C-5Rd4E2Brh6_3WOG2-dSGxDiaH14,2023
9
+ xmlgenerator-0.3.0.dist-info/licenses/LICENSE,sha256=QlXK8O3UcoAYUYwVJNgB9MSM7O94ogNo_1hd9GzznUQ,1070
10
+ xmlgenerator-0.3.0.dist-info/METADATA,sha256=ONyx3zcbuX1zQTnt-5ORWcxU1shJDYbgsyFBlEWx8E4,12870
11
+ xmlgenerator-0.3.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
12
+ xmlgenerator-0.3.0.dist-info/entry_points.txt,sha256=ly9hKr3o4AzFUkelBZNRzyKYf-Ld4kfcffvBu1oHq54,61
13
+ xmlgenerator-0.3.0.dist-info/top_level.txt,sha256=jr7FbMBm8MQ6j8I_-nWzQQEseXzwSCZNXgrkWuk9P4E,13
14
+ xmlgenerator-0.3.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- xmlgenerator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- xmlgenerator/arguments.py,sha256=E0b5ndlGAxu39OgRatqmPdWSkS5EvCTLUEMKFgezi7c,4612
3
- xmlgenerator/bootstrap.py,sha256=FW1wyWRLY9LFFWFTV0XNycFWMfkXP2jwfcfIapzFGUA,3511
4
- xmlgenerator/configuration.py,sha256=DiYUpNCjkerjr73yD209q8FJwGWgipbwWXuLkt25rQM,5903
5
- xmlgenerator/generator.py,sha256=aAXGyiqzkU5eWiOaixMs9sPvm9EZZjQVZBdQvqtyB_A,18306
6
- xmlgenerator/randomization.py,sha256=cPnUWrvylIw2GH1FAW2SvyqdQQJDcoU8AgEmakPfB-I,1993
7
- xmlgenerator/substitution.py,sha256=p0j5QXhNHBJPlDJAQ33dV8wlnWVdO05cOvh9ggP9uFY,5277
8
- xmlgenerator/validation.py,sha256=uCJjS5YmRDlAp9C-5Rd4E2Brh6_3WOG2-dSGxDiaH14,2023
9
- xmlgenerator-0.2.0.dist-info/licenses/LICENSE,sha256=QlXK8O3UcoAYUYwVJNgB9MSM7O94ogNo_1hd9GzznUQ,1070
10
- xmlgenerator-0.2.0.dist-info/METADATA,sha256=rLtz6c0Oosz79qqZvR5A7l5ZBjcnHJr2m6wWLn327CY,12653
11
- xmlgenerator-0.2.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
12
- xmlgenerator-0.2.0.dist-info/entry_points.txt,sha256=ly9hKr3o4AzFUkelBZNRzyKYf-Ld4kfcffvBu1oHq54,61
13
- xmlgenerator-0.2.0.dist-info/top_level.txt,sha256=jr7FbMBm8MQ6j8I_-nWzQQEseXzwSCZNXgrkWuk9P4E,13
14
- xmlgenerator-0.2.0.dist-info/RECORD,,