xml2db 0.12.2__tar.gz → 0.12.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xml2db-0.12.2/src/xml2db.egg-info → xml2db-0.12.4}/PKG-INFO +6 -6
- {xml2db-0.12.2 → xml2db-0.12.4}/pyproject.toml +4 -4
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/document.py +21 -22
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/model.py +61 -45
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/column.py +13 -19
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/reused_table.py +1 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/table.py +3 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/transformed_table.py +7 -1
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/xml_converter.py +139 -41
- {xml2db-0.12.2 → xml2db-0.12.4/src/xml2db.egg-info}/PKG-INFO +6 -6
- xml2db-0.12.4/src/xml2db.egg-info/requires.txt +10 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/tests/test_conversions.py +41 -12
- {xml2db-0.12.2 → xml2db-0.12.4}/tests/test_models_output.py +7 -4
- {xml2db-0.12.2 → xml2db-0.12.4}/tests/test_roundtrip.py +5 -14
- {xml2db-0.12.2 → xml2db-0.12.4}/tests/test_validation.py +11 -7
- xml2db-0.12.2/src/xml2db.egg-info/requires.txt +0 -10
- {xml2db-0.12.2 → xml2db-0.12.4}/LICENSE +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/README.md +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/setup.cfg +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/__init__.py +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/exceptions.py +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/__init__.py +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/duplicated_table.py +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db/table/relations.py +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db.egg-info/SOURCES.txt +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db.egg-info/dependency_links.txt +0 -0
- {xml2db-0.12.2 → xml2db-0.12.4}/src/xml2db.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: xml2db
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.4
|
|
4
4
|
Summary: Import complex XML files to a relational database
|
|
5
5
|
Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
|
|
6
6
|
Project-URL: Documentation, https://cre-dev.github.io/xml2db
|
|
@@ -13,11 +13,11 @@ Requires-Python: >=3.9
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: sqlalchemy>1.4
|
|
16
|
-
Requires-Dist: xmlschema
|
|
17
|
-
Requires-Dist: lxml
|
|
16
|
+
Requires-Dist: xmlschema>=3.3.2
|
|
17
|
+
Requires-Dist: lxml>=5.1.0
|
|
18
18
|
Provides-Extra: docs
|
|
19
|
-
Requires-Dist: mkdocs-material
|
|
20
|
-
Requires-Dist: mkdocstrings-python
|
|
19
|
+
Requires-Dist: mkdocs-material>=9.5.34; extra == "docs"
|
|
20
|
+
Requires-Dist: mkdocstrings-python>=1.11.1; extra == "docs"
|
|
21
21
|
Provides-Extra: tests
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "tests"
|
|
23
23
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "xml2db"
|
|
7
|
-
version = "0.12.
|
|
7
|
+
version = "0.12.4"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
|
|
10
10
|
]
|
|
@@ -18,12 +18,12 @@ classifiers = [
|
|
|
18
18
|
]
|
|
19
19
|
dependencies = [
|
|
20
20
|
"sqlalchemy>1.4",
|
|
21
|
-
"xmlschema
|
|
22
|
-
"lxml
|
|
21
|
+
"xmlschema>=3.3.2",
|
|
22
|
+
"lxml>=5.1.0",
|
|
23
23
|
]
|
|
24
24
|
|
|
25
25
|
[project.optional-dependencies]
|
|
26
|
-
docs = ["mkdocs-material
|
|
26
|
+
docs = ["mkdocs-material>=9.5.34", "mkdocstrings-python>=1.11.1"]
|
|
27
27
|
tests = ["pytest>=7.0"]
|
|
28
28
|
|
|
29
29
|
[project.urls]
|
|
@@ -171,17 +171,15 @@ class Document:
|
|
|
171
171
|
record["xml2db_row_number"] = row_number
|
|
172
172
|
|
|
173
173
|
# build record from fields for columns and n-1 relations
|
|
174
|
-
for field_type, key,
|
|
174
|
+
for field_type, key, field in model_table.fields:
|
|
175
175
|
if field_type == "col":
|
|
176
|
-
|
|
177
|
-
if
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
else:
|
|
184
|
-
val = content[key]
|
|
176
|
+
content_key = (
|
|
177
|
+
(f"{key[:-5]}__attr" if field.has_suffix else f"{key}__attr")
|
|
178
|
+
if field.is_attr
|
|
179
|
+
else key
|
|
180
|
+
)
|
|
181
|
+
if content_key in content:
|
|
182
|
+
val = content[content_key]
|
|
185
183
|
|
|
186
184
|
if len(val) == 1:
|
|
187
185
|
record[key] = val[0]
|
|
@@ -320,25 +318,26 @@ class Document:
|
|
|
320
318
|
record = data_index[node_type]["records"][node_pk]
|
|
321
319
|
for field_type, rel_name, rel in tb.fields:
|
|
322
320
|
if field_type == "col" and record[rel_name] is not None:
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
321
|
+
content_key = (
|
|
322
|
+
(
|
|
323
|
+
f"{rel_name[:-5]}__attr"
|
|
324
|
+
if rel.has_suffix
|
|
325
|
+
else f"{rel_name}__attr"
|
|
326
|
+
)
|
|
327
|
+
if rel.is_attr
|
|
328
|
+
else rel_name
|
|
329
|
+
)
|
|
330
|
+
if isinstance(record[rel_name], datetime.datetime):
|
|
331
|
+
content[content_key] = [
|
|
333
332
|
record[rel_name].isoformat(timespec="milliseconds")
|
|
334
333
|
]
|
|
335
334
|
else:
|
|
336
|
-
content[
|
|
335
|
+
content[content_key] = (
|
|
337
336
|
list(csv.reader([str(record[rel_name])], escapechar="\\"))[
|
|
338
337
|
0
|
|
339
338
|
]
|
|
340
339
|
if "," in str(record[rel_name])
|
|
341
|
-
else [
|
|
340
|
+
else [record[rel_name]]
|
|
342
341
|
)
|
|
343
342
|
elif (
|
|
344
343
|
field_type == "rel1"
|
|
@@ -70,7 +70,7 @@ class DataModel:
|
|
|
70
70
|
def __init__(
|
|
71
71
|
self,
|
|
72
72
|
xsd_file: str,
|
|
73
|
-
short_name: str =
|
|
73
|
+
short_name: str = "DocumentRoot",
|
|
74
74
|
long_name: str = None,
|
|
75
75
|
base_url: str = None,
|
|
76
76
|
model_config: dict = None,
|
|
@@ -226,8 +226,7 @@ class DataModel:
|
|
|
226
226
|
"""
|
|
227
227
|
# parse the XML schema recursively and hold a reference to the head table
|
|
228
228
|
root_table = self._parse_tree(
|
|
229
|
-
self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema
|
|
230
|
-
is_root_table=True,
|
|
229
|
+
self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema
|
|
231
230
|
)
|
|
232
231
|
self.root_table = root_table.type_name
|
|
233
232
|
# compute a text representation of the original data model and store it
|
|
@@ -273,9 +272,7 @@ class DataModel:
|
|
|
273
272
|
for tb in self.fk_ordered_tables:
|
|
274
273
|
tb.build_sqlalchemy_tables()
|
|
275
274
|
|
|
276
|
-
def _parse_tree(
|
|
277
|
-
self, parent_node: xmlschema.XsdElement, is_root_table: bool = False
|
|
278
|
-
):
|
|
275
|
+
def _parse_tree(self, parent_node: xmlschema.XsdElement, nodes_path: list = None):
|
|
279
276
|
"""Parse a node of an XML schema recursively and create a target data model without any simplification
|
|
280
277
|
|
|
281
278
|
We parse the XSD tree recursively to create for each node (basically a complex type in the XSD) an equivalent \
|
|
@@ -289,7 +286,7 @@ class DataModel:
|
|
|
289
286
|
|
|
290
287
|
Args:
|
|
291
288
|
parent_node: the current XSD node being parsed
|
|
292
|
-
|
|
289
|
+
nodes_path: a list of nodes types from the root node
|
|
293
290
|
"""
|
|
294
291
|
|
|
295
292
|
# find current node type and name and returns corresponding table if it already exists
|
|
@@ -301,12 +298,16 @@ class DataModel:
|
|
|
301
298
|
if parent_type is None:
|
|
302
299
|
parent_type = parent_node.local_name
|
|
303
300
|
|
|
301
|
+
nodes_path = (nodes_path if nodes_path else []) + [parent_type]
|
|
302
|
+
|
|
304
303
|
# if this type has already been encountered, stop here and return existing table
|
|
305
304
|
if parent_type in self.tables:
|
|
306
305
|
parent_table = self.tables[parent_type]
|
|
307
306
|
return parent_table
|
|
308
307
|
|
|
309
|
-
#
|
|
308
|
+
# For database tables we use element names rather than XSD types, under the assumption that they are often
|
|
309
|
+
# more meaningful given that they are the one which appear in XML documents. However, same names can be used
|
|
310
|
+
# for different XSD types, so if an element name is used for different types,
|
|
310
311
|
# we add a suffix to the name to make it unique again (using a dict to keep the name/type association)
|
|
311
312
|
parent_name = (
|
|
312
313
|
parent_node.local_name
|
|
@@ -324,7 +325,7 @@ class DataModel:
|
|
|
324
325
|
parent_table = self._create_table_model(
|
|
325
326
|
parent_name,
|
|
326
327
|
parent_type,
|
|
327
|
-
|
|
328
|
+
len(nodes_path) == 1,
|
|
328
329
|
isinstance(parent_node, xmlschema.XMLSchema),
|
|
329
330
|
)
|
|
330
331
|
self.tables[parent_type] = parent_table
|
|
@@ -363,6 +364,13 @@ class DataModel:
|
|
|
363
364
|
if elem_type.base_type
|
|
364
365
|
else recurse_parse_simple_type(elem_type.member_types)
|
|
365
366
|
)
|
|
367
|
+
if elem_type.is_list():
|
|
368
|
+
return (
|
|
369
|
+
"string",
|
|
370
|
+
0,
|
|
371
|
+
None,
|
|
372
|
+
elem_type.allow_empty,
|
|
373
|
+
)
|
|
366
374
|
if elem_type.is_restriction():
|
|
367
375
|
dt = elem_type.base_type.local_name
|
|
368
376
|
mil = elem_type.min_length
|
|
@@ -384,7 +392,12 @@ class DataModel:
|
|
|
384
392
|
else None
|
|
385
393
|
)
|
|
386
394
|
ae = ae and bt_ae if ae is not None and bt_ae is not None else None
|
|
387
|
-
if elem_type.enumeration is not None
|
|
395
|
+
if elem_type.enumeration is not None and dt in [
|
|
396
|
+
"string",
|
|
397
|
+
"NMTOKEN",
|
|
398
|
+
"duration",
|
|
399
|
+
"token",
|
|
400
|
+
]:
|
|
388
401
|
mil = min([len(val) for val in elem_type.enumeration])
|
|
389
402
|
mal = max([len(val) for val in elem_type.enumeration])
|
|
390
403
|
return dt, mil, mal, ae
|
|
@@ -410,25 +423,31 @@ class DataModel:
|
|
|
410
423
|
),
|
|
411
424
|
]
|
|
412
425
|
|
|
413
|
-
# go through item attributes and add them as columns
|
|
426
|
+
# go through item attributes and add them as columns, adding a suffix if an element with the same name exists
|
|
427
|
+
children_names = None
|
|
414
428
|
for attrib_name, attrib in parent_node.attributes.items():
|
|
429
|
+
if children_names is None:
|
|
430
|
+
children_names = [child.local_name for child in parent_node]
|
|
415
431
|
(
|
|
416
432
|
data_type,
|
|
417
433
|
min_length,
|
|
418
434
|
max_length,
|
|
419
435
|
allow_empty,
|
|
420
436
|
) = recurse_parse_simple_type([attrib.type])
|
|
437
|
+
suffix = attrib_name in children_names
|
|
421
438
|
parent_table.add_column(
|
|
422
|
-
f"{attrib_name}",
|
|
439
|
+
f"{attrib_name}{'_attr' if suffix else ''}",
|
|
423
440
|
data_type,
|
|
424
441
|
[0, 1],
|
|
425
442
|
min_length,
|
|
426
443
|
max_length,
|
|
427
444
|
True,
|
|
445
|
+
suffix,
|
|
428
446
|
False,
|
|
429
447
|
allow_empty,
|
|
430
448
|
None,
|
|
431
449
|
)
|
|
450
|
+
|
|
432
451
|
nested_containers = []
|
|
433
452
|
# go through the children to add either arguments either relations to the current element
|
|
434
453
|
for child in parent_node:
|
|
@@ -454,6 +473,7 @@ class DataModel:
|
|
|
454
473
|
if child.parent
|
|
455
474
|
and child.parent.max_occurs != 1
|
|
456
475
|
and child.parent.model != "choice"
|
|
476
|
+
and child.max_occurs == 1
|
|
457
477
|
else None
|
|
458
478
|
),
|
|
459
479
|
)
|
|
@@ -482,32 +502,39 @@ class DataModel:
|
|
|
482
502
|
max_length,
|
|
483
503
|
False,
|
|
484
504
|
False,
|
|
505
|
+
False,
|
|
485
506
|
allow_empty,
|
|
486
507
|
nested_containers[-1][1],
|
|
487
508
|
)
|
|
488
509
|
|
|
489
510
|
elif ct.is_complex():
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
else "sequence"
|
|
495
|
-
)
|
|
496
|
-
occurs = get_occurs(child)
|
|
497
|
-
if child.is_single():
|
|
498
|
-
parent_table.add_relation_1(
|
|
499
|
-
child.local_name,
|
|
500
|
-
child_table,
|
|
501
|
-
occurs,
|
|
502
|
-
nested_containers[-1][1],
|
|
511
|
+
# ignoring recursive definitions by skipping these fields
|
|
512
|
+
if child.type.local_name in nodes_path:
|
|
513
|
+
logger.warning(
|
|
514
|
+
f"type '{child.type.local_name}' contains a recursive definition"
|
|
503
515
|
)
|
|
504
516
|
else:
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
517
|
+
child_table = self._parse_tree(child, nodes_path)
|
|
518
|
+
child_table.model_group = (
|
|
519
|
+
"choice"
|
|
520
|
+
if ct.model_group and ct.model_group.model == "choice"
|
|
521
|
+
else "sequence"
|
|
510
522
|
)
|
|
523
|
+
occurs = get_occurs(child)
|
|
524
|
+
if occurs[1] == 1:
|
|
525
|
+
parent_table.add_relation_1(
|
|
526
|
+
child.local_name,
|
|
527
|
+
child_table,
|
|
528
|
+
occurs,
|
|
529
|
+
nested_containers[-1][1],
|
|
530
|
+
)
|
|
531
|
+
else:
|
|
532
|
+
parent_table.add_relation_n(
|
|
533
|
+
child.local_name,
|
|
534
|
+
child_table,
|
|
535
|
+
occurs,
|
|
536
|
+
nested_containers[-1][1],
|
|
537
|
+
)
|
|
511
538
|
else:
|
|
512
539
|
raise ValueError("unknown case; please check")
|
|
513
540
|
else:
|
|
@@ -534,6 +561,7 @@ class DataModel:
|
|
|
534
561
|
min_length,
|
|
535
562
|
max_length,
|
|
536
563
|
False,
|
|
564
|
+
False,
|
|
537
565
|
True,
|
|
538
566
|
allow_empty,
|
|
539
567
|
None,
|
|
@@ -544,31 +572,19 @@ class DataModel:
|
|
|
544
572
|
def _repr_tree(
|
|
545
573
|
self,
|
|
546
574
|
parent_table: Union[DataModelTableReused, DataModelTableDuplicated],
|
|
547
|
-
visited_nodes: Union[set, None] = None,
|
|
548
575
|
):
|
|
549
576
|
"""Build a text representation of the data model tree
|
|
550
577
|
|
|
551
578
|
Args:
|
|
552
579
|
parent_table: the current data model table object
|
|
553
580
|
"""
|
|
554
|
-
if visited_nodes is None:
|
|
555
|
-
visited_nodes = set()
|
|
556
|
-
else:
|
|
557
|
-
visited_nodes = {item for item in visited_nodes}
|
|
558
|
-
visited_nodes.add(parent_table.name)
|
|
559
581
|
for field_type, name, field in parent_table.fields:
|
|
560
582
|
if field_type == "col":
|
|
561
583
|
yield f"{field.name}{field.occurs}: {field.data_type}"
|
|
562
|
-
|
|
563
|
-
mg = " (choice)" if field.other_table.model_group == "choice" else ""
|
|
564
|
-
yield f"{field.name}{field.occurs}{mg}:{' ...' if field_type in visited_nodes else ''}"
|
|
565
|
-
if field.other_table.name not in visited_nodes:
|
|
566
|
-
for line in self._repr_tree(field.other_table, visited_nodes):
|
|
567
|
-
yield f" {line}"
|
|
568
|
-
elif field_type == "reln":
|
|
584
|
+
else:
|
|
569
585
|
mg = " (choice)" if field.other_table.model_group == "choice" else ""
|
|
570
|
-
yield f"{field.name}{field.occurs}{mg}:
|
|
571
|
-
for line in self._repr_tree(field.other_table
|
|
586
|
+
yield f"{field.name}{field.occurs}{mg}:"
|
|
587
|
+
for line in self._repr_tree(field.other_table):
|
|
572
588
|
yield f" {line}"
|
|
573
589
|
|
|
574
590
|
def get_entity_rel_diagram(self, text_context: bool = True) -> str:
|
|
@@ -32,15 +32,22 @@ def types_mapping_default(temp: bool, col: "DataModelColumn") -> Any:
|
|
|
32
32
|
"""
|
|
33
33
|
if col.occurs[1] != 1:
|
|
34
34
|
return String(8000)
|
|
35
|
-
if col.data_type in ["decimal", "float"]:
|
|
35
|
+
if col.data_type in ["decimal", "float", "double"]:
|
|
36
36
|
return Double
|
|
37
37
|
if col.data_type == "dateTime":
|
|
38
38
|
return DateTime(timezone=True)
|
|
39
|
-
if col.data_type
|
|
39
|
+
if col.data_type in [
|
|
40
|
+
"integer",
|
|
41
|
+
"int",
|
|
42
|
+
"nonPositiveInteger",
|
|
43
|
+
"nonNegativeInteger",
|
|
44
|
+
"positiveInteger",
|
|
45
|
+
"negativeInteger",
|
|
46
|
+
]:
|
|
40
47
|
return Integer
|
|
41
48
|
if col.data_type == "boolean":
|
|
42
49
|
return Boolean
|
|
43
|
-
if col.data_type
|
|
50
|
+
if col.data_type in ["short", "byte"]:
|
|
44
51
|
return SmallInteger
|
|
45
52
|
if col.data_type == "long":
|
|
46
53
|
return BigInteger
|
|
@@ -77,20 +84,10 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any:
|
|
|
77
84
|
"""
|
|
78
85
|
if col.occurs[1] != 1:
|
|
79
86
|
return mssql.VARCHAR(8000)
|
|
80
|
-
if col.data_type in ["decimal", "float"]:
|
|
81
|
-
return Double
|
|
82
87
|
if col.data_type == "dateTime":
|
|
83
88
|
# using the DATETIMEOFFSET directly in the temporary table caused issues when inserting data in the target
|
|
84
89
|
# table with INSERT INTO SELECT converts datetime VARCHAR to DATETIMEOFFSET without errors
|
|
85
90
|
return mssql.VARCHAR(100) if temp else mssql.DATETIMEOFFSET
|
|
86
|
-
if col.data_type == "integer" or col.data_type == "int":
|
|
87
|
-
return Integer
|
|
88
|
-
if col.data_type == "boolean":
|
|
89
|
-
return Boolean
|
|
90
|
-
if col.data_type == "byte":
|
|
91
|
-
return SmallInteger
|
|
92
|
-
if col.data_type == "long":
|
|
93
|
-
return BigInteger
|
|
94
91
|
if col.data_type == "date":
|
|
95
92
|
return mssql.VARCHAR(16)
|
|
96
93
|
if col.data_type == "time":
|
|
@@ -106,12 +103,7 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any:
|
|
|
106
103
|
if col.max_length == col.min_length:
|
|
107
104
|
return mssql.BINARY(col.max_length)
|
|
108
105
|
return mssql.VARBINARY(col.max_length)
|
|
109
|
-
|
|
110
|
-
logger.warning(
|
|
111
|
-
f"unknown type '{col.data_type}' for column '{col.name}', defaulting to VARCHAR(1000) "
|
|
112
|
-
f"(this can be overridden by providing a field type in the configuration)"
|
|
113
|
-
)
|
|
114
|
-
return mssql.VARCHAR(1000)
|
|
106
|
+
return types_mapping_default(temp, col)
|
|
115
107
|
|
|
116
108
|
|
|
117
109
|
def types_mapping_mysql(temp: bool, col: "DataModelColumn") -> Any:
|
|
@@ -167,6 +159,7 @@ class DataModelColumn:
|
|
|
167
159
|
min_length: int,
|
|
168
160
|
max_length: Union[int, None],
|
|
169
161
|
is_attr: bool,
|
|
162
|
+
has_suffix: bool,
|
|
170
163
|
is_content: bool,
|
|
171
164
|
allow_empty: bool,
|
|
172
165
|
ngroup: Union[int, None],
|
|
@@ -181,6 +174,7 @@ class DataModelColumn:
|
|
|
181
174
|
self.min_length = min_length
|
|
182
175
|
self.max_length = max_length
|
|
183
176
|
self.is_attr = is_attr
|
|
177
|
+
self.has_suffix = has_suffix
|
|
184
178
|
self.is_content = is_content
|
|
185
179
|
self.allow_empty = allow_empty
|
|
186
180
|
self.ngroup = ngroup
|
|
@@ -130,6 +130,7 @@ class DataModelTable:
|
|
|
130
130
|
min_length: int,
|
|
131
131
|
max_length: Union[int, None],
|
|
132
132
|
is_attr: bool,
|
|
133
|
+
has_suffix: bool,
|
|
133
134
|
is_content: bool,
|
|
134
135
|
allow_empty: bool,
|
|
135
136
|
ngroup: Union[str, None],
|
|
@@ -143,6 +144,7 @@ class DataModelTable:
|
|
|
143
144
|
min_length: minimum length
|
|
144
145
|
max_length: maximum length
|
|
145
146
|
is_attr: is XML attribute or element?
|
|
147
|
+
has_suffix: for an attribute, do we need the '_attr' suffix?
|
|
146
148
|
is_content: is content of a mixed type element?
|
|
147
149
|
allow_empty: is nullable?
|
|
148
150
|
ngroup: a string id signaling that the column belongs to a nested sequence
|
|
@@ -155,6 +157,7 @@ class DataModelTable:
|
|
|
155
157
|
min_length,
|
|
156
158
|
max_length,
|
|
157
159
|
is_attr,
|
|
160
|
+
has_suffix,
|
|
158
161
|
is_content,
|
|
159
162
|
allow_empty,
|
|
160
163
|
ngroup,
|
|
@@ -76,6 +76,7 @@ class DataModelTableTransformed(DataModelTable):
|
|
|
76
76
|
False,
|
|
77
77
|
False,
|
|
78
78
|
False,
|
|
79
|
+
False,
|
|
79
80
|
None,
|
|
80
81
|
self.config,
|
|
81
82
|
self.data_model,
|
|
@@ -89,6 +90,7 @@ class DataModelTableTransformed(DataModelTable):
|
|
|
89
90
|
max(max_lengths) if all(e is not None for e in max_lengths) else None,
|
|
90
91
|
False,
|
|
91
92
|
False,
|
|
93
|
+
False,
|
|
92
94
|
any(allow_empty),
|
|
93
95
|
None,
|
|
94
96
|
self.config,
|
|
@@ -193,6 +195,7 @@ class DataModelTableTransformed(DataModelTable):
|
|
|
193
195
|
child_field.min_length,
|
|
194
196
|
child_field.max_length,
|
|
195
197
|
child_field.is_attr,
|
|
198
|
+
child_field.has_suffix,
|
|
196
199
|
child_field.is_content,
|
|
197
200
|
child_field.allow_empty,
|
|
198
201
|
child_field.ngroup,
|
|
@@ -276,9 +279,12 @@ class DataModelTableTransformed(DataModelTable):
|
|
|
276
279
|
|
|
277
280
|
# if the table can be transformed, stop here
|
|
278
281
|
if self._is_table_choice_transform_applicable():
|
|
282
|
+
fields_transform = {}
|
|
283
|
+
for col in self.columns.values():
|
|
284
|
+
fields_transform[(self.type_name, col.name)] = (None, "join")
|
|
279
285
|
self._transform_to_choice()
|
|
280
286
|
self.is_simplified = True
|
|
281
|
-
return {self.type_name: "choice"},
|
|
287
|
+
return {self.type_name: "choice"}, fields_transform
|
|
282
288
|
|
|
283
289
|
# loop through field to transform them if need be
|
|
284
290
|
out_fields = []
|
|
@@ -128,31 +128,36 @@ class XMLConverter:
|
|
|
128
128
|
key
|
|
129
129
|
!= "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation"
|
|
130
130
|
):
|
|
131
|
-
content[key] = [val]
|
|
131
|
+
content[f"{key}__attr"] = [val.strip() if val.strip() else val]
|
|
132
132
|
|
|
133
133
|
if node.text and node.text.strip():
|
|
134
134
|
content["value"] = [node.text.strip()]
|
|
135
135
|
|
|
136
136
|
for element in node.iterchildren():
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
137
|
+
if isinstance(element.tag, str):
|
|
138
|
+
key = element.tag.split("}")[1] if "}" in element.tag else element.tag
|
|
139
|
+
node_type_key = (node_type, key)
|
|
140
|
+
value = None
|
|
141
|
+
if element.text:
|
|
142
|
+
value = (
|
|
143
|
+
element.text.strip() if element.text.strip() else element.text
|
|
144
|
+
)
|
|
145
|
+
if node_type_key not in self.model.fields_transforms:
|
|
146
|
+
# skip the node if it is not in the data model
|
|
147
|
+
continue
|
|
148
|
+
transform = self.model.fields_transforms[node_type_key][1]
|
|
149
|
+
if transform != "join":
|
|
150
|
+
value = self._parse_xml_node(
|
|
151
|
+
self.model.fields_transforms[node_type_key][0],
|
|
152
|
+
element,
|
|
153
|
+
transform not in ["elevate", "elevate_wo_prefix"],
|
|
154
|
+
hash_maps,
|
|
155
|
+
)
|
|
156
|
+
if value is not None:
|
|
157
|
+
if key in content:
|
|
158
|
+
content[key].append(value)
|
|
159
|
+
else:
|
|
160
|
+
content[key] = [value]
|
|
156
161
|
|
|
157
162
|
node = self._transform_node(node_type, content)
|
|
158
163
|
|
|
@@ -189,6 +194,7 @@ class XMLConverter:
|
|
|
189
194
|
hash_maps = {}
|
|
190
195
|
|
|
191
196
|
joined_values = False
|
|
197
|
+
skipped_nodes = 0
|
|
192
198
|
for event, element in etree.iterparse(
|
|
193
199
|
xml_file,
|
|
194
200
|
recover=recover,
|
|
@@ -196,12 +202,17 @@ class XMLConverter:
|
|
|
196
202
|
remove_blank_text=True,
|
|
197
203
|
):
|
|
198
204
|
key = element.tag.split("}")[1] if "}" in element.tag else element.tag
|
|
199
|
-
|
|
205
|
+
|
|
206
|
+
if event == "start" and skipped_nodes > 0:
|
|
207
|
+
skipped_nodes += 1
|
|
208
|
+
|
|
209
|
+
elif event == "start":
|
|
200
210
|
if nodes_stack[-1][0]:
|
|
201
211
|
node_type_key = (nodes_stack[-1][0], key)
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
212
|
+
if node_type_key not in self.model.fields_transforms:
|
|
213
|
+
skipped_nodes += 1
|
|
214
|
+
continue
|
|
215
|
+
node_type, transform = self.model.fields_transforms[node_type_key]
|
|
205
216
|
else:
|
|
206
217
|
node_type, transform = self.model.root_table, None
|
|
207
218
|
joined_values = transform == "join"
|
|
@@ -212,28 +223,41 @@ class XMLConverter:
|
|
|
212
223
|
attrib_key
|
|
213
224
|
!= "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation"
|
|
214
225
|
):
|
|
215
|
-
content[attrib_key] = [
|
|
226
|
+
content[f"{attrib_key}__attr"] = [
|
|
227
|
+
attrib_val.strip() if attrib_val.strip() else attrib_val
|
|
228
|
+
]
|
|
216
229
|
nodes_stack.append((node_type, content))
|
|
217
230
|
|
|
231
|
+
elif event == "end" and skipped_nodes > 0:
|
|
232
|
+
skipped_nodes -= 1
|
|
233
|
+
|
|
218
234
|
elif event == "end":
|
|
219
|
-
# joined_values was set with the previous "start" event just before
|
|
235
|
+
# joined_values was set with the previous "start" event just before and corresponds to lists of simple
|
|
236
|
+
# type elements
|
|
220
237
|
if joined_values:
|
|
238
|
+
value = None
|
|
221
239
|
if element.text:
|
|
222
|
-
if
|
|
223
|
-
|
|
240
|
+
if element.text.strip():
|
|
241
|
+
value = element.text.strip()
|
|
224
242
|
else:
|
|
225
|
-
|
|
243
|
+
value = element.text
|
|
244
|
+
if key in nodes_stack[-1][1]:
|
|
245
|
+
nodes_stack[-1][1][key].append(value)
|
|
246
|
+
else:
|
|
247
|
+
nodes_stack[-1][1][key] = [value]
|
|
248
|
+
|
|
249
|
+
# else, we have completed a complex type node
|
|
226
250
|
else:
|
|
227
251
|
node = nodes_stack.pop()
|
|
228
252
|
if nodes_stack[-1][0]:
|
|
229
253
|
node_type_key = (nodes_stack[-1][0], key)
|
|
230
|
-
node_type, transform = self.model.fields_transforms
|
|
231
|
-
node_type_key
|
|
232
|
-
|
|
254
|
+
node_type, transform = self.model.fields_transforms[
|
|
255
|
+
node_type_key
|
|
256
|
+
]
|
|
233
257
|
else:
|
|
234
258
|
node_type, transform = self.model.root_table, None
|
|
235
|
-
if element.text:
|
|
236
|
-
node[1]["value"] = [element.text]
|
|
259
|
+
if element.text and element.text.strip():
|
|
260
|
+
node[1]["value"] = [element.text.strip()]
|
|
237
261
|
node = self._transform_node(*node)
|
|
238
262
|
if transform not in ["elevate", "elevate_wo_prefix"]:
|
|
239
263
|
node = self._compute_hash_deduplicate(node, hash_maps)
|
|
@@ -278,6 +302,39 @@ class XMLConverter:
|
|
|
278
302
|
child_key, val = list(content.items())[0]
|
|
279
303
|
content = {"type": [child_key], "value": val}
|
|
280
304
|
|
|
305
|
+
# convert some simple types to python types
|
|
306
|
+
if node_type in self.model.tables:
|
|
307
|
+
table = self.model.tables[node_type]
|
|
308
|
+
for key in table.columns:
|
|
309
|
+
content_key = (
|
|
310
|
+
(
|
|
311
|
+
f"{key[:-5]}__attr"
|
|
312
|
+
if table.columns[key].has_suffix
|
|
313
|
+
else f"{key}__attr"
|
|
314
|
+
)
|
|
315
|
+
if table.columns[key].is_attr
|
|
316
|
+
else key
|
|
317
|
+
)
|
|
318
|
+
if content_key in content:
|
|
319
|
+
if table.columns[key].data_type in ["decimal", "float"]:
|
|
320
|
+
content[content_key] = [float(v) for v in content[content_key]]
|
|
321
|
+
elif table.columns[key].data_type in [
|
|
322
|
+
"integer",
|
|
323
|
+
"int",
|
|
324
|
+
"nonPositiveInteger",
|
|
325
|
+
"nonNegativeInteger",
|
|
326
|
+
"positiveInteger",
|
|
327
|
+
"negativeInteger",
|
|
328
|
+
"short",
|
|
329
|
+
"byte",
|
|
330
|
+
"long",
|
|
331
|
+
]:
|
|
332
|
+
content[content_key] = [int(v) for v in content[content_key]]
|
|
333
|
+
elif table.columns[key].data_type == "boolean":
|
|
334
|
+
content[content_key] = [
|
|
335
|
+
v == "true" or v == "1" for v in content[content_key]
|
|
336
|
+
]
|
|
337
|
+
|
|
281
338
|
return node_type, content
|
|
282
339
|
|
|
283
340
|
def _compute_hash_deduplicate(self, node: tuple, hash_maps: dict) -> tuple:
|
|
@@ -292,12 +349,28 @@ class XMLConverter:
|
|
|
292
349
|
A tuple of (node_type, content, hash) representing a node after deduplication
|
|
293
350
|
"""
|
|
294
351
|
node_type, content = node
|
|
352
|
+
if node_type not in self.model.tables:
|
|
353
|
+
return "", None, b""
|
|
295
354
|
table = self.model.tables[node_type]
|
|
296
355
|
|
|
297
356
|
h = self.model.model_config["record_hash_constructor"]()
|
|
298
|
-
for field_type, name,
|
|
357
|
+
for field_type, name, field in table.fields:
|
|
299
358
|
if field_type == "col":
|
|
300
|
-
|
|
359
|
+
if field.is_attr:
|
|
360
|
+
h.update(
|
|
361
|
+
str(
|
|
362
|
+
content.get(
|
|
363
|
+
(
|
|
364
|
+
f"{name[:-5]}__attr"
|
|
365
|
+
if field.has_suffix
|
|
366
|
+
else f"{name}__attr"
|
|
367
|
+
),
|
|
368
|
+
None,
|
|
369
|
+
)
|
|
370
|
+
).encode("utf-8")
|
|
371
|
+
)
|
|
372
|
+
else:
|
|
373
|
+
h.update(str(content.get(name, None)).encode("utf-8"))
|
|
301
374
|
elif field_type == "rel1":
|
|
302
375
|
h.update(content[name][0][2] if name in content else b"")
|
|
303
376
|
elif field_type == "reln":
|
|
@@ -419,13 +492,37 @@ class XMLConverter:
|
|
|
419
492
|
attributes = {}
|
|
420
493
|
text_content = None
|
|
421
494
|
if field_type == "col":
|
|
422
|
-
|
|
495
|
+
content_key = (
|
|
496
|
+
(
|
|
497
|
+
f"{rel_name[:-5]}__attr"
|
|
498
|
+
if rel.has_suffix
|
|
499
|
+
else f"{rel_name}__attr"
|
|
500
|
+
)
|
|
501
|
+
if rel.is_attr
|
|
502
|
+
else rel_name
|
|
503
|
+
)
|
|
504
|
+
if content_key in content:
|
|
505
|
+
if rel.data_type in [
|
|
506
|
+
"decimal",
|
|
507
|
+
"float",
|
|
508
|
+
]: # remove trailing ".0" for decimal and float
|
|
509
|
+
val = str(content[content_key][0])
|
|
510
|
+
val = [val.rstrip("0").rstrip(".") if "." in val else val]
|
|
511
|
+
elif isinstance(content[content_key][0], datetime):
|
|
512
|
+
val = [
|
|
513
|
+
content[content_key][0].isoformat(timespec="milliseconds")
|
|
514
|
+
]
|
|
515
|
+
else:
|
|
516
|
+
val = content[content_key]
|
|
423
517
|
if rel.is_attr:
|
|
424
|
-
|
|
518
|
+
if rel.has_suffix:
|
|
519
|
+
attributes[rel.name_chain[-1][0][:-5]] = val[0]
|
|
520
|
+
else:
|
|
521
|
+
attributes[rel.name_chain[-1][0]] = val[0]
|
|
425
522
|
elif rel.is_content:
|
|
426
|
-
text_content =
|
|
523
|
+
text_content = val[0]
|
|
427
524
|
else:
|
|
428
|
-
for field_value in
|
|
525
|
+
for field_value in val:
|
|
429
526
|
child = etree.Element(rel.name_chain[-1][0])
|
|
430
527
|
if isinstance(field_value, datetime):
|
|
431
528
|
field_value = field_value.isoformat()
|
|
@@ -446,7 +543,8 @@ class XMLConverter:
|
|
|
446
543
|
if prev_ngroup and rel.ngroup != prev_ngroup:
|
|
447
544
|
for ngroup_children in zip_longest(*ngroup_stack):
|
|
448
545
|
for child in ngroup_children:
|
|
449
|
-
|
|
546
|
+
if child is not None:
|
|
547
|
+
nodes_stack[-1][1].append(child)
|
|
450
548
|
ngroup_stack = []
|
|
451
549
|
prev_ngroup = rel.ngroup
|
|
452
550
|
if len(children) > 0:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: xml2db
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.4
|
|
4
4
|
Summary: Import complex XML files to a relational database
|
|
5
5
|
Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
|
|
6
6
|
Project-URL: Documentation, https://cre-dev.github.io/xml2db
|
|
@@ -13,11 +13,11 @@ Requires-Python: >=3.9
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: sqlalchemy>1.4
|
|
16
|
-
Requires-Dist: xmlschema
|
|
17
|
-
Requires-Dist: lxml
|
|
16
|
+
Requires-Dist: xmlschema>=3.3.2
|
|
17
|
+
Requires-Dist: lxml>=5.1.0
|
|
18
18
|
Provides-Extra: docs
|
|
19
|
-
Requires-Dist: mkdocs-material
|
|
20
|
-
Requires-Dist: mkdocstrings-python
|
|
19
|
+
Requires-Dist: mkdocs-material>=9.5.34; extra == "docs"
|
|
20
|
+
Requires-Dist: mkdocstrings-python>=1.11.1; extra == "docs"
|
|
21
21
|
Provides-Extra: tests
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "tests"
|
|
23
23
|
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import pprint
|
|
2
3
|
|
|
3
4
|
import pytest
|
|
4
5
|
from lxml import etree
|
|
5
6
|
|
|
6
7
|
from xml2db import DataModel
|
|
7
8
|
from xml2db.xml_converter import XMLConverter, remove_record_hash
|
|
9
|
+
from .conftest import list_xml_path, models_path
|
|
8
10
|
from .sample_models import models
|
|
9
11
|
|
|
10
12
|
|
|
@@ -13,19 +15,20 @@ from .sample_models import models
|
|
|
13
15
|
[
|
|
14
16
|
{**model, **version, "xml_file": xml_file}
|
|
15
17
|
for model in models
|
|
16
|
-
for xml_file in
|
|
18
|
+
for xml_file in list_xml_path(model, "xml")
|
|
19
|
+
+ list_xml_path(model, "equivalent_xml")
|
|
17
20
|
for version in model["versions"]
|
|
18
21
|
],
|
|
19
22
|
)
|
|
20
|
-
def
|
|
23
|
+
def test_iterative_recursive_parsing(test_config):
|
|
21
24
|
"""Test whether iterative and recursive parsing give same results"""
|
|
22
25
|
model = DataModel(
|
|
23
|
-
test_config["
|
|
26
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
24
27
|
short_name=test_config["id"],
|
|
25
28
|
model_config=test_config["config"],
|
|
26
29
|
)
|
|
27
30
|
converter = XMLConverter(model)
|
|
28
|
-
file_path =
|
|
31
|
+
file_path = test_config["xml_file"]
|
|
29
32
|
|
|
30
33
|
parsed_recursive = converter.parse_xml(
|
|
31
34
|
file_path, file_path, skip_validation=True, iterparse=False
|
|
@@ -42,7 +45,7 @@ def test_document_tree_parsing(test_config):
|
|
|
42
45
|
[
|
|
43
46
|
{**model, **version, "xml_file": xml_file}
|
|
44
47
|
for model in models
|
|
45
|
-
for xml_file in
|
|
48
|
+
for xml_file in list_xml_path(model, "xml")
|
|
46
49
|
for version in model["versions"]
|
|
47
50
|
],
|
|
48
51
|
)
|
|
@@ -50,22 +53,22 @@ def test_document_tree_to_flat_data(test_config):
|
|
|
50
53
|
"""A test for document tree to flat data conversion and back"""
|
|
51
54
|
|
|
52
55
|
model = DataModel(
|
|
53
|
-
test_config["
|
|
56
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
54
57
|
short_name=test_config["id"],
|
|
55
58
|
model_config=test_config["config"],
|
|
56
59
|
)
|
|
57
60
|
converter = XMLConverter(model)
|
|
58
61
|
|
|
59
|
-
file_path =
|
|
62
|
+
file_path = test_config["xml_file"]
|
|
60
63
|
|
|
61
64
|
# parse XML to document tree
|
|
62
65
|
converter.parse_xml(file_path, file_path)
|
|
63
|
-
exp_doc_tree = remove_record_hash(converter.document_tree)
|
|
66
|
+
exp_doc_tree = pprint.pformat(remove_record_hash(converter.document_tree))
|
|
64
67
|
|
|
65
68
|
# parse XML to document tree and then flat data model
|
|
66
69
|
doc = model.parse_xml(file_path)
|
|
67
70
|
# and convert it back to document tree
|
|
68
|
-
act_doc_tree = doc.flat_data_to_doc_tree()
|
|
71
|
+
act_doc_tree = pprint.pformat(doc.flat_data_to_doc_tree())
|
|
69
72
|
|
|
70
73
|
assert act_doc_tree == exp_doc_tree
|
|
71
74
|
|
|
@@ -75,7 +78,7 @@ def test_document_tree_to_flat_data(test_config):
|
|
|
75
78
|
[
|
|
76
79
|
{**model, **version, "xml_file": xml_file}
|
|
77
80
|
for model in models
|
|
78
|
-
for xml_file in
|
|
81
|
+
for xml_file in list_xml_path(model, "xml")
|
|
79
82
|
for version in model["versions"]
|
|
80
83
|
],
|
|
81
84
|
)
|
|
@@ -83,13 +86,13 @@ def test_document_tree_to_xml(test_config):
|
|
|
83
86
|
"""A test for document tree to xml conversion and back"""
|
|
84
87
|
|
|
85
88
|
model = DataModel(
|
|
86
|
-
test_config["
|
|
89
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
87
90
|
short_name=test_config["id"],
|
|
88
91
|
model_config=test_config["config"],
|
|
89
92
|
)
|
|
90
93
|
converter = XMLConverter(model)
|
|
91
94
|
|
|
92
|
-
file_path =
|
|
95
|
+
file_path = test_config["xml_file"]
|
|
93
96
|
|
|
94
97
|
# parse XML to document tree
|
|
95
98
|
converter.parse_xml(file_path, file_path)
|
|
@@ -112,3 +115,29 @@ def test_document_tree_to_xml(test_config):
|
|
|
112
115
|
ref_xml = f.read()
|
|
113
116
|
|
|
114
117
|
assert xml == ref_xml
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@pytest.mark.parametrize(
|
|
121
|
+
"test_config",
|
|
122
|
+
[
|
|
123
|
+
{**model, **version}
|
|
124
|
+
for model in models
|
|
125
|
+
for version in model["versions"]
|
|
126
|
+
if os.path.isdir(os.path.join(models_path, model["id"], "equivalent_xml"))
|
|
127
|
+
],
|
|
128
|
+
)
|
|
129
|
+
def test_equivalent_xml(test_config):
|
|
130
|
+
"""A test for xml documents which should result in the same extracted data"""
|
|
131
|
+
|
|
132
|
+
xml_files = list_xml_path(test_config, "equivalent_xml")
|
|
133
|
+
|
|
134
|
+
if len(xml_files) > 1:
|
|
135
|
+
model = DataModel(
|
|
136
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
137
|
+
short_name=test_config["id"],
|
|
138
|
+
model_config=test_config["config"],
|
|
139
|
+
)
|
|
140
|
+
ref_data = model.parse_xml(xml_files[0])
|
|
141
|
+
for xml_file in xml_files[1:]:
|
|
142
|
+
equ_data = model.parse_xml(xml_file)
|
|
143
|
+
assert ref_data.data == equ_data.data
|
|
@@ -5,6 +5,7 @@ from sqlalchemy.dialects import postgresql, mssql, mysql
|
|
|
5
5
|
|
|
6
6
|
from xml2db import DataModel
|
|
7
7
|
from .sample_models import models
|
|
8
|
+
from .conftest import models_path
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
@pytest.mark.parametrize(
|
|
@@ -19,14 +20,15 @@ def test_model_erd(test_config):
|
|
|
19
20
|
"""A test to check if generated ERD matches saved output"""
|
|
20
21
|
|
|
21
22
|
model = DataModel(
|
|
22
|
-
test_config["
|
|
23
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
23
24
|
short_name=test_config["id"],
|
|
24
25
|
model_config=test_config["config"],
|
|
25
26
|
)
|
|
26
27
|
|
|
27
28
|
expected = open(
|
|
28
29
|
os.path.join(
|
|
29
|
-
|
|
30
|
+
models_path,
|
|
31
|
+
test_config["id"],
|
|
30
32
|
f"{test_config['id']}_erd_version{test_config['version_id']}.md",
|
|
31
33
|
),
|
|
32
34
|
"r",
|
|
@@ -49,7 +51,7 @@ def test_model_ddl(test_config):
|
|
|
49
51
|
"""A test to check if generated SQL DDL matches saved output"""
|
|
50
52
|
|
|
51
53
|
model = DataModel(
|
|
52
|
-
test_config["
|
|
54
|
+
str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
|
|
53
55
|
short_name=test_config["id"],
|
|
54
56
|
model_config=test_config["config"],
|
|
55
57
|
db_type=test_config["dialect"].name,
|
|
@@ -57,7 +59,8 @@ def test_model_ddl(test_config):
|
|
|
57
59
|
|
|
58
60
|
expected = open(
|
|
59
61
|
os.path.join(
|
|
60
|
-
|
|
62
|
+
models_path,
|
|
63
|
+
test_config["id"],
|
|
61
64
|
f"{test_config['id']}_ddl_{test_config['dialect'].name}_version{test_config['version_id']}.sql",
|
|
62
65
|
),
|
|
63
66
|
"r",
|
|
@@ -4,7 +4,7 @@ import pytest
|
|
|
4
4
|
from lxml import etree
|
|
5
5
|
|
|
6
6
|
from xml2db.xml_converter import XMLConverter, remove_record_hash
|
|
7
|
-
from .
|
|
7
|
+
from .conftest import list_xml_path
|
|
8
8
|
from .sample_models import models
|
|
9
9
|
|
|
10
10
|
|
|
@@ -17,10 +17,7 @@ def test_database_xml_roundtrip(setup_db_model, model_config):
|
|
|
17
17
|
"""A test for roundtrip insert to the database from and to XML"""
|
|
18
18
|
|
|
19
19
|
model = setup_db_model
|
|
20
|
-
xml_files =
|
|
21
|
-
os.path.join(model_config["xml_path"], file)
|
|
22
|
-
for file in os.listdir(model_config["xml_path"])
|
|
23
|
-
]
|
|
20
|
+
xml_files = list_xml_path(model_config, "xml")
|
|
24
21
|
|
|
25
22
|
for file in xml_files:
|
|
26
23
|
# do parse and insert into the database
|
|
@@ -59,10 +56,7 @@ def test_database_document_tree_roundtrip(setup_db_model, model_config):
|
|
|
59
56
|
"""A test for roundtrip insert to the database from and to document tree"""
|
|
60
57
|
|
|
61
58
|
model = setup_db_model
|
|
62
|
-
xml_files =
|
|
63
|
-
os.path.join(model_config["xml_path"], file)
|
|
64
|
-
for file in os.listdir(model_config["xml_path"])
|
|
65
|
-
]
|
|
59
|
+
xml_files = list_xml_path(model_config, "xml")
|
|
66
60
|
|
|
67
61
|
for file in xml_files:
|
|
68
62
|
# do parse and insert into the database
|
|
@@ -92,10 +86,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf
|
|
|
92
86
|
"""A test for roundtrip insert to the database from and to document tree"""
|
|
93
87
|
|
|
94
88
|
model = setup_db_model
|
|
95
|
-
xml_files =
|
|
96
|
-
os.path.join(model_config["xml_path"], file)
|
|
97
|
-
for file in os.listdir(model_config["xml_path"])
|
|
98
|
-
]
|
|
89
|
+
xml_files = list_xml_path(model_config, "xml")
|
|
99
90
|
|
|
100
91
|
flat_data = None
|
|
101
92
|
doc = None
|
|
@@ -129,7 +120,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf
|
|
|
129
120
|
[
|
|
130
121
|
{**model, **version, "xml_file": xml_file}
|
|
131
122
|
for model in models
|
|
132
|
-
for xml_file in
|
|
123
|
+
for xml_file in list_xml_path(model, "xml")
|
|
133
124
|
for version in model["versions"]
|
|
134
125
|
],
|
|
135
126
|
)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import xml.etree.ElementTree
|
|
2
|
-
|
|
3
1
|
import lxml.etree
|
|
4
2
|
import pytest
|
|
3
|
+
import os
|
|
5
4
|
|
|
6
5
|
from xml2db import DataModel
|
|
7
6
|
from .sample_models import models
|
|
7
|
+
from .conftest import models_path
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@pytest.mark.parametrize(
|
|
@@ -27,7 +27,9 @@ from .sample_models import models
|
|
|
27
27
|
def test_invalid_xml(args: tuple):
|
|
28
28
|
|
|
29
29
|
file_name, iterparse, recover, exception = args
|
|
30
|
-
data_model = DataModel(
|
|
30
|
+
data_model = DataModel(
|
|
31
|
+
str(os.path.join(models_path, models[0]["id"], models[0]["xsd"]))
|
|
32
|
+
)
|
|
31
33
|
|
|
32
34
|
if exception is None:
|
|
33
35
|
data_model.parse_xml(
|
|
@@ -49,8 +51,8 @@ def test_invalid_xml(args: tuple):
|
|
|
49
51
|
@pytest.mark.parametrize(
|
|
50
52
|
"args",
|
|
51
53
|
[
|
|
52
|
-
("invalid", True, False,
|
|
53
|
-
("invalid", True, True,
|
|
54
|
+
("invalid", True, False, None),
|
|
55
|
+
("invalid", True, True, None),
|
|
54
56
|
("invalid", False, False, None),
|
|
55
57
|
("invalid", False, True, None),
|
|
56
58
|
("malformed_recover", True, False, lxml.etree.XMLSyntaxError),
|
|
@@ -58,7 +60,7 @@ def test_invalid_xml(args: tuple):
|
|
|
58
60
|
("malformed_recover", False, False, lxml.etree.XMLSyntaxError),
|
|
59
61
|
("malformed_recover", False, True, None),
|
|
60
62
|
("malformed_no_recover", True, False, lxml.etree.XMLSyntaxError),
|
|
61
|
-
("malformed_no_recover", True, True,
|
|
63
|
+
("malformed_no_recover", True, True, None),
|
|
62
64
|
("malformed_no_recover", False, False, lxml.etree.XMLSyntaxError),
|
|
63
65
|
("malformed_no_recover", False, True, None),
|
|
64
66
|
],
|
|
@@ -66,7 +68,9 @@ def test_invalid_xml(args: tuple):
|
|
|
66
68
|
def test_invalid_xml_skip_verify(args: tuple):
|
|
67
69
|
|
|
68
70
|
file_name, iterparse, recover, exception = args
|
|
69
|
-
data_model = DataModel(
|
|
71
|
+
data_model = DataModel(
|
|
72
|
+
str(os.path.join(models_path, models[0]["id"], models[0]["xsd"]))
|
|
73
|
+
)
|
|
70
74
|
|
|
71
75
|
if exception is None:
|
|
72
76
|
data_model.parse_xml(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|