xml2db 0.12.0__tar.gz → 0.12.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {xml2db-0.12.0/src/xml2db.egg-info → xml2db-0.12.2}/PKG-INFO +13 -13
  2. {xml2db-0.12.0 → xml2db-0.12.2}/README.md +9 -9
  3. {xml2db-0.12.0 → xml2db-0.12.2}/pyproject.toml +3 -3
  4. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/document.py +1 -2
  5. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/model.py +35 -19
  6. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/duplicated_table.py +15 -1
  7. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/reused_table.py +17 -1
  8. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/xml_converter.py +1 -3
  9. {xml2db-0.12.0 → xml2db-0.12.2/src/xml2db.egg-info}/PKG-INFO +13 -13
  10. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db.egg-info/SOURCES.txt +2 -1
  11. xml2db-0.12.2/src/xml2db.egg-info/requires.txt +10 -0
  12. {xml2db-0.12.0 → xml2db-0.12.2}/tests/test_roundtrip.py +2 -1
  13. xml2db-0.12.2/tests/test_validation.py +85 -0
  14. xml2db-0.12.0/src/xml2db.egg-info/requires.txt +0 -10
  15. {xml2db-0.12.0 → xml2db-0.12.2}/LICENSE +0 -0
  16. {xml2db-0.12.0 → xml2db-0.12.2}/setup.cfg +0 -0
  17. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/__init__.py +0 -0
  18. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/exceptions.py +0 -0
  19. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/__init__.py +0 -0
  20. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/column.py +0 -0
  21. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/relations.py +0 -0
  22. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/table.py +0 -0
  23. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db/table/transformed_table.py +0 -0
  24. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db.egg-info/dependency_links.txt +0 -0
  25. {xml2db-0.12.0 → xml2db-0.12.2}/src/xml2db.egg-info/top_level.txt +0 -0
  26. {xml2db-0.12.0 → xml2db-0.12.2}/tests/test_conversions.py +1 -1
  27. {xml2db-0.12.0 → xml2db-0.12.2}/tests/test_models_output.py +1 -1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xml2db
3
- Version: 0.12.0
3
+ Version: 0.12.2
4
4
  Summary: Import complex XML files to a relational database
5
5
  Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
6
6
  Project-URL: Documentation, https://cre-dev.github.io/xml2db
@@ -13,19 +13,18 @@ Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: sqlalchemy>1.4
16
- Requires-Dist: xmlschema==3.1.0
16
+ Requires-Dist: xmlschema==3.3.2
17
17
  Requires-Dist: lxml==5.1.0
18
18
  Provides-Extra: docs
19
- Requires-Dist: mkdocs-material==9.5.23; extra == "docs"
20
- Requires-Dist: mkdocstrings-python==1.10.2; extra == "docs"
19
+ Requires-Dist: mkdocs-material==9.5.34; extra == "docs"
20
+ Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
21
21
  Provides-Extra: tests
22
22
  Requires-Dist: pytest>=7.0; extra == "tests"
23
23
 
24
- # Loading complex XML files to a relational database
24
+ # Loading XML files into a relational database
25
25
 
26
- `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It is designed to
27
- handle complex XML files which cannot be denormalized to flat tables. It works out of the box, without any custom
28
- mapping rules.
26
+ `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It handles complex
27
+ XML files which cannot be denormalized to flat tables, and works out of the box, without any custom mapping rules.
29
28
 
30
29
  It can be used within an [Extract, Load, Transform](https://docs.getdbt.com/terms/elt) data pipeline pattern as it
31
30
  allows loading XML files into a relational data model which is very close to the source data, yet easy to work with.
@@ -52,7 +51,7 @@ document = data_model.parse_xml(
52
51
  document.insert_into_target_tables()
53
52
  ```
54
53
 
55
- The resulting data model will adhere closely to the XSD schema. However, `xml2db` will perform a few systematic
54
+ The data model created by `xml2db` will be close to the XSD schema. However, `xml2db` will perform a few systematic
56
55
  simplifications aimed at limiting the complexity of the resulting data model and the storage footprint. The resulting
57
56
  data model can be configured, but the above code will work out of the box, with reasonable defaults.
58
57
 
@@ -60,9 +59,9 @@ The raw data loaded into the database can then be processed if need be, using fo
60
59
  SQL views or stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
61
60
 
62
61
  This package uses `sqlalchemy` to interact with the database, so it should work with different database backends.
63
- Automated integration tests run against PostgreSQL, MySQL and MS SQL Server. `xml2db` does not work with SQLite. You may
64
- have to install additional packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL or
65
- `pyodbc` for MS SQL Server).
62
+ Automated integration tests run against PostgreSQL, MySQL, MS SQL Server and DuckDB. You may have to install additional
63
+ packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL, `pyodbc` for MS SQL Server or
64
+ `duckdb_engine` for DuckDB).
66
65
 
67
66
  **Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
68
67
 
@@ -97,7 +96,8 @@ pytest -m "not dbtest"
97
96
 
98
97
  ## Contributing
99
98
 
100
- `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex XML data.
99
+ `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex
100
+ XML data.
101
101
 
102
102
  Contributions are welcome, as well as bug reports, starting on the project's
103
103
  [issue page](https://github.com/cre-dev/xml2db/issues).
@@ -1,8 +1,7 @@
1
- # Loading complex XML files to a relational database
1
+ # Loading XML files into a relational database
2
2
 
3
- `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It is designed to
4
- handle complex XML files which cannot be denormalized to flat tables. It works out of the box, without any custom
5
- mapping rules.
3
+ `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It handles complex
4
+ XML files which cannot be denormalized to flat tables, and works out of the box, without any custom mapping rules.
6
5
 
7
6
  It can be used within an [Extract, Load, Transform](https://docs.getdbt.com/terms/elt) data pipeline pattern as it
8
7
  allows loading XML files into a relational data model which is very close to the source data, yet easy to work with.
@@ -29,7 +28,7 @@ document = data_model.parse_xml(
29
28
  document.insert_into_target_tables()
30
29
  ```
31
30
 
32
- The resulting data model will adhere closely to the XSD schema. However, `xml2db` will perform a few systematic
31
+ The data model created by `xml2db` will be close to the XSD schema. However, `xml2db` will perform a few systematic
33
32
  simplifications aimed at limiting the complexity of the resulting data model and the storage footprint. The resulting
34
33
  data model can be configured, but the above code will work out of the box, with reasonable defaults.
35
34
 
@@ -37,9 +36,9 @@ The raw data loaded into the database can then be processed if need be, using fo
37
36
  SQL views or stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
38
37
 
39
38
  This package uses `sqlalchemy` to interact with the database, so it should work with different database backends.
40
- Automated integration tests run against PostgreSQL, MySQL and MS SQL Server. `xml2db` does not work with SQLite. You may
41
- have to install additional packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL or
42
- `pyodbc` for MS SQL Server).
39
+ Automated integration tests run against PostgreSQL, MySQL, MS SQL Server and DuckDB. You may have to install additional
40
+ packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL, `pyodbc` for MS SQL Server or
41
+ `duckdb_engine` for DuckDB).
43
42
 
44
43
  **Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
45
44
 
@@ -74,7 +73,8 @@ pytest -m "not dbtest"
74
73
 
75
74
  ## Contributing
76
75
 
77
- `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex XML data.
76
+ `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex
77
+ XML data.
78
78
 
79
79
  Contributions are welcome, as well as bug reports, starting on the project's
80
80
  [issue page](https://github.com/cre-dev/xml2db/issues).
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xml2db"
7
- version = "0.12.0"
7
+ version = "0.12.2"
8
8
  authors = [
9
9
  { name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
10
10
  ]
@@ -18,12 +18,12 @@ classifiers = [
18
18
  ]
19
19
  dependencies = [
20
20
  "sqlalchemy>1.4",
21
- "xmlschema==3.1.0",
21
+ "xmlschema==3.3.2",
22
22
  "lxml==5.1.0",
23
23
  ]
24
24
 
25
25
  [project.optional-dependencies]
26
- docs = ["mkdocs-material==9.5.23", "mkdocstrings-python==1.10.2"]
26
+ docs = ["mkdocs-material==9.5.34", "mkdocstrings-python==1.11.1"]
27
27
  tests = ["pytest>=7.0"]
28
28
 
29
29
  [project.urls]
@@ -2,7 +2,7 @@ import csv
2
2
  import datetime
3
3
  import logging
4
4
  from io import BytesIO
5
- from typing import Union, TYPE_CHECKING, Dict
5
+ from typing import Union, TYPE_CHECKING
6
6
  from zoneinfo import ZoneInfo
7
7
  from sqlalchemy import Column, Table, text, select
8
8
  from sqlalchemy.engine import Connection
@@ -12,7 +12,6 @@ from lxml import etree
12
12
  if TYPE_CHECKING:
13
13
  from .model import DataModel
14
14
 
15
- from .exceptions import DataModelConfigError
16
15
  from .xml_converter import XMLConverter
17
16
 
18
17
  logger = logging.getLogger(__name__)
@@ -8,8 +8,10 @@ import hashlib
8
8
 
9
9
  import xmlschema
10
10
  import sqlalchemy
11
+ from lxml import etree
11
12
  from sqlalchemy import MetaData, create_engine, inspect
12
13
  from sqlalchemy.sql.ddl import CreateIndex, CreateTable
14
+ from sqlalchemy.exc import ProgrammingError
13
15
  from graphlib import TopologicalSorter
14
16
 
15
17
  from .document import Document
@@ -49,6 +51,7 @@ class DataModel:
49
51
 
50
52
  Attributes:
51
53
  xml_schema: The `xmlschema.XMLSchema` object associated with this data model
54
+ lxml_schema: The `lxml.etree.XMLSchema` object associated with this data model
52
55
  data_flow_name: A short identifier used for the data model (`short_name` argument value)
53
56
  data_flow_long_name: A longer name for the data model (`long_name` argument value)
54
57
  db_schema: A database schema name to store the database tables
@@ -72,22 +75,22 @@ class DataModel:
72
75
  base_url: str = None,
73
76
  model_config: dict = None,
74
77
  connection_string: str = None,
75
- db_engine: str = None,
78
+ db_engine: sqlalchemy.Engine = None,
76
79
  db_type: str = None,
77
80
  db_schema: str = None,
78
81
  temp_prefix: str = None,
79
82
  ):
80
83
  self.model_config = self._validate_config(model_config)
81
- self.tables_config = model_config.get("tables", {})
82
-
83
- self.xml_schema = xmlschema.XMLSchema(
84
- os.path.basename(xsd_file) if base_url is None else xsd_file,
85
- base_url=(
86
- base_url
87
- if base_url is not None
88
- else os.path.normpath(os.path.dirname(xsd_file))
89
- ),
90
- )
84
+ self.tables_config = model_config.get("tables", {}) if model_config else {}
85
+
86
+ xsd_file_name = xsd_file
87
+ if base_url is None:
88
+ base_url = os.path.normpath(os.path.dirname(xsd_file))
89
+ xsd_file_name = os.path.basename(xsd_file)
90
+
91
+ self.xml_schema = xmlschema.XMLSchema(xsd_file_name, base_url=base_url)
92
+ self.lxml_schema = etree.XMLSchema(etree.parse(xsd_file))
93
+
91
94
  self.xml_converter = XMLConverter(data_model=self)
92
95
  self.data_flow_name = short_name
93
96
  self.data_flow_long_name = long_name
@@ -104,10 +107,12 @@ class DataModel:
104
107
  else:
105
108
  engine_options = {}
106
109
  if "mssql" in connection_string:
107
- engine_options = {"fast_executemany": True}
110
+ engine_options = {
111
+ "fast_executemany": True,
112
+ "isolation_level": "SERIALIZABLE",
113
+ }
108
114
  self.engine = create_engine(
109
115
  connection_string,
110
- isolation_level="SERIALIZABLE",
111
116
  **engine_options,
112
117
  )
113
118
  self.db_type = self.engine.dialect.name
@@ -647,13 +652,24 @@ class DataModel:
647
652
  You do not have to call this method explicitly when using
648
653
  [`Document.insert_into_target_tables()`](document.md#xml2db.document.Document.insert_into_target_tables).
649
654
  """
655
+
656
+ def do_create_schema():
657
+ with self.engine.connect() as conn:
658
+ conn.execute(sqlalchemy.schema.CreateSchema(self.db_schema))
659
+ conn.commit()
660
+
650
661
  if self.db_schema is not None:
651
- inspector = inspect(self.engine)
652
- if self.db_schema not in inspector.get_schema_names():
653
- with self.engine.connect() as conn:
654
- conn.execute(sqlalchemy.schema.CreateSchema(self.db_schema))
655
- conn.commit()
656
- logger.info(f"Created schema: {self.db_schema}")
662
+ if self.db_type == "duckdb":
663
+ try:
664
+ do_create_schema()
665
+ except ProgrammingError:
666
+ pass
667
+ else:
668
+ inspector = inspect(self.engine)
669
+ if self.db_schema not in inspector.get_schema_names():
670
+ do_create_schema()
671
+
672
+ logger.info(f"Created schema: {self.db_schema}")
657
673
 
658
674
  def drop_all_tables(self):
659
675
  """Drop the data model target (unprefixed) tables.
@@ -9,6 +9,7 @@ from sqlalchemy import (
9
9
  Boolean,
10
10
  select,
11
11
  and_,
12
+ Sequence,
12
13
  )
13
14
 
14
15
  from .transformed_table import DataModelTableTransformed
@@ -83,10 +84,23 @@ class DataModelTableDuplicated(DataModelTableTransformed):
83
84
  if callable(self.config.get("extra_args", []))
84
85
  else self.config.get("extra_args", [])
85
86
  )
87
+ if self.data_model.db_type == "duckdb":
88
+ pk_sequence = Sequence(f"pk_sequ_{self.name}")
89
+ pk_col = Column(
90
+ f"pk_{self.name}",
91
+ Integer,
92
+ pk_sequence,
93
+ server_default=pk_sequence.next_value(),
94
+ primary_key=True,
95
+ )
96
+ else:
97
+ pk_col = Column(
98
+ f"pk_{self.name}", Integer, primary_key=True, autoincrement=True
99
+ )
86
100
  self.table = Table(
87
101
  self.name,
88
102
  self.metadata,
89
- Column(f"pk_{self.name}", Integer, primary_key=True, autoincrement=True),
103
+ pk_col,
90
104
  PrimaryKeyConstraint(
91
105
  name=f"cx_pk_{self.name}",
92
106
  mssql_clustered=not self.config["as_columnstore"],
@@ -7,6 +7,7 @@ from sqlalchemy import (
7
7
  UniqueConstraint,
8
8
  Boolean,
9
9
  select,
10
+ Sequence,
10
11
  )
11
12
 
12
13
  from .transformed_table import DataModelTableTransformed
@@ -86,10 +87,25 @@ class DataModelTableReused(DataModelTableTransformed):
86
87
  if callable(self.config.get("extra_args", []))
87
88
  else self.config.get("extra_args", [])
88
89
  )
90
+
91
+ if self.data_model.db_type == "duckdb":
92
+ pk_sequence = Sequence(f"pk_sequ_{self.name}")
93
+ pk_col = Column(
94
+ f"pk_{self.name}",
95
+ Integer,
96
+ pk_sequence,
97
+ server_default=pk_sequence.next_value(),
98
+ primary_key=True,
99
+ )
100
+ else:
101
+ pk_col = Column(
102
+ f"pk_{self.name}", Integer, primary_key=True, autoincrement=True
103
+ )
104
+
89
105
  self.table = Table(
90
106
  self.name,
91
107
  self.metadata,
92
- Column(f"pk_{self.name}", Integer, primary_key=True, autoincrement=True),
108
+ pk_col,
93
109
  PrimaryKeyConstraint(
94
110
  name=f"cx_pk_{self.name}",
95
111
  mssql_clustered=not self.config["as_columnstore"],
@@ -6,8 +6,6 @@ from lxml import etree
6
6
  from io import BytesIO
7
7
  from itertools import zip_longest
8
8
 
9
- from .exceptions import DataModelConfigError
10
-
11
9
 
12
10
  if typing.TYPE_CHECKING:
13
11
  from .model import DataModel
@@ -76,7 +74,7 @@ class XMLConverter:
76
74
  logger.info("Skipping XML file validation")
77
75
  else:
78
76
  logger.info("Validating XML file against the schema")
79
- if not self.model.xml_schema.is_valid(xt if xt else xml_file):
77
+ if not self.model.lxml_schema.validate(xt if xt else etree.parse(xml_file)):
80
78
  logger.error(f"XML file {file_path} does not conform with the schema")
81
79
  raise ValueError(
82
80
  f"XML file {file_path} does not conform with the schema"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xml2db
3
- Version: 0.12.0
3
+ Version: 0.12.2
4
4
  Summary: Import complex XML files to a relational database
5
5
  Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
6
6
  Project-URL: Documentation, https://cre-dev.github.io/xml2db
@@ -13,19 +13,18 @@ Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: sqlalchemy>1.4
16
- Requires-Dist: xmlschema==3.1.0
16
+ Requires-Dist: xmlschema==3.3.2
17
17
  Requires-Dist: lxml==5.1.0
18
18
  Provides-Extra: docs
19
- Requires-Dist: mkdocs-material==9.5.23; extra == "docs"
20
- Requires-Dist: mkdocstrings-python==1.10.2; extra == "docs"
19
+ Requires-Dist: mkdocs-material==9.5.34; extra == "docs"
20
+ Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
21
21
  Provides-Extra: tests
22
22
  Requires-Dist: pytest>=7.0; extra == "tests"
23
23
 
24
- # Loading complex XML files to a relational database
24
+ # Loading XML files into a relational database
25
25
 
26
- `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It is designed to
27
- handle complex XML files which cannot be denormalized to flat tables. It works out of the box, without any custom
28
- mapping rules.
26
+ `xml2db` is a Python package which allows parsing and loading XML files into a relational database. It handles complex
27
+ XML files which cannot be denormalized to flat tables, and works out of the box, without any custom mapping rules.
29
28
 
30
29
  It can be used within an [Extract, Load, Transform](https://docs.getdbt.com/terms/elt) data pipeline pattern as it
31
30
  allows loading XML files into a relational data model which is very close to the source data, yet easy to work with.
@@ -52,7 +51,7 @@ document = data_model.parse_xml(
52
51
  document.insert_into_target_tables()
53
52
  ```
54
53
 
55
- The resulting data model will adhere closely to the XSD schema. However, `xml2db` will perform a few systematic
54
+ The data model created by `xml2db` will be close to the XSD schema. However, `xml2db` will perform a few systematic
56
55
  simplifications aimed at limiting the complexity of the resulting data model and the storage footprint. The resulting
57
56
  data model can be configured, but the above code will work out of the box, with reasonable defaults.
58
57
 
@@ -60,9 +59,9 @@ The raw data loaded into the database can then be processed if need be, using fo
60
59
  SQL views or stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
61
60
 
62
61
  This package uses `sqlalchemy` to interact with the database, so it should work with different database backends.
63
- Automated integration tests run against PostgreSQL, MySQL and MS SQL Server. `xml2db` does not work with SQLite. You may
64
- have to install additional packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL or
65
- `pyodbc` for MS SQL Server).
62
+ Automated integration tests run against PostgreSQL, MySQL, MS SQL Server and DuckDB. You may have to install additional
63
+ packages to connect to your database (e.g. `psycopg2` for PostgreSQL, `pymysql` for MySQL, `pyodbc` for MS SQL Server or
64
+ `duckdb_engine` for DuckDB).
66
65
 
67
66
  **Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
68
67
 
@@ -97,7 +96,8 @@ pytest -m "not dbtest"
97
96
 
98
97
  ## Contributing
99
98
 
100
- `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex XML data.
99
+ `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process complex
100
+ XML data.
101
101
 
102
102
  Contributions are welcome, as well as bug reports, starting on the project's
103
103
  [issue page](https://github.com/cre-dev/xml2db/issues).
@@ -20,4 +20,5 @@ src/xml2db/table/table.py
20
20
  src/xml2db/table/transformed_table.py
21
21
  tests/test_conversions.py
22
22
  tests/test_models_output.py
23
- tests/test_roundtrip.py
23
+ tests/test_roundtrip.py
24
+ tests/test_validation.py
@@ -0,0 +1,10 @@
1
+ sqlalchemy>1.4
2
+ xmlschema==3.3.2
3
+ lxml==5.1.0
4
+
5
+ [docs]
6
+ mkdocs-material==9.5.34
7
+ mkdocstrings-python==1.11.1
8
+
9
+ [tests]
10
+ pytest>=7.0
@@ -1,8 +1,9 @@
1
1
  import os
2
+
2
3
  import pytest
3
4
  from lxml import etree
4
- from xml2db.xml_converter import XMLConverter, remove_record_hash
5
5
 
6
+ from xml2db.xml_converter import XMLConverter, remove_record_hash
6
7
  from .fixtures import setup_db_model, conn_string
7
8
  from .sample_models import models
8
9
 
@@ -0,0 +1,85 @@
1
+ import xml.etree.ElementTree
2
+
3
+ import lxml.etree
4
+ import pytest
5
+
6
+ from xml2db import DataModel
7
+ from .sample_models import models
8
+
9
+
10
+ @pytest.mark.parametrize(
11
+ "args",
12
+ [
13
+ ("invalid", True, False, ValueError),
14
+ ("invalid", True, True, ValueError),
15
+ ("invalid", False, False, ValueError),
16
+ ("invalid", False, True, ValueError),
17
+ ("malformed_recover", True, False, lxml.etree.XMLSyntaxError),
18
+ ("malformed_recover", True, True, None),
19
+ ("malformed_recover", False, False, lxml.etree.XMLSyntaxError),
20
+ ("malformed_recover", False, True, None),
21
+ ("malformed_no_recover", True, False, lxml.etree.XMLSyntaxError),
22
+ ("malformed_no_recover", True, True, ValueError),
23
+ ("malformed_no_recover", False, False, lxml.etree.XMLSyntaxError),
24
+ ("malformed_no_recover", False, True, ValueError),
25
+ ],
26
+ )
27
+ def test_invalid_xml(args: tuple):
28
+
29
+ file_name, iterparse, recover, exception = args
30
+ data_model = DataModel(models[0]["xsd_path"])
31
+
32
+ if exception is None:
33
+ data_model.parse_xml(
34
+ f"tests/sample_models/orders/invalid_xml/{file_name}.xml",
35
+ skip_validation=False,
36
+ iterparse=iterparse,
37
+ recover=recover,
38
+ )
39
+ else:
40
+ with pytest.raises(exception):
41
+ data_model.parse_xml(
42
+ f"tests/sample_models/orders/invalid_xml/{file_name}.xml",
43
+ skip_validation=False,
44
+ iterparse=iterparse,
45
+ recover=recover,
46
+ )
47
+
48
+
49
+ @pytest.mark.parametrize(
50
+ "args",
51
+ [
52
+ ("invalid", True, False, IndexError),
53
+ ("invalid", True, True, IndexError),
54
+ ("invalid", False, False, None),
55
+ ("invalid", False, True, None),
56
+ ("malformed_recover", True, False, lxml.etree.XMLSyntaxError),
57
+ ("malformed_recover", True, True, None),
58
+ ("malformed_recover", False, False, lxml.etree.XMLSyntaxError),
59
+ ("malformed_recover", False, True, None),
60
+ ("malformed_no_recover", True, False, lxml.etree.XMLSyntaxError),
61
+ ("malformed_no_recover", True, True, IndexError),
62
+ ("malformed_no_recover", False, False, lxml.etree.XMLSyntaxError),
63
+ ("malformed_no_recover", False, True, None),
64
+ ],
65
+ )
66
+ def test_invalid_xml_skip_verify(args: tuple):
67
+
68
+ file_name, iterparse, recover, exception = args
69
+ data_model = DataModel(models[0]["xsd_path"])
70
+
71
+ if exception is None:
72
+ data_model.parse_xml(
73
+ f"tests/sample_models/orders/invalid_xml/{file_name}.xml",
74
+ skip_validation=True,
75
+ iterparse=iterparse,
76
+ recover=recover,
77
+ )
78
+ else:
79
+ with pytest.raises(exception):
80
+ data_model.parse_xml(
81
+ f"tests/sample_models/orders/invalid_xml/{file_name}.xml",
82
+ skip_validation=True,
83
+ iterparse=iterparse,
84
+ recover=recover,
85
+ )
@@ -1,10 +0,0 @@
1
- sqlalchemy>1.4
2
- xmlschema==3.1.0
3
- lxml==5.1.0
4
-
5
- [docs]
6
- mkdocs-material==9.5.23
7
- mkdocstrings-python==1.10.2
8
-
9
- [tests]
10
- pytest>=7.0
File without changes
File without changes
File without changes
@@ -1,10 +1,10 @@
1
1
  import os
2
+
2
3
  import pytest
3
4
  from lxml import etree
4
5
 
5
6
  from xml2db import DataModel
6
7
  from xml2db.xml_converter import XMLConverter, remove_record_hash
7
-
8
8
  from .sample_models import models
9
9
 
10
10
 
@@ -1,9 +1,9 @@
1
1
  import os
2
+
2
3
  import pytest
3
4
  from sqlalchemy.dialects import postgresql, mssql, mysql
4
5
 
5
6
  from xml2db import DataModel
6
-
7
7
  from .sample_models import models
8
8
 
9
9