xml2db 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xml2db-0.9.0/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2023 Commission de régulation de l'énergie
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
xml2db-0.9.0/PKG-INFO ADDED
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.1
2
+ Name: xml2db
3
+ Version: 0.9.0
4
+ Summary: Import complex XML files to a relational database
5
+ Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
6
+ Project-URL: Documentation, https://cre-dev.github.io/xml2db
7
+ Project-URL: Repository, https://github.com/cre-dev/xml2db
8
+ Project-URL: Issues page, https://github.com/cre-dev/xml2db/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: sqlalchemy
16
+ Requires-Dist: xmlschema
17
+ Requires-Dist: lxml
18
+ Requires-Dist: graphlib_backport; python_version < "3.9"
19
+ Provides-Extra: docs
20
+ Requires-Dist: mkdocs-material; extra == "docs"
21
+ Requires-Dist: mkdocstrings[python]; extra == "docs"
22
+ Provides-Extra: tests
23
+ Requires-Dist: pytest; extra == "tests"
24
+
25
+ # Xml2db
26
+
27
+ `xml2db` is a Python package which allows loading XML data into a relational database. It is designed to handle complex
28
+ schemas which cannot be denormalized to a flat table, without any custom code.
29
+
30
+ It builds a data model (i.e. a set of database tables linked with foreign key relationships) based on an XSD schema and
31
+ allows parsing and loading XML files into the database, and getting them back to XML, if needed.
32
+
33
+ It is as simple as:
34
+
35
+ ```python
36
+ from xml2db import DataModel
37
+
38
+ # Create a data model of tables with relations based on the XSD file
39
+ data_model = DataModel(
40
+ xsd_file="path/to/file.xsd",
41
+ connection_string="mssql+pyodbc://server/database?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes",
42
+ )
43
+ # Parse an XML file based on this XSD
44
+ document = data_model.parse_xml(
45
+ xml_file="path/to/file.xml"
46
+ )
47
+ # Insert the document content into the database
48
+ document.insert_into_target_tables()
49
+ ```
50
+
51
+ The data model will adhere closely to the XSD schema, but `xml2db` will perform simplifications aimed at limiting the
52
+ complexity of the resulting data model and the storage footprint.
53
+
54
+ The raw data loaded into the database can then be processed using [DBT](https://www.getdbt.com/), SQL views or
55
+ stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
56
+
57
+ `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process XML
58
+ data, notably [REMIT data](https://www.acer.europa.eu/remit/data-collection). There, it handles batches of ~500 MB XML
59
+ files translating into a 20+ tables data model in the database.
60
+
61
+ This package uses `sqlalchemy` to interact with the database, so it should work with different database backends. It has
62
+ been tested against PostgreSQL and MS SQL Server. It currently does not work with SQLite. You may have to install
63
+ additional packages to connect to your database (e.g. `pyodbc` which is the default connector for MS SQL Server, or
64
+ `psycopg2` for PostgreSQL).
65
+
66
+ **Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
67
+
68
+ ## Installation
69
+
70
+ The package can be installed, preferably in a virtual environment, using `pip`:
71
+
72
+ ``` bash
73
+ pip install xml2db
74
+ ```
75
+
76
+ ## Testing
77
+
78
+ Running the tests requires installing additional development dependencies, after cloning the repo, with:
79
+
80
+ ```bash
81
+ pip install -e .[tests,docs]
82
+ ```
83
+
84
+ Run all tests with the following command:
85
+
86
+ ```bash
87
+ python -m pytest
88
+ ```
89
+
90
+ Integration tests require write access to a MS SQL server database; the connection string is provided as an environment
91
+ variable `DB_STRING`. If you want to run only conversion tests that do not require a database you can run:
92
+
93
+ ```bash
94
+ pytest -m "not dbtest"
95
+ ```
96
+
97
+ ## Contributing
98
+
99
+ Contributions are more than welcome, as well as bug reports, starting with the project's
100
+ [issue page](https://github.com/cre-dev/xml2db/issues).
xml2db-0.9.0/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Xml2db
2
+
3
+ `xml2db` is a Python package which allows loading XML data into a relational database. It is designed to handle complex
4
+ schemas which cannot be denormalized to a flat table, without any custom code.
5
+
6
+ It builds a data model (i.e. a set of database tables linked with foreign key relationships) based on an XSD schema and
7
+ allows parsing and loading XML files into the database, and getting them back to XML, if needed.
8
+
9
+ It is as simple as:
10
+
11
+ ```python
12
+ from xml2db import DataModel
13
+
14
+ # Create a data model of tables with relations based on the XSD file
15
+ data_model = DataModel(
16
+ xsd_file="path/to/file.xsd",
17
+ connection_string="mssql+pyodbc://server/database?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes",
18
+ )
19
+ # Parse an XML file based on this XSD
20
+ document = data_model.parse_xml(
21
+ xml_file="path/to/file.xml"
22
+ )
23
+ # Insert the document content into the database
24
+ document.insert_into_target_tables()
25
+ ```
26
+
27
+ The data model will adhere closely to the XSD schema, but `xml2db` will perform simplifications aimed at limiting the
28
+ complexity of the resulting data model and the storage footprint.
29
+
30
+ The raw data loaded into the database can then be processed using [DBT](https://www.getdbt.com/), SQL views or
31
+ stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
32
+
33
+ `xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process XML
34
+ data, notably [REMIT data](https://www.acer.europa.eu/remit/data-collection). There, it handles batches of ~500 MB XML
35
+ files translating into a 20+ tables data model in the database.
36
+
37
+ This package uses `sqlalchemy` to interact with the database, so it should work with different database backends. It has
38
+ been tested against PostgreSQL and MS SQL Server. It currently does not work with SQLite. You may have to install
39
+ additional packages to connect to your database (e.g. `pyodbc` which is the default connector for MS SQL Server, or
40
+ `psycopg2` for PostgreSQL).
41
+
42
+ **Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
43
+
44
+ ## Installation
45
+
46
+ The package can be installed, preferably in a virtual environment, using `pip`:
47
+
48
+ ``` bash
49
+ pip install xml2db
50
+ ```
51
+
52
+ ## Testing
53
+
54
+ Running the tests requires installing additional development dependencies, after cloning the repo, with:
55
+
56
+ ```bash
57
+ pip install -e .[tests,docs]
58
+ ```
59
+
60
+ Run all tests with the following command:
61
+
62
+ ```bash
63
+ python -m pytest
64
+ ```
65
+
66
+ Integration tests require write access to a MS SQL server database; the connection string is provided as an environment
67
+ variable `DB_STRING`. If you want to run only conversion tests that do not require a database you can run:
68
+
69
+ ```bash
70
+ pytest -m "not dbtest"
71
+ ```
72
+
73
+ ## Contributing
74
+
75
+ Contributions are more than welcome, as well as bug reports, starting with the project's
76
+ [issue page](https://github.com/cre-dev/xml2db/issues).
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "xml2db"
7
+ version = "0.9.0"
8
+ authors = [
9
+ { name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
10
+ ]
11
+ description = "Import complex XML files to a relational database"
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = [
20
+ "sqlalchemy",
21
+ "xmlschema",
22
+ "lxml",
23
+ "graphlib_backport;python_version<'3.9'",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ docs = ["mkdocs-material", "mkdocstrings[python]"]
28
+ tests = ["pytest"]
29
+
30
+ [project.urls]
31
+ "Documentation" = "https://cre-dev.github.io/xml2db"
32
+ "Repository" = "https://github.com/cre-dev/xml2db"
33
+ "Issues page" = "https://github.com/cre-dev/xml2db/issues"
34
+
35
+ [tool.pytest.ini_options]
36
+ markers = [
37
+ "dbtest: marks tests as integration tests requiring a database backend (deselect with '-m \"not dbtest\"')",
38
+ ]
39
+ junit_family = "xunit2"
xml2db-0.9.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,34 @@
1
+ import os
2
+ from xml2db import DataModel
3
+ from sqlalchemy import inspect
4
+
5
+ from tests.sample_models.models import models
6
+
7
+
8
def setup():
    """Build and return a ``DataModel`` from the sample entry ``models[2]``.

    The XSD path stored under the entry's ``"xsd_path"`` key is resolved
    relative to the parent directory (``"../"``). The model is created with
    the short name ``"junit"`` and the ``"config"`` of the first item in the
    entry's ``"versions"`` list.

    Returns:
        The constructed ``DataModel`` instance.
    """
    sample = models[2]
    xsd_location = os.path.join("../", sample["xsd_path"])
    first_version_config = sample["versions"][0]["config"]
    return DataModel(
        xsd_location,
        short_name="junit",
        model_config=first_version_config,
    )
18
+
19
+
20
def main():
    """Create a PostgreSQL engine, attach an inspector to it and print "ok".

    Scratch entry point: the engine URL (including its credentials) is
    hard-coded and nothing is returned.
    """
    # Imported locally, as in the original script.
    from sqlalchemy import create_engine

    # Not used below; kept next to the PostgreSQL URL that is actually used.
    # The backslash is escaped explicitly: a bare "\D" in a normal string
    # literal is an unrecognised escape sequence, which newer Python versions
    # warn about at compile time. The resulting string value is unchanged.
    connection_string = "mssql+pyodbc://DATACRE\\DEV_BASECRE/BaseCRE?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes"

    engine = create_engine(
        "postgresql+psycopg2://testuser:testuser@localhost:5432/testdb"
    )

    # The inspector object is not used afterwards.
    # NOTE(review): presumably this is only a connectivity smoke check - confirm.
    inspector = inspect(engine)
    print("ok")
31
+
32
+
33
+ if __name__ == "__main__":
34
+ main()
@@ -0,0 +1,21 @@
1
+ from xml2db.model import DataModel
2
+ from xml2db.document import Document
3
+ from xml2db.table import (
4
+ DataModelTable,
5
+ DataModelTableReused,
6
+ DataModelTableDuplicated,
7
+ DataModelColumn,
8
+ DataModelRelationN,
9
+ DataModelRelation1,
10
+ )
11
+
12
+ __all__ = [
13
+ "DataModel",
14
+ "Document",
15
+ "DataModelTable",
16
+ "DataModelTableReused",
17
+ "DataModelTableDuplicated",
18
+ "DataModelColumn",
19
+ "DataModelRelation1",
20
+ "DataModelRelationN",
21
+ ]