xml2db 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xml2db-0.9.0/LICENSE +19 -0
- xml2db-0.9.0/PKG-INFO +100 -0
- xml2db-0.9.0/README.md +76 -0
- xml2db-0.9.0/pyproject.toml +39 -0
- xml2db-0.9.0/setup.cfg +4 -0
- xml2db-0.9.0/src/debug.py +34 -0
- xml2db-0.9.0/src/xml2db/__init__.py +21 -0
- xml2db-0.9.0/src/xml2db/document.py +650 -0
- xml2db-0.9.0/src/xml2db/exceptions.py +4 -0
- xml2db-0.9.0/src/xml2db/model.py +619 -0
- xml2db-0.9.0/src/xml2db/table/__init__.py +5 -0
- xml2db-0.9.0/src/xml2db/table/column.py +190 -0
- xml2db-0.9.0/src/xml2db/table/duplicated_table.py +180 -0
- xml2db-0.9.0/src/xml2db/table/relations.py +243 -0
- xml2db-0.9.0/src/xml2db/table/reused_table.py +152 -0
- xml2db-0.9.0/src/xml2db/table/table.py +356 -0
- xml2db-0.9.0/src/xml2db/table/transformed_table.py +314 -0
- xml2db-0.9.0/src/xml2db/xml_converter.py +258 -0
- xml2db-0.9.0/src/xml2db.egg-info/PKG-INFO +100 -0
- xml2db-0.9.0/src/xml2db.egg-info/SOURCES.txt +23 -0
- xml2db-0.9.0/src/xml2db.egg-info/dependency_links.txt +1 -0
- xml2db-0.9.0/src/xml2db.egg-info/requires.txt +13 -0
- xml2db-0.9.0/src/xml2db.egg-info/top_level.txt +2 -0
- xml2db-0.9.0/tests/test_conversions.py +85 -0
- xml2db-0.9.0/tests/test_roundtrip.py +105 -0
xml2db-0.9.0/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2023 Commission de régulation de l'énergie
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
xml2db-0.9.0/PKG-INFO
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: xml2db
|
|
3
|
+
Version: 0.9.0
|
|
4
|
+
Summary: Import complex XML files to a relational database
|
|
5
|
+
Author-email: Commission de régulation de l'énergie <opensource@cre.fr>
|
|
6
|
+
Project-URL: Documentation, https://cre-dev.github.io/xml2db
|
|
7
|
+
Project-URL: Repository, https://github.com/cre-dev/xml2db
|
|
8
|
+
Project-URL: Issues page, https://github.com/cre-dev/xml2db/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: sqlalchemy
|
|
16
|
+
Requires-Dist: xmlschema
|
|
17
|
+
Requires-Dist: lxml
|
|
18
|
+
Requires-Dist: graphlib_backport; python_version < "3.9"
|
|
19
|
+
Provides-Extra: docs
|
|
20
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
21
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
22
|
+
Provides-Extra: tests
|
|
23
|
+
Requires-Dist: pytest; extra == "tests"
|
|
24
|
+
|
|
25
|
+
# Xml2db
|
|
26
|
+
|
|
27
|
+
`xml2db` is a Python package which allows loading XML data into a relational database. It is designed to handle complex
|
|
28
|
+
schemas which cannot be denormalized to a flat table, without any custom code.
|
|
29
|
+
|
|
30
|
+
It builds a data model (i.e. a set of database tables linked with foreign key relationships) based on an XSD schema and
|
|
31
|
+
allows parsing and loading XML files into the database, and getting them back to XML, if needed.
|
|
32
|
+
|
|
33
|
+
It is as simple as:
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from xml2db import DataModel
|
|
37
|
+
|
|
38
|
+
# Create a data model of tables with relations based on the XSD file
|
|
39
|
+
data_model = DataModel(
|
|
40
|
+
xsd_file="path/to/file.xsd",
|
|
41
|
+
connection_string="mssql+pyodbc://server/database?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes",
|
|
42
|
+
)
|
|
43
|
+
# Parse an XML file based on this XSD
|
|
44
|
+
document = data_model.parse_xml(
|
|
45
|
+
xml_file="path/to/file.xml"
|
|
46
|
+
)
|
|
47
|
+
# Insert the document content into the database
|
|
48
|
+
document.insert_into_target_tables()
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
The data model will adhere closely to the XSD schema, but `xml2db` will perform simplifications aimed at limiting the
|
|
52
|
+
complexity of the resulting data model and the storage footprint.
|
|
53
|
+
|
|
54
|
+
The raw data loaded into the database can then be processed using [DBT](https://www.getdbt.com/), SQL views or
|
|
55
|
+
stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
|
|
56
|
+
|
|
57
|
+
`xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process XML
|
|
58
|
+
data, notably [REMIT data](https://www.acer.europa.eu/remit/data-collection). There, it handles batches of ~500 MB XML
|
|
59
|
+
files translating into a 20+ tables data model in the database.
|
|
60
|
+
|
|
61
|
+
This package uses `sqlalchemy` to interact with the database, so it should work with different database backends. It has
|
|
62
|
+
been tested against PostgreSQL and MS SQL Server. It currently does not work with SQLite. You may have to install
|
|
63
|
+
additional packages to connect to your database (e.g. `pyodbc` which is the default connector for MS SQL Server, or
|
|
64
|
+
`psycopg2` for PostgreSQL).
|
|
65
|
+
|
|
66
|
+
**Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
|
|
67
|
+
|
|
68
|
+
## Installation
|
|
69
|
+
|
|
70
|
+
The package can be installed, preferably in a virtual environment, using `pip`:
|
|
71
|
+
|
|
72
|
+
``` bash
|
|
73
|
+
pip install xml2db
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Testing
|
|
77
|
+
|
|
78
|
+
Running the tests requires installing additional development dependencies, after cloning the repo, with:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install -e .[tests,docs]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Run all tests with the following command:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
python -m pytest
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Integration tests require write access to an MS SQL Server database; the connection string is provided as an environment
|
|
91
|
+
variable `DB_STRING`. If you want to run only conversion tests that do not require a database you can run:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pytest -m "not dbtest"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Contributing
|
|
98
|
+
|
|
99
|
+
Contributions are more than welcome, as well as bug reports, starting with the project's
|
|
100
|
+
[issue page](https://github.com/cre-dev/xml2db/issues).
|
xml2db-0.9.0/README.md
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Xml2db
|
|
2
|
+
|
|
3
|
+
`xml2db` is a Python package which allows loading XML data into a relational database. It is designed to handle complex
|
|
4
|
+
schemas which cannot be denormalized to a flat table, without any custom code.
|
|
5
|
+
|
|
6
|
+
It builds a data model (i.e. a set of database tables linked with foreign key relationships) based on an XSD schema and
|
|
7
|
+
allows parsing and loading XML files into the database, and getting them back to XML, if needed.
|
|
8
|
+
|
|
9
|
+
It is as simple as:
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from xml2db import DataModel
|
|
13
|
+
|
|
14
|
+
# Create a data model of tables with relations based on the XSD file
|
|
15
|
+
data_model = DataModel(
|
|
16
|
+
xsd_file="path/to/file.xsd",
|
|
17
|
+
connection_string="mssql+pyodbc://server/database?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes",
|
|
18
|
+
)
|
|
19
|
+
# Parse an XML file based on this XSD
|
|
20
|
+
document = data_model.parse_xml(
|
|
21
|
+
xml_file="path/to/file.xml"
|
|
22
|
+
)
|
|
23
|
+
# Insert the document content into the database
|
|
24
|
+
document.insert_into_target_tables()
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The data model will adhere closely to the XSD schema, but `xml2db` will perform simplifications aimed at limiting the
|
|
28
|
+
complexity of the resulting data model and the storage footprint.
|
|
29
|
+
|
|
30
|
+
The raw data loaded into the database can then be processed using [DBT](https://www.getdbt.com/), SQL views or
|
|
31
|
+
stored procedures aimed at extracting, correcting and formatting the data into more user-friendly tables.
|
|
32
|
+
|
|
33
|
+
`xml2db` is developed and used at the [French energy regulation authority (CRE)](https://www.cre.fr/) to process XML
|
|
34
|
+
data, notably [REMIT data](https://www.acer.europa.eu/remit/data-collection). There, it handles batches of ~500 MB XML
|
|
35
|
+
files translating into a 20+ tables data model in the database.
|
|
36
|
+
|
|
37
|
+
This package uses `sqlalchemy` to interact with the database, so it should work with different database backends. It has
|
|
38
|
+
been tested against PostgreSQL and MS SQL Server. It currently does not work with SQLite. You may have to install
|
|
39
|
+
additional packages to connect to your database (e.g. `pyodbc` which is the default connector for MS SQL Server, or
|
|
40
|
+
`psycopg2` for PostgreSQL).
|
|
41
|
+
|
|
42
|
+
**Please read the [package documentation website](https://cre-dev.github.io/xml2db) for all the details!**
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
The package can be installed, preferably in a virtual environment, using `pip`:
|
|
47
|
+
|
|
48
|
+
``` bash
|
|
49
|
+
pip install xml2db
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Testing
|
|
53
|
+
|
|
54
|
+
Running the tests requires installing additional development dependencies, after cloning the repo, with:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install -e .[tests,docs]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Run all tests with the following command:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
python -m pytest
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Integration tests require write access to an MS SQL Server database; the connection string is provided as an environment
|
|
67
|
+
variable `DB_STRING`. If you want to run only conversion tests that do not require a database you can run:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pytest -m "not dbtest"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Contributing
|
|
74
|
+
|
|
75
|
+
Contributions are more than welcome, as well as bug reports, starting with the project's
|
|
76
|
+
[issue page](https://github.com/cre-dev/xml2db/issues).
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "xml2db"
|
|
7
|
+
version = "0.9.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
|
|
10
|
+
]
|
|
11
|
+
description = "Import complex XML files to a relational database"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"sqlalchemy",
|
|
21
|
+
"xmlschema",
|
|
22
|
+
"lxml",
|
|
23
|
+
"graphlib_backport;python_version<'3.9'",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
docs = ["mkdocs-material", "mkdocstrings[python]"]
|
|
28
|
+
tests = ["pytest"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
"Documentation" = "https://cre-dev.github.io/xml2db"
|
|
32
|
+
"Repository" = "https://github.com/cre-dev/xml2db"
|
|
33
|
+
"Issues page" = "https://github.com/cre-dev/xml2db/issues"
|
|
34
|
+
|
|
35
|
+
[tool.pytest.ini_options]
|
|
36
|
+
markers = [
|
|
37
|
+
"dbtest: marks tests as integration tests requiring a database backend (deselect with '-m \"not dbtest\"')",
|
|
38
|
+
]
|
|
39
|
+
junit_family = "xunit2"
|
xml2db-0.9.0/setup.cfg
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from xml2db import DataModel
|
|
3
|
+
from sqlalchemy import inspect
|
|
4
|
+
|
|
5
|
+
from tests.sample_models.models import models
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def setup():
    """Build the ``DataModel`` used for debugging from the sample models.

    The third entry of ``models`` is selected; its ``xsd_path`` is joined
    onto ``"../"`` and the configuration of its first version is passed as
    ``model_config``.

    Returns:
        The ``DataModel`` instance created with short name ``"junit"``.
    """
    sample = models[2]
    xsd_location = os.path.join("../", sample["xsd_path"])
    first_version_config = sample["versions"][0]["config"]
    return DataModel(
        xsd_location,
        short_name="junit",
        model_config=first_version_config,
    )
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def main():
    """Create an engine for the local PostgreSQL test database and inspect it.

    Builds a SQLAlchemy engine from a hard-coded PostgreSQL URL, creates an
    inspector on that engine and prints ``"ok"`` once both steps succeeded.
    """
    from sqlalchemy import create_engine

    # An MS SQL connection string used to be assigned here. It was never
    # read, and its non-raw literal contained the invalid escape sequence
    # "\D" (a warning on current Python versions), so it has been removed.
    engine = create_engine(
        "postgresql+psycopg2://testuser:testuser@localhost:5432/testdb"
    )

    # The inspector object itself is not needed afterwards, so it is not
    # bound to a name; the call is kept because the original performed it.
    inspect(engine)
    print("ok")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Run the debug check only when this file is executed as a script.
if __name__ == "__main__":
|
|
34
|
+
main()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from xml2db.model import DataModel
|
|
2
|
+
from xml2db.document import Document
|
|
3
|
+
from xml2db.table import (
|
|
4
|
+
DataModelTable,
|
|
5
|
+
DataModelTableReused,
|
|
6
|
+
DataModelTableDuplicated,
|
|
7
|
+
DataModelColumn,
|
|
8
|
+
DataModelRelationN,
|
|
9
|
+
DataModelRelation1,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Names re-exported as the public API of the package.
__all__ = [
    "DataModel",
    "Document",
    "DataModelTable",
    "DataModelTableReused",
    "DataModelTableDuplicated",
    "DataModelColumn",
    "DataModelRelation1",
    "DataModelRelationN",
]
|