xmldiffreport 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xmldiffreport-0.1.0/.gitignore +36 -0
- xmldiffreport-0.1.0/CHANGELOG.md +37 -0
- xmldiffreport-0.1.0/LICENSE +21 -0
- xmldiffreport-0.1.0/PKG-INFO +318 -0
- xmldiffreport-0.1.0/README.md +280 -0
- xmldiffreport-0.1.0/examples/README.md +49 -0
- xmldiffreport-0.1.0/examples/build_examples.py +477 -0
- xmldiffreport-0.1.0/examples/controlm/bench/patch-a.xml +118 -0
- xmldiffreport-0.1.0/examples/controlm/bench/patch-x.xml +28 -0
- xmldiffreport-0.1.0/examples/controlm/prod/hotfix-c.xml +71 -0
- xmldiffreport-0.1.0/examples/controlm/test/patch-d.xml +39 -0
- xmldiffreport-0.1.0/examples/controlm/uat/patch-b.xml +80 -0
- xmldiffreport-0.1.0/examples/controlm/uat/patch-e.xml +27 -0
- xmldiffreport-0.1.0/examples/sitemap/new/sitemap.xml +19 -0
- xmldiffreport-0.1.0/examples/sitemap/old/sitemap.xml +18 -0
- xmldiffreport-0.1.0/pyproject.toml +93 -0
- xmldiffreport-0.1.0/src/xmldiffreport/__init__.py +40 -0
- xmldiffreport-0.1.0/src/xmldiffreport/__main__.py +6 -0
- xmldiffreport-0.1.0/src/xmldiffreport/cli.py +90 -0
- xmldiffreport-0.1.0/src/xmldiffreport/core.py +366 -0
- xmldiffreport-0.1.0/src/xmldiffreport/prompts/recipe_from_xml.md +201 -0
- xmldiffreport-0.1.0/src/xmldiffreport/py.typed +0 -0
- xmldiffreport-0.1.0/src/xmldiffreport/recipes/controlm.toml +48 -0
- xmldiffreport-0.1.0/src/xmldiffreport/recipes/generic.toml +9 -0
- xmldiffreport-0.1.0/src/xmldiffreport/recipes/recipe.schema.json +53 -0
- xmldiffreport-0.1.0/src/xmldiffreport/recipes/sitemap.toml +23 -0
- xmldiffreport-0.1.0/src/xmldiffreport/report/__init__.py +28 -0
- xmldiffreport-0.1.0/src/xmldiffreport/report/base.py +77 -0
- xmldiffreport-0.1.0/src/xmldiffreport/report/html.py +158 -0
- xmldiffreport-0.1.0/src/xmldiffreport/report/markdown.py +106 -0
- xmldiffreport-0.1.0/src/xmldiffreport/scaffold.py +119 -0
- xmldiffreport-0.1.0/tests/test_controlm.py +53 -0
- xmldiffreport-0.1.0/tests/test_recipe_tooling.py +59 -0
- xmldiffreport-0.1.0/tests/test_report.py +48 -0
- xmldiffreport-0.1.0/usage/README.md +44 -0
- xmldiffreport-0.1.0/usage/collect.py +61 -0
- xmldiffreport-0.1.0/usage/config.example.toml +17 -0
- xmldiffreport-0.1.0/usage/reports/.gitkeep +0 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
.env
|
|
11
|
+
|
|
12
|
+
# Tooling
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
coverage.xml
|
|
18
|
+
htmlcov/
|
|
19
|
+
|
|
20
|
+
# MkDocs build output and social-cards cache
|
|
21
|
+
site/
|
|
22
|
+
.cache/
|
|
23
|
+
|
|
24
|
+
# Output of the tool itself
|
|
25
|
+
/reports/
|
|
26
|
+
|
|
27
|
+
# Usage harness: never commit real config, data, or reports
|
|
28
|
+
usage/config.toml
|
|
29
|
+
usage/reports/*
|
|
30
|
+
!usage/reports/.gitkeep
|
|
31
|
+
usage/**/*.xml
|
|
32
|
+
|
|
33
|
+
# OS / editors
|
|
34
|
+
.DS_Store
|
|
35
|
+
.idea/
|
|
36
|
+
.vscode/
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-06-03
|
|
11
|
+
|
|
12
|
+
Initial release.
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
- N-way, recipe-driven structural & semantic XML diff engine
|
|
16
|
+
(`xmldiffreport.core`): natural-key alignment (order-independent), volatile
|
|
17
|
+
attribute filtering, inline elements, attribute-level and presence diffs.
|
|
18
|
+
- Pluggable report formats via a strategy/factory (`xmldiffreport.report`):
|
|
19
|
+
built-in **Markdown** and **HTML** renderers, selectable with `--format`
|
|
20
|
+
(also inferred from the `-o` extension). New formats are a single
|
|
21
|
+
`@register`ed `Renderer` subclass.
|
|
22
|
+
- Command-line interface (`xmldiffreport`) and a typed library API.
|
|
23
|
+
- Built-in recipes — `controlm`, `sitemap`, `generic` — plus a TOML
|
|
24
|
+
"key mini-language" for custom dialects.
|
|
25
|
+
- Recipe tooling (`xmldiffreport-recipe`): `scaffold` prints an LLM prompt that
|
|
26
|
+
generates a recipe from a sample XML; `validate` checks a recipe against the
|
|
27
|
+
shipped JSON Schema (`recipes/recipe.schema.json`). Dependency-free validator.
|
|
28
|
+
- High-level API: `diff(paths, recipe=...)` accepts a file, multiple files, and/or
|
|
29
|
+
directories (scanned recursively) and returns a `DiffReport` you can `.render()`.
|
|
30
|
+
The engine is generic — no notion of "environments".
|
|
31
|
+
- Synthetic example datasets (Control-M patches, sitemaps) and a config-driven
|
|
32
|
+
usage harness (`usage/`).
|
|
33
|
+
- MkDocs (Material) documentation, bilingual (English + Português), deployed to
|
|
34
|
+
GitHub Pages; SEO-ready (JSON-LD, sitemap, robots.txt, social cards).
|
|
35
|
+
|
|
36
|
+
[Unreleased]: https://github.com/bilouro/xmldiffreport/compare/v0.1.0...HEAD
|
|
37
|
+
[0.1.0]: https://github.com/bilouro/xmldiffreport/releases/tag/v0.1.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Victor H. Bilouro
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xmldiffreport
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: N-way structural & semantic XML diff that generates human-readable Markdown reports, driven by per-dialect recipes (Control-M, sitemaps, and more).
|
|
5
|
+
Project-URL: Homepage, https://github.com/bilouro/xmldiffreport
|
|
6
|
+
Project-URL: Documentation, https://bilouro.github.io/xmldiffreport/
|
|
7
|
+
Project-URL: Repository, https://github.com/bilouro/xmldiffreport
|
|
8
|
+
Project-URL: Issues, https://github.com/bilouro/xmldiffreport/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/bilouro/xmldiffreport/blob/main/CHANGELOG.md
|
|
10
|
+
Author: Victor H. Bilouro
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: compare,control-m,controlm,devops,diff,markdown,n-way-diff,report,semantic-diff,sitemap,structural-diff,tree-diff,xml,xml-compare,xml-comparison,xml-diff
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: System Administrators
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Software Development :: Version Control
|
|
24
|
+
Classifier: Topic :: Text Processing :: Markup :: XML
|
|
25
|
+
Classifier: Topic :: Utilities
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.11
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mypy>=1.11; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
33
|
+
Provides-Extra: docs
|
|
34
|
+
Requires-Dist: mkdocs-material[imaging]; extra == 'docs'
|
|
35
|
+
Requires-Dist: mkdocs-static-i18n; extra == 'docs'
|
|
36
|
+
Requires-Dist: mkdocstrings[python]; extra == 'docs'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# xmldiffreport
|
|
40
|
+
|
|
41
|
+
[Documentation](https://bilouro.github.io/xmldiffreport/)
|
|
42
|
+
[CI](https://github.com/bilouro/xmldiffreport/actions/workflows/ci.yml)
|
|
43
|
+
[PyPI version](https://pypi.org/project/xmldiffreport/)
|
|
44
|
+
[Supported Python versions](https://pypi.org/project/xmldiffreport/)
|
|
45
|
+
[License: MIT](LICENSE)
|
|
46
|
+
|
|
47
|
+
📖 **Documentation: <https://bilouro.github.io/xmldiffreport/>** · [Português](https://bilouro.github.io/xmldiffreport/pt/)
|
|
48
|
+
|
|
49
|
+
**N-way structural & semantic XML diff that produces human-readable Markdown reports — driven by per-dialect recipes.**
|
|
50
|
+
|
|
51
|
+
`xmldiffreport` compares **two or more** XML files at once and tells you *what
|
|
52
|
+
actually changed*, element by element and attribute by attribute — not a noisy
|
|
53
|
+
line-by-line text diff. It aligns elements by a **natural key** (not by
|
|
54
|
+
position), ignores **volatile attributes**, and renders a clean **Markdown
|
|
55
|
+
report** with a summary table plus per-element detail.
|
|
56
|
+
|
|
57
|
+
It was born from a real problem — spotting differences between **BMC Control-M**
|
|
58
|
+
job patches flowing through `test → uat → bench → prod` — and generalized into a
|
|
59
|
+
recipe-driven engine that works on any XML dialect (Control-M exports,
|
|
60
|
+
**sitemaps**, POMs, manifests, …).
|
|
61
|
+
|
|
62
|
+
> Status: early (0.1.0), but already useful. Feedback and recipes welcome.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Why not a normal diff / `xmldiff`?
|
|
67
|
+
|
|
68
|
+
A plain `diff` (or `git diff`) on XML lies, for three reasons:
|
|
69
|
+
|
|
70
|
+
1. **Volatile attributes** — `VERSION`, `CREATION_TIME`, `JOBISN`… change on every export with no functional meaning.
|
|
71
|
+
2. **Reordering** — children are often unordered; a reorder is not a change.
|
|
72
|
+
3. **Attribute order** inside a tag is irrelevant.
|
|
73
|
+
|
|
74
|
+
Text/edit-script diffs (like the excellent [`xmldiff`](https://pypi.org/project/xmldiff/))
|
|
75
|
+
solve part of this but are **2-way**, **algorithm-matched** (you can't say "match
|
|
76
|
+
`<JOB>` by `JOBNAME`"), and output an edit script rather than a review-friendly report.
|
|
77
|
+
|
|
78
|
+
| | xmldiffreport | xmldiff | DiffDog / Oxygen | DeltaXML |
|
|
79
|
+
|---|---|---|---|---|
|
|
80
|
+
| Match by **declared natural key** | ✅ | ❌ | ⚠️ limited | ✅ |
|
|
81
|
+
| **N-way** (3+ files at once) | ✅ | ❌ | ❌ | ❌ |
|
|
82
|
+
| **Markdown report** out of the box | ✅ | ❌ (edit script) | ⚠️ GUI | ❌ (delta XML) |
|
|
83
|
+
| Open source | ✅ | ✅ | ❌ | ❌ |
|
|
84
|
+
|
|
85
|
+
**When to use which** — choose `xmldiffreport` for **N-way**, key-aligned,
|
|
86
|
+
report-first comparison (e.g. "the same folder in uat, bench and prod"); reach
|
|
87
|
+
for `xmldiff` to produce a **patch/edit script**, DiffDog/Oxygen for **interactive
|
|
88
|
+
2-way merging**, DeltaXML for **heuristic matching of keyless documents**, and
|
|
89
|
+
`git diff` for **raw line changes** on already-normalized XML. Full breakdown:
|
|
90
|
+
[How it compares](https://bilouro.github.io/xmldiffreport/comparison/).
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Install
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install xmldiffreport
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Requires Python 3.11+ (uses the standard-library `tomllib`). **No third-party dependencies.**
|
|
101
|
+
|
|
102
|
+
## Quickstart
|
|
103
|
+
|
|
104
|
+
Compare two XML files — that's the core idea:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
xmldiffreport old.xml new.xml -o report.md
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
`report.md` lists every element that changed, **one column per file**. No options
|
|
111
|
+
needed — it uses the `generic` recipe by default. Pass **as many files as you
|
|
112
|
+
like**; the report just grows a column each:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
xmldiffreport v1.xml v2.xml v3.xml -o report.md
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Prefer an HTML page? Add `-f html` (or name the output `*.html`):
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
xmldiffreport old.xml new.xml -f html -o report.html
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Exit code is `1` when a **difference** is found (handy for CI), `0` otherwise.
|
|
125
|
+
|
|
126
|
+
> No files handy? `git clone` the repo and try the bundled, synthetic `examples/`:
|
|
127
|
+
> `xmldiffreport examples/sitemap/old/sitemap.xml examples/sitemap/new/sitemap.xml --recipe sitemap`
|
|
128
|
+
|
|
129
|
+
### Sharper results: recipes
|
|
130
|
+
|
|
131
|
+
The default compares any XML, but a **recipe** teaches the tool how to identify
|
|
132
|
+
elements in a specific dialect — matching "the same" element by a *key* (not by
|
|
133
|
+
position) and ignoring volatile attributes. Built-ins: `controlm`, `sitemap`,
|
|
134
|
+
`generic`; or write your own.
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
xmldiffreport old.xml new.xml --recipe sitemap -o report.md
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
→ [Writing recipes](https://bilouro.github.io/xmldiffreport/guide/recipes/) ·
|
|
141
|
+
[generate one from your XML with an LLM](https://bilouro.github.io/xmldiffreport/guide/recipe-from-llm/).
|
|
142
|
+
|
|
143
|
+
### Comparing many files (or whole directories)
|
|
144
|
+
|
|
145
|
+
Point it at **directories** too — they're scanned recursively for `*.xml`, and
|
|
146
|
+
every file found becomes a source:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
xmldiffreport ./dump-a ./dump-b --recipe controlm -o report.md
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Mental model: every file is a **source** (labelled by its path); a **unit** is the
|
|
153
|
+
recipe's `unit` element (e.g. a Control-M `SMART_FOLDER`); the engine compares
|
|
154
|
+
each unit across **every source that contains it** (2+). A unit that appears in
|
|
155
|
+
only one file is ignored. The tool has **no notion of "environments"** — if it
|
|
156
|
+
matters which file is production, name it so.
|
|
157
|
+
|
|
158
|
+
→ Full, worked guide with directory trees and a complete example:
|
|
159
|
+
**[Inputs & file layout](https://bilouro.github.io/xmldiffreport/guide/inputs/)**.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## What the report looks like
|
|
164
|
+
|
|
165
|
+
For each unit (e.g. a Control-M `SMART_FOLDER`) present in **2+ sources** with
|
|
166
|
+
differences (names below are from the synthetic `examples/`):
|
|
167
|
+
|
|
168
|
+
> ### `GLX_INGEST_DAILY` (SMART_FOLDER)
|
|
169
|
+
> Sources: `bench/patch-a.xml`, `uat/patch-b.xml`, `prod/hotfix-c.xml`
|
|
170
|
+
>
|
|
171
|
+
> **~ JOB `GLX_INGEST_LOAD`**
|
|
172
|
+
>
|
|
173
|
+
> | Element · attribute | bench/patch-a.xml | uat/patch-b.xml | prod/hotfix-c.xml |
|
|
174
|
+
> |---|---|---|---|
|
|
175
|
+
> | `CMDLINE` | …`--force` | …`--retry` | …`%%P_DATE` |
|
|
176
|
+
> | `MAXRERUN` | 0 | 5 | 3 |
|
|
177
|
+
> | INCOND `GLX_INGEST_STAGE-…_OK` · `AND_OR` | A | O | A |
|
|
178
|
+
> | OUTCOND `GLX_INGEST_LOAD-…_OK` · `SIGN` | - | + | + |
|
|
179
|
+
> | ON `NOTOK\|RERUN` | − | present | present |
|
|
180
|
+
|
|
181
|
+
Notice: it's **N-way** (one column per file), it shows **attribute-level**
|
|
182
|
+
changes of the *same* element (the `SIGN` flip, the `AND_OR` change), it
|
|
183
|
+
collapses identical jobs into a count, and the volatile `VERSION`/`CREATION_TIME`
|
|
184
|
+
noise is gone.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Recipes
|
|
189
|
+
|
|
190
|
+
A **recipe** is a small TOML file that teaches the generic engine about one XML
|
|
191
|
+
dialect: the natural key per element and which attributes to ignore.
|
|
192
|
+
|
|
193
|
+
```toml
|
|
194
|
+
name = "controlm"
|
|
195
|
+
|
|
196
|
+
[defaults]
|
|
197
|
+
unit = "SMART_FOLDER" # the unit of comparison
|
|
198
|
+
ignore_attrs = ["VERSION", "JOBISN", "CREATION_TIME", "LAST_UPLOAD", "..."]
|
|
199
|
+
|
|
200
|
+
[elements.JOB]
|
|
201
|
+
key = ["@JOBNAME"]
|
|
202
|
+
|
|
203
|
+
[elements.OUTCOND]
|
|
204
|
+
key = ["@NAME"] # SIGN / ODATE are compared as attributes
|
|
205
|
+
|
|
206
|
+
[elements.ON] # no clear key → synthesize from CODE + DO actions
|
|
207
|
+
key = ["@CODE", "*kinds"]
|
|
208
|
+
inline = true # treat children as pseudo-attributes
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Key mini-language
|
|
212
|
+
|
|
213
|
+
A `key` is a list of tokens, joined by `|`:
|
|
214
|
+
|
|
215
|
+
| Token | Meaning |
|
|
216
|
+
|---|---|
|
|
217
|
+
| `@ATTR` | value of attribute `ATTR` |
|
|
218
|
+
| `#text` | the element's own text |
|
|
219
|
+
| `*tag` | the element's tag name (use for singletons compared by their text) |
|
|
220
|
+
| `child:TAG@ATTR` | attribute of a child element |
|
|
221
|
+
| `child:TAG#text` | text of a child element (e.g. sitemap `<loc>`) |
|
|
222
|
+
| `*kinds` | summary of child kinds / `DOACTION` actions (for keyless elements like `<ON>`) |
|
|
223
|
+
|
|
224
|
+
If no key is given, the engine falls back to `@NAME`, then `#text`, then a
|
|
225
|
+
composite of all attributes.
|
|
226
|
+
|
|
227
|
+
### Built-in recipes
|
|
228
|
+
|
|
229
|
+
- **`controlm`** — BMC Control-M exports (`DEFTABLE → SMART_FOLDER → JOB → INCOND/OUTCOND/QUANTITATIVE/CONTROL/ON`).
|
|
230
|
+
- **`sitemap`** — `sitemap.xml` (identity by `<loc>` text; compares `<lastmod>`/`<priority>`/`<changefreq>`).
|
|
231
|
+
- **`generic`** — no dialect knowledge (default).
|
|
232
|
+
|
|
233
|
+
Drop a `.toml` anywhere and pass its path to `--recipe` to add your own dialect.
|
|
234
|
+
|
|
235
|
+
### Generate & validate a recipe
|
|
236
|
+
|
|
237
|
+
Don't want to write one by hand? Let an LLM draft it from a sample of your XML:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
xmldiffreport-recipe scaffold sample.xml > prompt.txt # paste prompt.txt into any LLM
|
|
241
|
+
xmldiffreport-recipe validate my-dialect.toml # check the result (ships a JSON Schema)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
See [Generate a recipe with an LLM](https://bilouro.github.io/xmldiffreport/guide/recipe-from-llm/).
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Project layout — tool vs. your usage
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
src/xmldiffreport/ the installable TOOL (engine, recipes, CLI) — generic, reusable
|
|
252
|
+
examples/ synthetic datasets + generator (no real data)
|
|
253
|
+
usage/ a config-driven HARNESS to run the tool on YOUR files
|
|
254
|
+
tests/ pytest suite
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
The **tool** in `src/` knows nothing about your folders. The **`usage/`** folder
|
|
258
|
+
is the thin layer you adapt: a `config.toml` listing the inputs (files/dirs), a
|
|
259
|
+
`report_dir`, and a `collect.py` that runs the diff and writes the report.
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
cp usage/config.example.toml usage/config.toml # then edit the paths
|
|
263
|
+
python usage/collect.py # writes usage/reports/<timestamp>.md
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
Your `config.toml`, reports, and any XML under `usage/` are git-ignored — real
|
|
267
|
+
data and paths never get committed.
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Library use
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
from xmldiffreport import diff
|
|
275
|
+
|
|
276
|
+
result = diff(["old.xml", "new.xml"], recipe="sitemap") # a file, files, or dir(s)
|
|
277
|
+
print(result.render()) # Markdown — or result.render("html")
|
|
278
|
+
|
|
279
|
+
for unit in result.units: # what differs
|
|
280
|
+
print(unit.ident, unit.sources)
|
|
281
|
+
if result: # truthy when anything differs (handy for exit codes)
|
|
282
|
+
...
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## Performance
|
|
288
|
+
|
|
289
|
+
Each file is parsed once into an in-memory tree (`xml.etree.ElementTree`); the
|
|
290
|
+
diff cost is roughly linear in the number of nodes. For typical Control-M exports
|
|
291
|
+
(a few MB) it's instant, and it's fine up to the order of tens of MB. It is
|
|
292
|
+
**not** designed for gigabyte-scale files — we deliberately favour simple,
|
|
293
|
+
maintainable code over incremental/streaming parsing.
|
|
294
|
+
|
|
295
|
+
## Development
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
python -m venv .venv && source .venv/bin/activate
|
|
299
|
+
pip install -e ".[dev]"
|
|
300
|
+
|
|
301
|
+
ruff check . && ruff format --check .
|
|
302
|
+
mypy src
|
|
303
|
+
pytest
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). Examples and tests use **synthetic** data
|
|
307
|
+
only — never real exports.
|
|
308
|
+
|
|
309
|
+
## Roadmap
|
|
310
|
+
|
|
311
|
+
- Report top-level units that exist in only one source (added/removed units).
|
|
312
|
+
- JSON report format (Markdown and HTML already ship; formats are pluggable).
|
|
313
|
+
- Similarity-based matching fallback for keyless elements.
|
|
314
|
+
- More built-in recipes (Maven POM, Android manifest, RSS/Atom, JUnit).
|
|
315
|
+
|
|
316
|
+
## License
|
|
317
|
+
|
|
318
|
+
MIT © Victor H. Bilouro — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# xmldiffreport
|
|
2
|
+
|
|
3
|
+
[Documentation](https://bilouro.github.io/xmldiffreport/)
|
|
4
|
+
[CI](https://github.com/bilouro/xmldiffreport/actions/workflows/ci.yml)
|
|
5
|
+
[PyPI version](https://pypi.org/project/xmldiffreport/)
|
|
6
|
+
[Supported Python versions](https://pypi.org/project/xmldiffreport/)
|
|
7
|
+
[License: MIT](LICENSE)
|
|
8
|
+
|
|
9
|
+
📖 **Documentation: <https://bilouro.github.io/xmldiffreport/>** · [Português](https://bilouro.github.io/xmldiffreport/pt/)
|
|
10
|
+
|
|
11
|
+
**N-way structural & semantic XML diff that produces human-readable Markdown reports — driven by per-dialect recipes.**
|
|
12
|
+
|
|
13
|
+
`xmldiffreport` compares **two or more** XML files at once and tells you *what
|
|
14
|
+
actually changed*, element by element and attribute by attribute — not a noisy
|
|
15
|
+
line-by-line text diff. It aligns elements by a **natural key** (not by
|
|
16
|
+
position), ignores **volatile attributes**, and renders a clean **Markdown
|
|
17
|
+
report** with a summary table plus per-element detail.
|
|
18
|
+
|
|
19
|
+
It was born from a real problem — spotting differences between **BMC Control-M**
|
|
20
|
+
job patches flowing through `test → uat → bench → prod` — and generalized into a
|
|
21
|
+
recipe-driven engine that works on any XML dialect (Control-M exports,
|
|
22
|
+
**sitemaps**, POMs, manifests, …).
|
|
23
|
+
|
|
24
|
+
> Status: early (0.1.0), but already useful. Feedback and recipes welcome.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Why not a normal diff / `xmldiff`?
|
|
29
|
+
|
|
30
|
+
A plain `diff` (or `git diff`) on XML lies, for three reasons:
|
|
31
|
+
|
|
32
|
+
1. **Volatile attributes** — `VERSION`, `CREATION_TIME`, `JOBISN`… change on every export with no functional meaning.
|
|
33
|
+
2. **Reordering** — children are often unordered; a reorder is not a change.
|
|
34
|
+
3. **Attribute order** inside a tag is irrelevant.
|
|
35
|
+
|
|
36
|
+
Text/edit-script diffs (like the excellent [`xmldiff`](https://pypi.org/project/xmldiff/))
|
|
37
|
+
solve part of this but are **2-way**, **algorithm-matched** (you can't say "match
|
|
38
|
+
`<JOB>` by `JOBNAME`"), and output an edit script rather than a review-friendly report.
|
|
39
|
+
|
|
40
|
+
| | xmldiffreport | xmldiff | DiffDog / Oxygen | DeltaXML |
|
|
41
|
+
|---|---|---|---|---|
|
|
42
|
+
| Match by **declared natural key** | ✅ | ❌ | ⚠️ limited | ✅ |
|
|
43
|
+
| **N-way** (3+ files at once) | ✅ | ❌ | ❌ | ❌ |
|
|
44
|
+
| **Markdown report** out of the box | ✅ | ❌ (edit script) | ⚠️ GUI | ❌ (delta XML) |
|
|
45
|
+
| Open source | ✅ | ✅ | ❌ | ❌ |
|
|
46
|
+
|
|
47
|
+
**When to use which** — choose `xmldiffreport` for **N-way**, key-aligned,
|
|
48
|
+
report-first comparison (e.g. "the same folder in uat, bench and prod"); reach
|
|
49
|
+
for `xmldiff` to produce a **patch/edit script**, DiffDog/Oxygen for **interactive
|
|
50
|
+
2-way merging**, DeltaXML for **heuristic matching of keyless documents**, and
|
|
51
|
+
`git diff` for **raw line changes** on already-normalized XML. Full breakdown:
|
|
52
|
+
[How it compares](https://bilouro.github.io/xmldiffreport/comparison/).
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install xmldiffreport
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Requires Python 3.11+ (uses the standard-library `tomllib`). **No third-party dependencies.**
|
|
63
|
+
|
|
64
|
+
## Quickstart
|
|
65
|
+
|
|
66
|
+
Compare two XML files — that's the core idea:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
xmldiffreport old.xml new.xml -o report.md
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
`report.md` lists every element that changed, **one column per file**. No options
|
|
73
|
+
needed — it uses the `generic` recipe by default. Pass **as many files as you
|
|
74
|
+
like**; the report just grows a column each:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
xmldiffreport v1.xml v2.xml v3.xml -o report.md
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Prefer an HTML page? Add `-f html` (or name the output `*.html`):
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
xmldiffreport old.xml new.xml -f html -o report.html
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Exit code is `1` when a **difference** is found (handy for CI), `0` otherwise.
|
|
87
|
+
|
|
88
|
+
> No files handy? `git clone` the repo and try the bundled, synthetic `examples/`:
|
|
89
|
+
> `xmldiffreport examples/sitemap/old/sitemap.xml examples/sitemap/new/sitemap.xml --recipe sitemap`
|
|
90
|
+
|
|
91
|
+
### Sharper results: recipes
|
|
92
|
+
|
|
93
|
+
The default compares any XML, but a **recipe** teaches the tool how to identify
|
|
94
|
+
elements in a specific dialect — matching "the same" element by a *key* (not by
|
|
95
|
+
position) and ignoring volatile attributes. Built-ins: `controlm`, `sitemap`,
|
|
96
|
+
`generic`; or write your own.
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
xmldiffreport old.xml new.xml --recipe sitemap -o report.md
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
→ [Writing recipes](https://bilouro.github.io/xmldiffreport/guide/recipes/) ·
|
|
103
|
+
[generate one from your XML with an LLM](https://bilouro.github.io/xmldiffreport/guide/recipe-from-llm/).
|
|
104
|
+
|
|
105
|
+
### Comparing many files (or whole directories)
|
|
106
|
+
|
|
107
|
+
Point it at **directories** too — they're scanned recursively for `*.xml`, and
|
|
108
|
+
every file found becomes a source:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
xmldiffreport ./dump-a ./dump-b --recipe controlm -o report.md
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Mental model: every file is a **source** (labelled by its path); a **unit** is the
|
|
115
|
+
recipe's `unit` element (e.g. a Control-M `SMART_FOLDER`); the engine compares
|
|
116
|
+
each unit across **every source that contains it** (2+). A unit that appears in
|
|
117
|
+
only one file is ignored. The tool has **no notion of "environments"** — if it
|
|
118
|
+
matters which file is production, name it so.
|
|
119
|
+
|
|
120
|
+
→ Full, worked guide with directory trees and a complete example:
|
|
121
|
+
**[Inputs & file layout](https://bilouro.github.io/xmldiffreport/guide/inputs/)**.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## What the report looks like
|
|
126
|
+
|
|
127
|
+
For each unit (e.g. a Control-M `SMART_FOLDER`) present in **2+ sources** with
|
|
128
|
+
differences (names below are from the synthetic `examples/`):
|
|
129
|
+
|
|
130
|
+
> ### `GLX_INGEST_DAILY` (SMART_FOLDER)
|
|
131
|
+
> Sources: `bench/patch-a.xml`, `uat/patch-b.xml`, `prod/hotfix-c.xml`
|
|
132
|
+
>
|
|
133
|
+
> **~ JOB `GLX_INGEST_LOAD`**
|
|
134
|
+
>
|
|
135
|
+
> | Element · attribute | bench/patch-a.xml | uat/patch-b.xml | prod/hotfix-c.xml |
|
|
136
|
+
> |---|---|---|---|
|
|
137
|
+
> | `CMDLINE` | …`--force` | …`--retry` | …`%%P_DATE` |
|
|
138
|
+
> | `MAXRERUN` | 0 | 5 | 3 |
|
|
139
|
+
> | INCOND `GLX_INGEST_STAGE-…_OK` · `AND_OR` | A | O | A |
|
|
140
|
+
> | OUTCOND `GLX_INGEST_LOAD-…_OK` · `SIGN` | - | + | + |
|
|
141
|
+
> | ON `NOTOK\|RERUN` | − | present | present |
|
|
142
|
+
|
|
143
|
+
Notice: it's **N-way** (one column per file), it shows **attribute-level**
|
|
144
|
+
changes of the *same* element (the `SIGN` flip, the `AND_OR` change), it
|
|
145
|
+
collapses identical jobs into a count, and the volatile `VERSION`/`CREATION_TIME`
|
|
146
|
+
noise is gone.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Recipes
|
|
151
|
+
|
|
152
|
+
A **recipe** is a small TOML file that teaches the generic engine about one XML
|
|
153
|
+
dialect: the natural key per element and which attributes to ignore.
|
|
154
|
+
|
|
155
|
+
```toml
|
|
156
|
+
name = "controlm"
|
|
157
|
+
|
|
158
|
+
[defaults]
|
|
159
|
+
unit = "SMART_FOLDER" # the unit of comparison
|
|
160
|
+
ignore_attrs = ["VERSION", "JOBISN", "CREATION_TIME", "LAST_UPLOAD", "..."]
|
|
161
|
+
|
|
162
|
+
[elements.JOB]
|
|
163
|
+
key = ["@JOBNAME"]
|
|
164
|
+
|
|
165
|
+
[elements.OUTCOND]
|
|
166
|
+
key = ["@NAME"] # SIGN / ODATE are compared as attributes
|
|
167
|
+
|
|
168
|
+
[elements.ON] # no clear key → synthesize from CODE + DO actions
|
|
169
|
+
key = ["@CODE", "*kinds"]
|
|
170
|
+
inline = true # treat children as pseudo-attributes
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Key mini-language
|
|
174
|
+
|
|
175
|
+
A `key` is a list of tokens, joined by `|`:
|
|
176
|
+
|
|
177
|
+
| Token | Meaning |
|
|
178
|
+
|---|---|
|
|
179
|
+
| `@ATTR` | value of attribute `ATTR` |
|
|
180
|
+
| `#text` | the element's own text |
|
|
181
|
+
| `*tag` | the element's tag name (use for singletons compared by their text) |
|
|
182
|
+
| `child:TAG@ATTR` | attribute of a child element |
|
|
183
|
+
| `child:TAG#text` | text of a child element (e.g. sitemap `<loc>`) |
|
|
184
|
+
| `*kinds` | summary of child kinds / `DOACTION` actions (for keyless elements like `<ON>`) |
|
|
185
|
+
|
|
186
|
+
If no key is given, the engine falls back to `@NAME`, then `#text`, then a
|
|
187
|
+
composite of all attributes.
|
|
188
|
+
|
|
189
|
+
### Built-in recipes
|
|
190
|
+
|
|
191
|
+
- **`controlm`** — BMC Control-M exports (`DEFTABLE → SMART_FOLDER → JOB → INCOND/OUTCOND/QUANTITATIVE/CONTROL/ON`).
|
|
192
|
+
- **`sitemap`** — `sitemap.xml` (identity by `<loc>` text; compares `<lastmod>`/`<priority>`/`<changefreq>`).
|
|
193
|
+
- **`generic`** — no dialect knowledge (default).
|
|
194
|
+
|
|
195
|
+
Drop a `.toml` anywhere and pass its path to `--recipe` to add your own dialect.
|
|
196
|
+
|
|
197
|
+
### Generate & validate a recipe
|
|
198
|
+
|
|
199
|
+
Don't want to write one by hand? Let an LLM draft it from a sample of your XML:
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
xmldiffreport-recipe scaffold sample.xml > prompt.txt # paste prompt.txt into any LLM
|
|
203
|
+
xmldiffreport-recipe validate my-dialect.toml # check the result (ships a JSON Schema)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
See [Generate a recipe with an LLM](https://bilouro.github.io/xmldiffreport/guide/recipe-from-llm/).
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Project layout — tool vs. your usage
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
src/xmldiffreport/ the installable TOOL (engine, recipes, CLI) — generic, reusable
|
|
214
|
+
examples/ synthetic datasets + generator (no real data)
|
|
215
|
+
usage/ a config-driven HARNESS to run the tool on YOUR files
|
|
216
|
+
tests/ pytest suite
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
The **tool** in `src/` knows nothing about your folders. The **`usage/`** folder
|
|
220
|
+
is the thin layer you adapt: a `config.toml` listing the inputs (files/dirs), a
|
|
221
|
+
`report_dir`, and a `collect.py` that runs the diff and writes the report.
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
cp usage/config.example.toml usage/config.toml # then edit the paths
|
|
225
|
+
python usage/collect.py # writes usage/reports/<timestamp>.md
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Your `config.toml`, reports, and any XML under `usage/` are git-ignored — real
|
|
229
|
+
data and paths never get committed.
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## Library use
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
from xmldiffreport import diff
|
|
237
|
+
|
|
238
|
+
result = diff(["old.xml", "new.xml"], recipe="sitemap") # a file, files, or dir(s)
|
|
239
|
+
print(result.render()) # Markdown — or result.render("html")
|
|
240
|
+
|
|
241
|
+
for unit in result.units: # what differs
|
|
242
|
+
print(unit.ident, unit.sources)
|
|
243
|
+
if result: # truthy when anything differs (handy for exit codes)
|
|
244
|
+
...
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Performance
|
|
250
|
+
|
|
251
|
+
Each file is parsed once into an in-memory tree (`xml.etree.ElementTree`); the
|
|
252
|
+
diff cost is roughly linear in the number of nodes. For typical Control-M exports
|
|
253
|
+
(a few MB) it's instant, and it's fine up to the order of tens of MB. It is
|
|
254
|
+
**not** designed for gigabyte-scale files — we deliberately favour simple,
|
|
255
|
+
maintainable code over incremental/streaming parsing.
|
|
256
|
+
|
|
257
|
+
## Development
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
python -m venv .venv && source .venv/bin/activate
|
|
261
|
+
pip install -e ".[dev]"
|
|
262
|
+
|
|
263
|
+
ruff check . && ruff format --check .
|
|
264
|
+
mypy src
|
|
265
|
+
pytest
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). Examples and tests use **synthetic** data
|
|
269
|
+
only — never real exports.
|
|
270
|
+
|
|
271
|
+
## Roadmap
|
|
272
|
+
|
|
273
|
+
- Report top-level units that exist in only one source (added/removed units).
|
|
274
|
+
- JSON report format (Markdown and HTML already ship; formats are pluggable).
|
|
275
|
+
- Similarity-based matching fallback for keyless elements.
|
|
276
|
+
- More built-in recipes (Maven POM, Android manifest, RSS/Atom, JUnit).
|
|
277
|
+
|
|
278
|
+
## License
|
|
279
|
+
|
|
280
|
+
MIT © Victor H. Bilouro — see [LICENSE](LICENSE).
|