xmlpydict 0.0.7__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.4
2
+ Name: xmlpydict
3
+ Version: 0.0.11
4
+ Summary: xml to dictionary tool for python
5
+ Author-email: Matthew Taylor <matthew.taylor.andre@gmail.com>
6
+ Project-URL: Homepage, https://github.com/MatthewAndreTaylor/xml-to-pydict
7
+ Keywords: xml,dictionary
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: Implementation :: CPython
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Text Processing :: Markup :: XML
20
+ Requires-Python: >=3.7
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Provides-Extra: tests
24
+ Requires-Dist: pytest; extra == "tests"
25
+ Requires-Dist: requests; extra == "tests"
26
+ Dynamic: license-file
27
+
28
+ # xmlpydict 📑
29
+
30
+ [![XML Tests](https://github.com/MatthewAndreTaylor/xml-to-pydict/actions/workflows/tests.yml/badge.svg)](https://github.com/MatthewAndreTaylor/xml-to-pydict/actions/workflows/tests.yml)
31
+ [![PyPI versions](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/MatthewAndreTaylor/xml-to-pydict)
32
+ [![PyPI](https://img.shields.io/pypi/v/xmlpydict.svg)](https://pypi.org/project/xmlpydict/)
33
+
34
+ ## Requirements
35
+
36
+ - `python 3.8+`
37
+
38
+ ## Installation
39
+
40
+ To install xmlpydict using pip:
41
+
42
+ ```bash
43
+ pip install xmlpydict
44
+ ```
45
+
46
+ ## Quickstart
47
+
48
+ ```py
49
+ >>> from xmlpydict import parse
50
+ >>> parse("<package><xmlpydict language='python'/></package>")
51
+ {'package': {'xmlpydict': {'@language': 'python'}}}
52
+ >>> parse("<person name='Matthew'>Hello!</person>")
53
+ {'person': {'@name': 'Matthew', '#text': 'Hello!'}}
54
+ ```
55
+
56
+ ## Goals
57
+
58
+ Create a consistent parsing strategy between XML and Python dictionaries using the specification found [here](https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html). `xmlpydict` focuses on speed; see the benchmarks below.
59
+
60
+ <img width="256" alt="small_xml_document" src="https://github.com/user-attachments/assets/0248a408-6bb6-4790-bd0f-f90537e2f21a" />
61
+ <img width="256" alt="large_xml_document" src="https://github.com/user-attachments/assets/539a2a69-f475-46a5-bffc-1e8805a5a5e7" />
62
+
63
+
64
+ ### xmlpydict supports the following
65
+
66
+ [CDataSection](https://www.w3.org/TR/xml/#sec-cdata-sect): CDATA Sections are stored as {'#text': CData}.
67
+
68
+ [Comments](https://www.w3.org/TR/xml/#sec-comments): Comments are tokenized for correctness, but have no effect on what is returned.
69
+
70
+ [Element Tags](https://www.w3.org/TR/xml/#sec-starttags): Allows for duplicate attributes, however only the latest defined will be taken.
71
+
72
+ [Characters](https://www.w3.org/TR/xml/#charsets): Similar to CDATA, text is stored as {'#text': Char}; however, this text is stripped.
73
+
74
+ ```py
75
+ # Empty tags are containers
76
+ >>> from xmlpydict import parse
77
+ >>> parse("<a></a>")
78
+ {'a': None}
79
+ >>> parse("<a/>")
80
+ {'a': None}
81
+ >>> parse("<a/>").get('href')
82
+ None
83
+ ```
84
+
85
+ ### Attribute prefixing
86
+
87
+ ```py
88
+ # Change prefix from default "@" with keyword argument attr_prefix
89
+ >>> from xmlpydict import parse
90
+ >>> parse('<p width="10" height="5"></p>', attr_prefix="$")
91
+ {"p": {"$width": "10", "$height": "5"}}
92
+ ```
93
+
94
+
95
+ ### Exceptions
96
+
97
+ ```py
98
+ # Grammar and structure of the xml_content is checked while parsing
99
+ >>> from xmlpydict import parse
100
+ >>> parse("<a></ a>")
101
+ xml.parsers.expat.ExpatError: not well-formed (invalid token): line 1, column 5
102
+ ```
103
+
104
+
105
+ ### Unsupported
106
+
107
+ Prolog / Enforcing Document Type Definition and Element Type Declarations
108
+
109
+ Entity Referencing
110
+
111
+ Namespaces
@@ -0,0 +1,84 @@
1
+ # xmlpydict 📑
2
+
3
+ [![XML Tests](https://github.com/MatthewAndreTaylor/xml-to-pydict/actions/workflows/tests.yml/badge.svg)](https://github.com/MatthewAndreTaylor/xml-to-pydict/actions/workflows/tests.yml)
4
+ [![PyPI versions](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/MatthewAndreTaylor/xml-to-pydict)
5
+ [![PyPI](https://img.shields.io/pypi/v/xmlpydict.svg)](https://pypi.org/project/xmlpydict/)
6
+
7
+ ## Requirements
8
+
9
+ - `python 3.8+`
10
+
11
+ ## Installation
12
+
13
+ To install xmlpydict using pip:
14
+
15
+ ```bash
16
+ pip install xmlpydict
17
+ ```
18
+
19
+ ## Quickstart
20
+
21
+ ```py
22
+ >>> from xmlpydict import parse
23
+ >>> parse("<package><xmlpydict language='python'/></package>")
24
+ {'package': {'xmlpydict': {'@language': 'python'}}}
25
+ >>> parse("<person name='Matthew'>Hello!</person>")
26
+ {'person': {'@name': 'Matthew', '#text': 'Hello!'}}
27
+ ```
28
+
29
+ ## Goals
30
+
31
+ Create a consistent parsing strategy between XML and Python dictionaries using the specification found [here](https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html). `xmlpydict` focuses on speed; see the benchmarks below.
32
+
33
+ <img width="256" alt="small_xml_document" src="https://github.com/user-attachments/assets/0248a408-6bb6-4790-bd0f-f90537e2f21a" />
34
+ <img width="256" alt="large_xml_document" src="https://github.com/user-attachments/assets/539a2a69-f475-46a5-bffc-1e8805a5a5e7" />
35
+
36
+
37
+ ### xmlpydict supports the following
38
+
39
+ [CDataSection](https://www.w3.org/TR/xml/#sec-cdata-sect): CDATA Sections are stored as {'#text': CData}.
40
+
41
+ [Comments](https://www.w3.org/TR/xml/#sec-comments): Comments are tokenized for correctness, but have no effect on what is returned.
42
+
43
+ [Element Tags](https://www.w3.org/TR/xml/#sec-starttags): Allows for duplicate attributes, however only the latest defined will be taken.
44
+
45
+ [Characters](https://www.w3.org/TR/xml/#charsets): Similar to CDATA, text is stored as {'#text': Char}; however, this text is stripped.
46
+
47
+ ```py
48
+ # Empty tags are containers
49
+ >>> from xmlpydict import parse
50
+ >>> parse("<a></a>")
51
+ {'a': None}
52
+ >>> parse("<a/>")
53
+ {'a': None}
54
+ >>> parse("<a/>").get('href')
55
+ None
56
+ ```
57
+
58
+ ### Attribute prefixing
59
+
60
+ ```py
61
+ # Change prefix from default "@" with keyword argument attr_prefix
62
+ >>> from xmlpydict import parse
63
+ >>> parse('<p width="10" height="5"></p>', attr_prefix="$")
64
+ {"p": {"$width": "10", "$height": "5"}}
65
+ ```
66
+
67
+
68
+ ### Exceptions
69
+
70
+ ```py
71
+ # Grammar and structure of the xml_content is checked while parsing
72
+ >>> from xmlpydict import parse
73
+ >>> parse("<a></ a>")
74
+ xml.parsers.expat.ExpatError: not well-formed (invalid token): line 1, column 5
75
+ ```
76
+
77
+
78
+ ### Unsupported
79
+
80
+ Prolog / Enforcing Document Type Definition and Element Type Declarations
81
+
82
+ Entity Referencing
83
+
84
+ Namespaces
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xmlpydict"
7
- version = "0.0.7"
7
+ version = "0.0.11"
8
8
  description="xml to dictionary tool for python"
9
9
  authors = [
10
10
  {name = "Matthew Taylor", email = "matthew.taylor.andre@gmail.com"},
@@ -18,13 +18,13 @@ classifiers = [
18
18
  "License :: OSI Approved :: MIT License",
19
19
  "Programming Language :: Python :: 3",
20
20
  "Programming Language :: Python :: 3 :: Only",
21
- "Programming Language :: Python :: 3.7",
22
21
  "Programming Language :: Python :: 3.8",
23
22
  "Programming Language :: Python :: 3.9",
24
23
  "Programming Language :: Python :: 3.10",
25
24
  "Programming Language :: Python :: 3.11",
26
25
  "Programming Language :: Python :: Implementation :: CPython",
27
26
  "Topic :: Software Development :: Libraries :: Python Modules",
27
+ "Topic :: Text Processing :: Markup :: XML",
28
28
  ]
29
29
 
30
30
  [project.readme]
@@ -32,4 +32,4 @@ file = "README.md"
32
32
  content-type = "text/markdown"
33
33
 
34
34
  [project.optional-dependencies]
35
- tests = [ "pytest", "xmltodict" ]
35
+ tests = [ "pytest", "requests" ]
@@ -16,8 +16,8 @@ class build_ext(build_ext_orig):
16
16
  setup(
17
17
  include_package_data=True,
18
18
  ext_modules=[
19
- Extension("xmlpydict", ["src/xmlparse.cpp"]),
19
+ Extension("pyxmlhandler", ["src/xmlparse.cpp"]),
20
20
  ],
21
21
  cmdclass={"build_ext": build_ext},
22
- package_data={"xmlpydict": ["py.typed"], "": ["xmlpydict.pyi"]},
22
+ packages=["xmlpydict"],
23
23
  )
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Copyright (c) 2023 Matthew Andre Taylor
3
+ */
4
#include <Python.h>
#include <stdio.h>
#include <cctype>
#include <new>
#include <vector>
8
+
9
+ static PyObject* strip(PyObject* s_obj) {
10
+ Py_ssize_t start = 0;
11
+ Py_ssize_t end = PyUnicode_GetLength(s_obj);
12
+ while (start < end && std::isspace(PyUnicode_ReadChar(s_obj, start))) {
13
+ ++start;
14
+ }
15
+ while (end > start && std::isspace(PyUnicode_ReadChar(s_obj, end - 1))) {
16
+ --end;
17
+ }
18
+ return PyUnicode_Substring(s_obj, start, end);
19
+ }
20
+
21
+ typedef struct {
22
+ PyObject_HEAD PyObject* item; // current dict
23
+ PyObject* data; // character data buffer
24
+ std::vector<PyObject*> item_stack;
25
+ std::vector<PyObject*> data_stack;
26
+ PyObject* attr_prefix;
27
+ PyObject* cdata_key;
28
+ } PyDictHandler;
29
+
30
+ static PyObject* PyDictHandler_new(PyTypeObject* type, PyObject* args,
31
+ PyObject* kwargs) {
32
+ PyDictHandler* self;
33
+ self = (PyDictHandler*)type->tp_alloc(type, 0);
34
+ return (PyObject*)self;
35
+ }
36
+
37
+ static int PyDictHandler_init(PyDictHandler* self, PyObject* args,
38
+ PyObject* kwargs) {
39
+ const char* attr_prefix = "@";
40
+ const char* cdata_key = "#text";
41
+ static char* kwlist[] = {"attr_prefix", "cdata_key", NULL};
42
+
43
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist,
44
+ &attr_prefix, &cdata_key))
45
+ return -1;
46
+
47
+ self->item = Py_None;
48
+ self->data = PyUnicode_New(0, 127); // empty string
49
+ self->attr_prefix = PyUnicode_FromString(attr_prefix);
50
+ self->cdata_key = PyUnicode_FromString(cdata_key);
51
+ return 0;
52
+ }
53
+
54
+ static PyObject* characters(PyDictHandler* self, PyObject* data_obj) {
55
+ PyUnicode_Append(&self->data, data_obj);
56
+ Py_RETURN_NONE;
57
+ }
58
+
59
+ static PyObject* startElement(PyDictHandler* self, PyObject* args) {
60
+ self->item_stack.push_back(self->item);
61
+ self->data_stack.push_back(self->data);
62
+ self->data = PyUnicode_New(0, 127); // reset data buffer
63
+
64
+ const char* name;
65
+ PyObject* attrs;
66
+ if (!PyArg_ParseTuple(args, "sO", &name, &attrs)) {
67
+ return NULL;
68
+ }
69
+
70
+ if (!PyDict_Check(attrs) || PyDict_Size(attrs) == 0) {
71
+ self->item = Py_None;
72
+ Py_RETURN_NONE;
73
+ }
74
+
75
+ PyObject* newDict = PyDict_New();
76
+ PyObject *key, *value;
77
+ Py_ssize_t pos = 0;
78
+
79
+ while (PyDict_Next(attrs, &pos, &key, &value)) {
80
+ PyObject* prefixed_key = PyUnicode_Concat(self->attr_prefix, key);
81
+ PyDict_SetItem(newDict, prefixed_key, value);
82
+ }
83
+
84
+ self->item = newDict;
85
+ Py_RETURN_NONE;
86
+ }
87
+
88
+ static PyObject* updateChildren(PyObject*& target, PyObject* key, PyObject* value) {
89
+ if (target == Py_None) {
90
+ target = PyDict_New();
91
+ }
92
+
93
+ if (!PyDict_Contains(target, key)) {
94
+ PyDict_SetItem(target, key, value);
95
+ } else {
96
+ PyObject* existing = PyDict_GetItem(target, key);
97
+ if (PyList_Check(existing)) {
98
+ PyList_Append(existing, value);
99
+ } else {
100
+ PyObject* newList = PyList_New(2);
101
+ PyList_SetItem(newList, 0, existing);
102
+ PyList_SetItem(newList, 1, value);
103
+ PyDict_SetItem(target, key, newList);
104
+ }
105
+ }
106
+ return target;
107
+ }
108
+
109
+ static PyObject* endElement(PyDictHandler* self, PyObject* name_obj) {
110
+ if (!self->data_stack.empty()) {
111
+ PyObject* temp_data = strip(self->data);
112
+ bool has_data = (PyUnicode_GetLength(temp_data) > 0);
113
+ PyObject* py_data = has_data ? temp_data : Py_None;
114
+ PyObject* temp_item = self->item;
115
+
116
+ self->item = self->item_stack.back();
117
+ self->data = self->data_stack.back();
118
+ self->item_stack.pop_back();
119
+ self->data_stack.pop_back();
120
+
121
+ if (temp_item != Py_None) {
122
+ if (has_data) {
123
+ PyDict_SetItem(temp_item, self->cdata_key, py_data);
124
+ }
125
+ temp_item = PyDict_Copy(temp_item);
126
+ self->item = updateChildren(self->item, name_obj, temp_item);
127
+ }
128
+ else {
129
+ self->item = updateChildren(self->item, name_obj, py_data);
130
+ }
131
+ }
132
+ Py_RETURN_NONE;
133
+ }
134
+
135
+
136
+
137
+ static PyMethodDef PyDictHandler_methods[] = {
138
+ {"characters", (PyCFunction)characters, METH_O, "Handle character data"},
139
+ {"startElement", (PyCFunction)startElement, METH_VARARGS, "Handle start of an element"},
140
+ {"endElement", (PyCFunction)endElement, METH_O, "Handle end of an element"},
141
+ {NULL, NULL, 0, NULL}
142
+ };
143
+
144
+ static PyObject* PyDictHandler_get_item(PyDictHandler *self, void *closure)
145
+ {
146
+ Py_INCREF(self->item);
147
+ return self->item;
148
+ }
149
+
150
+ static PyGetSetDef PyDictHandler_getset[] = {
151
+ {
152
+ "item", /* name */
153
+ (getter)PyDictHandler_get_item, /* get */
154
+ NULL, /* set */
155
+ NULL, /* doc */
156
+ NULL /* closure */
157
+ },
158
+ {NULL} /* Sentinel */
159
+ };
160
+
161
+
162
+ static PyTypeObject PyDictHandlerType = {
163
+ PyVarObject_HEAD_INIT(NULL, 0) "pyxmlhandler._PyDictHandler", // tp_name
164
+ sizeof(PyDictHandler), // tp_basicsize
165
+ 0, // tp_itemsize
166
+ 0, // tp_dealloc
167
+ 0, // tp_vectorcall_offset
168
+ 0, // tp_getattr
169
+ 0, // tp_setattr
170
+ 0, // tp_as_async
171
+ 0, // tp_repr
172
+ 0, // tp_as_number
173
+ 0, // tp_as_sequence
174
+ 0, // tp_as_mapping
175
+ 0, // tp_hash
176
+ 0, // tp_call
177
+ 0, // tp_str
178
+ 0, // tp_getattro
179
+ 0, // tp_setattro
180
+ 0, // tp_as_buffer
181
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
182
+ "Handler that converts XML to Python dict", // tp_doc
183
+ 0, // tp_traverse
184
+ 0, // tp_clear
185
+ 0, // tp_richcompare
186
+ 0, // tp_weaklistoffset
187
+ 0, // tp_iter
188
+ 0, // tp_iternext
189
+ PyDictHandler_methods, // tp_methods
190
+ 0, // tp_members
191
+ PyDictHandler_getset, // tp_getset
192
+ 0, // tp_base
193
+ 0, // tp_dict
194
+ 0, // tp_descr_get
195
+ 0, // tp_descr_set
196
+ 0, // tp_dictoffset
197
+ (initproc)PyDictHandler_init, // tp_init
198
+ 0, // tp_alloc
199
+ PyDictHandler_new, // tp_new
200
+ };
201
+
202
+ static PyModuleDef pyxmlhandlermodule = {
203
+ PyModuleDef_HEAD_INIT,
204
+ "pyxmlhandler",
205
+ "Module that provides XML to Python dict parsing",
206
+ -1,
207
+ NULL, NULL, NULL, NULL, NULL
208
+ };
209
+
210
+ PyMODINIT_FUNC PyInit_pyxmlhandler(void) {
211
+ PyObject* m;
212
+ if (PyType_Ready(&PyDictHandlerType) < 0)
213
+ return NULL;
214
+
215
+ m = PyModule_Create(&pyxmlhandlermodule);
216
+ if (m == NULL)
217
+ return NULL;
218
+
219
+ Py_INCREF(&PyDictHandlerType);
220
+ PyModule_AddObject(m, "_PyDictHandler", (PyObject*)&PyDictHandlerType);
221
+ return m;
222
+ }
@@ -1,13 +1,14 @@
1
1
  import pytest
2
- from xmlpydict import parse
3
2
  import json
3
+ from xmlpydict import parse
4
4
 
5
5
 
6
6
  def test_simple():
7
- assert parse("") == {}
8
- assert parse("<p/>") == {"p": {}}
9
- assert parse("<p></p>") == {"p": {}}
7
+ assert parse("<p/>") == {"p": None}
8
+ assert parse("<p></p>") == {"p": None}
10
9
  assert parse('<p width="10"></p>') == {"p": {"@width": "10"}}
10
+ assert parse("<p>Hello</p>") == {"p": "Hello"}
11
+
11
12
  assert parse('<p width="10">Hello World</p>') == {
12
13
  "p": {"@width": "10", "#text": "Hello World"}
13
14
  }
@@ -21,7 +22,18 @@ def test_simple():
21
22
  "p": {"@width": "10", "@height": "20"}
22
23
  }
23
24
  assert parse("<p>Hey <b>bold</b>There</p>") == {
24
- "p": {"#text": "HeyThere", "b": {"#text": "bold"}}
25
+ "p": {"#text": "Hey There", "b": "bold"}
26
+ }
27
+ assert parse("<p>Hey <b>bold</b>There <b>bold</b>Buddy </p>") == {
28
+ "p": {"#text": "Hey There Buddy", "b": ["bold", "bold"]}
29
+ }
30
+
31
+ assert parse("<p>Hey <b/>There Buddy</p>") == {
32
+ "p": {"#text": "Hey There Buddy", "b": None}
33
+ }
34
+
35
+ assert parse("<p>Hey <b/>There Buddy <b/> </p>") == {
36
+ "p": {"#text": "Hey There Buddy", "b": [None, None]}
25
37
  }
26
38
 
27
39
  assert (
@@ -66,15 +78,18 @@ def test_simple():
66
78
  )
67
79
 
68
80
 
69
- def test_nested():
70
- assert parse("<book><p/></book> ") == {"book": {"p": {}}}
71
- assert parse("<book><p></p></book>") == {"book": {"p": {}}}
72
- assert parse("<book><p></p></book><card/>") == {"book": {"p": {}}, "card": {}}
73
- assert parse("<pizza></pizza><book><p></p></book><card/>") == {
74
- "pizza": {},
75
- "book": {"p": {}},
76
- "card": {},
81
+ def test_cdata():
82
+ assert parse("<content><![CDATA[<p>This is a paragraph</p>]]></content>") == {
83
+ "content": "<p>This is a paragraph</p>"
77
84
  }
85
+ assert parse(
86
+ "<special_chars><![CDATA[$ ^ * % & <> () + - + ` ~]]></special_chars>"
87
+ ) == {"special_chars": "$ ^ * % & <> () + - + ` ~"}
88
+
89
+
90
+ def test_nested():
91
+ assert parse("<book><p/></book> ") == {"book": {"p": None}}
92
+ assert parse("<book><p></p></book>") == {"book": {"p": None}}
78
93
 
79
94
 
80
95
  def test_list():
@@ -89,12 +104,20 @@ def test_list():
89
104
 
90
105
 
91
106
  def test_comment():
92
- assert parse("<!-- simple comment -->") == {}
107
+ assert parse("<p/><!-- simple comment -->") == {"p": None}
93
108
  comment = """<world>
94
109
  <!-- $comment+++@python -->
95
110
  <lake>Content</lake>
96
111
  </world>"""
97
- assert parse(comment) == {"world": {"lake": {"#text": "Content"}}}
112
+ assert parse(comment) == {"world": {"lake": "Content"}}
113
+ multiple_comments = """<book>
114
+ <!-- Comment 0 -->
115
+ <!-- Comment 1 -->
116
+ <lines>510</lines>
117
+ <!-- Comment 2 -->
118
+ <!-- -->
119
+ </book>"""
120
+ assert parse(multiple_comments) == {"book": {"lines": "510"}}
98
121
 
99
122
 
100
123
  def test_files():
@@ -269,16 +292,61 @@ def test_files():
269
292
 
270
293
  def test_exception():
271
294
  xml_strings = [
272
- "< p/>",
273
- "<p>",
274
- "<p/ >",
275
295
  "<p height'10'/>",
276
296
  "<p height='10'width='5'/>",
277
- "<p width='5/>",
278
297
  "<p width=5'/>",
279
- "</p>",
280
298
  "<pwidth='5'/>",
299
+ "<!---->",
300
+ "<a></p>",
301
+ "<></>",
302
+ "</>",
303
+ "<",
304
+ ">",
305
+ "<nested></p></nested>",
281
306
  ]
282
307
  for xml_str in xml_strings:
283
308
  with pytest.raises(Exception):
284
309
  parse(xml_str)
310
+
311
+
312
def test_prefix():
    """Attribute keys carry whatever ``attr_prefix`` the caller supplies."""
    # No attributes: the prefix has nothing to apply to.
    assert parse("<p></p>", attr_prefix="$") == {"p": None}
    assert parse('<p width="10"></p>', attr_prefix="$") == {"p": {"$width": "10"}}

    # Same two-attribute element under a single-character, a long and an
    # empty prefix.
    two_attrs = '<p width="10" height="5"></p>'
    for prefix in ("$", "$$$$$$$$$", ""):
        expected = {"p": {prefix + "width": "10", prefix + "height": "5"}}
        assert parse(two_attrs, attr_prefix=prefix) == expected
321
+ assert parse('<p width="10" height="5"></p>', attr_prefix="") == {
322
+ "p": {"width": "10", "height": "5"}
323
+ }
324
+
325
+
326
+ def test_document():
327
+ s = """<?xml version="1.0" encoding="UTF-8"?><repository>
328
+ <project pypi="xmlpydict">
329
+ <title>XML document parser</title>
330
+ <author>Matthew Taylor</author>
331
+ </project>
332
+ <project pypi="blank">
333
+ <title>Test project</title>
334
+ <author>Matthew Taylor</author>
335
+ </project>
336
+ </repository>"""
337
+ assert parse(s) == {
338
+ "repository": {
339
+ "project": [
340
+ {
341
+ "@pypi": "xmlpydict",
342
+ "title": "XML document parser",
343
+ "author": "Matthew Taylor",
344
+ },
345
+ {
346
+ "@pypi": "blank",
347
+ "title": "Test project",
348
+ "author": "Matthew Taylor",
349
+ },
350
+ ]
351
+ }
352
+ }
@@ -0,0 +1,45 @@
1
+ from pyxmlhandler import _PyDictHandler
2
+ from xml.parsers import expat
3
+
4
+
5
def parse(xml_content, attr_prefix: str = "@", cdata_key: str = "#text") -> dict:
    """
    Convert an in-memory XML document into nested Python data.

    Args:
        xml_content: The XML document, passed to expat in a single final call.
        attr_prefix: Text prepended to attribute names to form dictionary keys.
        cdata_key: Dictionary key under which an element's character data is stored.

    Returns:
        The ``item`` built by the handler once the whole document has been parsed.
    """
    dict_builder = _PyDictHandler(attr_prefix=attr_prefix, cdata_key=cdata_key)
    xml_parser = expat.ParserCreate()

    # Route the three expat events the handler understands to its methods.
    xml_parser.StartElementHandler = dict_builder.startElement
    xml_parser.EndElementHandler = dict_builder.endElement
    xml_parser.CharacterDataHandler = dict_builder.characters

    # True marks this as the final (and only) chunk of input.
    xml_parser.Parse(xml_content, True)
    return dict_builder.item
24
+
25
+
26
def parse_file(file_path, attr_prefix: str = "@", cdata_key: str = "#text") -> dict:
    """
    Parse an XML file into a python dictionary.

    Args:
        file_path: The path to the XML file to be parsed.
        attr_prefix: The prefix to use for attributes in the resulting dictionary.
        cdata_key: The key to use for character data in the resulting dictionary.

    Returns:
        A dictionary representation of the XML file content.
    """
    handler = _PyDictHandler(attr_prefix=attr_prefix, cdata_key=cdata_key)
    parser = expat.ParserCreate()
    parser.CharacterDataHandler = handler.characters
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    # ParseFile requires read() to return bytes; a text-mode file makes it
    # raise TypeError. Reading raw bytes also lets expat apply the encoding
    # declared in the document itself.
    with open(file_path, "rb") as f:
        parser.ParseFile(f)
    return handler.item