xmlpydict 0.0.11__tar.gz → 0.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmlpydict
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: xml to dictionary tool for python
5
5
  Author-email: Matthew Taylor <matthew.taylor.andre@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/MatthewAndreTaylor/xml-to-pydict
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
18
18
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Classifier: Topic :: Text Processing :: Markup :: XML
20
- Requires-Python: >=3.7
20
+ Requires-Python: >=3.8
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Provides-Extra: tests
@@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xmlpydict"
7
- version = "0.0.11"
7
+ version = "0.0.13"
8
8
  description="xml to dictionary tool for python"
9
9
  authors = [
10
10
  {name = "Matthew Taylor", email = "matthew.taylor.andre@gmail.com"},
11
11
  ]
12
12
  urls = {Homepage = "https://github.com/MatthewAndreTaylor/xml-to-pydict"}
13
- requires-python = ">=3.7"
13
+ requires-python = ">=3.8"
14
14
  keywords = [ "xml", "dictionary" ]
15
15
  classifiers = [
16
16
  "Development Status :: 3 - Alpha",
@@ -1,6 +1,6 @@
1
1
  import pytest
2
2
  import json
3
- from xmlpydict import parse
3
+ from xmlpydict import parse, parse_file
4
4
 
5
5
 
6
6
  def test_simple():
@@ -350,3 +350,35 @@ def test_document():
350
350
  ]
351
351
  }
352
352
  }
353
+
354
+
355
+ def test_parse_file(tmp_path):
356
+ s = """<?xml version="1.0" encoding="UTF-8"?><repository>
357
+ <project pypi="xmlpydict">
358
+ <title>XML document parser</title>
359
+ <author>Matthew Taylor</author>
360
+ </project>
361
+ <project pypi="blank">
362
+ <title>Test project</title>
363
+ <author>Matthew Taylor</author>
364
+ </project>
365
+ </repository>"""
366
+ with open(tmp_path / "test.xml", "w") as f:
367
+ f.write(s)
368
+
369
+ assert parse_file(tmp_path / "test.xml") == {
370
+ "repository": {
371
+ "project": [
372
+ {
373
+ "@pypi": "xmlpydict",
374
+ "title": "XML document parser",
375
+ "author": "Matthew Taylor",
376
+ },
377
+ {
378
+ "@pypi": "blank",
379
+ "title": "Test project",
380
+ "author": "Matthew Taylor",
381
+ },
382
+ ]
383
+ }
384
+ }
@@ -40,6 +40,50 @@ def parse_file(file_path, attr_prefix: str = "@", cdata_key: str = "#text") -> d
40
40
  parser.CharacterDataHandler = handler.characters
41
41
  parser.StartElementHandler = handler.startElement
42
42
  parser.EndElementHandler = handler.endElement
43
- with open(file_path, "r", encoding="utf-8") as f:
43
+ with open(file_path, "rb") as f:
44
44
  parser.ParseFile(f)
45
45
  return handler.item
46
+
47
+
48
+ def iter_xml_documents(
49
+ file_path, chunk_size: int = 64 * 1024, start_token: bytes = b"<?xml"
50
+ ):
51
+ buffer = b""
52
+ with open(file_path, "rb") as f:
53
+ while True:
54
+ chunk = f.read(chunk_size)
55
+ if not chunk:
56
+ if buffer.strip():
57
+ yield buffer
58
+ break
59
+ buffer += chunk
60
+ while True:
61
+ start_index = buffer.find(start_token, 1)
62
+ if start_index == -1:
63
+ break
64
+ yield buffer[:start_index]
65
+ buffer = buffer[start_index:]
66
+
67
+
68
+ def parse_xml_collections(
69
+ file_path,
70
+ attr_prefix: str = "@",
71
+ cdata_key: str = "#text",
72
+ start_token: bytes = b"<?xml",
73
+ ):
74
+ """
75
+ Parse collections of xml documents based on a delimiter start_token
76
+
77
+ Args:
78
+ file_path: The path to the XML file to be parsed.
79
+ attr_prefix: The prefix to use for attributes in the resulting dictionary.
80
+ cdata_key: The key to use for character data in the resulting dictionary.
81
+ start_token: The byte sequence that delimits the start of each XML document.
82
+
83
+ Returns:
84
+ A generator yielding dictionaries representing each XML document in the collection.
85
+ """
86
+ for xml_content in iter_xml_documents(file_path, start_token=start_token):
87
+ yield parse(
88
+ xml_content.decode("utf-8"), attr_prefix=attr_prefix, cdata_key=cdata_key
89
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmlpydict
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: xml to dictionary tool for python
5
5
  Author-email: Matthew Taylor <matthew.taylor.andre@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/MatthewAndreTaylor/xml-to-pydict
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
18
18
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Classifier: Topic :: Text Processing :: Markup :: XML
20
- Requires-Python: >=3.7
20
+ Requires-Python: >=3.8
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Provides-Extra: tests
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes