wikitextparser 0.55.12__tar.gz → 0.56.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.github/workflows/tests.yml +1 -1
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/CHANGELOG.rst +16 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/PKG-INFO +3 -3
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/README.rst +1 -1
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/pyproject.toml +1 -1
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/__init__.py +1 -1
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_parser_function.py +1 -4
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_section.py +2 -2
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikitext.py +17 -31
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.coveragerc +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.gitignore +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.readthedocs.yaml +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/LICENSE.md +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/CHANGELOG.rst +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/Makefile +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/README.rst +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/conf.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/index.rst +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/make.bat +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_argument.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_cell.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_comment_bold_italic.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_config.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_externallink.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_parameter.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_spans.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_table.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_tag.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_template.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikilink.py +0 -0
- {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikilist.py +0 -0
|
@@ -13,7 +13,7 @@ jobs:
|
|
|
13
13
|
strategy:
|
|
14
14
|
matrix:
|
|
15
15
|
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
16
|
-
python-version: ["3.
|
|
16
|
+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
|
17
17
|
env:
|
|
18
18
|
OS: ${{ matrix.os }}
|
|
19
19
|
PYTHON: ${{ matrix.python-version }}
|
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
v0.56.1
|
|
2
|
+
-------
|
|
3
|
+
* Fixed a bug in ``get_sections`` when ``top_levels_only`` was ``True``.
|
|
4
|
+
|
|
5
|
+
v0.56.0
|
|
6
|
+
-------
|
|
7
|
+
* Drop Python 3.7 support.
|
|
8
|
+
|
|
9
|
+
v0.55.14
|
|
10
|
+
--------
|
|
11
|
+
* Fixed a bug in detecting the text of an external link. (#137)
|
|
12
|
+
|
|
13
|
+
v0.55.13
|
|
14
|
+
--------
|
|
15
|
+
* Fixed a bug in ``Section.level`` resulting in malformed section titles when multiple levels are added (#135)
|
|
16
|
+
|
|
1
17
|
v0.55.12
|
|
2
18
|
--------
|
|
3
19
|
* Performance improvements in extracting bold and italic nodes. (#133)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: wikitextparser
|
|
3
|
-
Version: 0.55.12
|
|
3
|
+
Version: 0.56.1
|
|
4
4
|
Summary: A simple parsing tool for MediaWiki's wikitext markup.
|
|
5
5
|
Keywords: MediaWiki,wikitext,parser
|
|
6
6
|
Author-email: 5j9 <5j9@users.noreply.github.com>
|
|
7
|
-
Requires-Python: >= 3.7
|
|
7
|
+
Requires-Python: >= 3.8
|
|
8
8
|
Description-Content-Type: text/x-rst
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
10
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
@@ -38,7 +38,7 @@ The purpose is to allow users easily extract and/or manipulate templates, templa
|
|
|
38
38
|
Installation
|
|
39
39
|
============
|
|
40
40
|
|
|
41
|
-
- Python 3.7+ is required
|
|
41
|
+
- Python 3.8+ is required
|
|
42
42
|
- ``pip install wikitextparser``
|
|
43
43
|
|
|
44
44
|
Usage
|
|
@@ -60,10 +60,7 @@ class SubWikiTextWithArgs(SubWikiText):
|
|
|
60
60
|
else:
|
|
61
61
|
arg_span = old_span
|
|
62
62
|
arg = Argument(lststr, type_to_spans, arg_span, type_, self)
|
|
63
|
-
arg.
|
|
64
|
-
lststr[0][s:e],
|
|
65
|
-
shadow[arg_self_start:arg_self_end],
|
|
66
|
-
)
|
|
63
|
+
arg._span_data[3] = shadow[arg_self_start:arg_self_end]
|
|
67
64
|
arguments_append(arg)
|
|
68
65
|
return arguments
|
|
69
66
|
|
|
@@ -42,9 +42,9 @@ class Section(SubWikiText):
|
|
|
42
42
|
if level_diff == 0:
|
|
43
43
|
return
|
|
44
44
|
if level_diff < 0:
|
|
45
|
-
new_equals = '=' *
|
|
45
|
+
new_equals = '=' * -level_diff
|
|
46
46
|
self.insert(0, new_equals)
|
|
47
|
-
self.insert(m.end(2)
|
|
47
|
+
self.insert(m.end(2) - level_diff, new_equals)
|
|
48
48
|
return
|
|
49
49
|
del self[:level_diff]
|
|
50
50
|
del self[m.end(2) : m.end(2) + level_diff]
|
|
@@ -40,7 +40,6 @@ from ._spans import (
|
|
|
40
40
|
END_TAG_PATTERN,
|
|
41
41
|
EXTERNAL_LINK_URL_TAIL,
|
|
42
42
|
INVALID_URL_CHARS,
|
|
43
|
-
PARSABLE_TAG_EXTENSION_NAME,
|
|
44
43
|
START_TAG_PATTERN,
|
|
45
44
|
parse_to_spans,
|
|
46
45
|
rc,
|
|
@@ -52,10 +51,6 @@ NAME_CAPTURING_HTML_START_TAG_FINDITER = rc(
|
|
|
52
51
|
)
|
|
53
52
|
).finditer
|
|
54
53
|
|
|
55
|
-
PARSABLE_TAG_EXTENSIONS_MATCH = rc(
|
|
56
|
-
rb'<' + PARSABLE_TAG_EXTENSION_NAME + rb'\b', IGNORECASE
|
|
57
|
-
).match
|
|
58
|
-
|
|
59
54
|
# External links
|
|
60
55
|
BRACKET_EXTERNAL_LINK_SCHEMES = regex_pattern(
|
|
61
56
|
_bare_external_link_schemes | {'//'}
|
|
@@ -74,7 +69,7 @@ INVALID_EXT_CHARS_SUB = rc( # the [:-4] slice allows \[ and \]
|
|
|
74
69
|
|
|
75
70
|
# Sections
|
|
76
71
|
SECTION_HEADING = rb'^(?<equals>={1,6})[^\n]+?(?P=equals)[ \t]*+$'
|
|
77
|
-
|
|
72
|
+
SUB_SECTION = rb'(?:^(?P=equals)=[^\n]+?(?P=equals)=[ \t]*+$.*?)*'
|
|
78
73
|
LEAD_SECTION = rb'(?<section>(?<equals>).*?)'
|
|
79
74
|
SECTIONS_FULLMATCH = rc(
|
|
80
75
|
LEAD_SECTION
|
|
@@ -89,9 +84,8 @@ SECTIONS_TOP_LEVELS_ONLY = rc(
|
|
|
89
84
|
+ rb'(?<section>'
|
|
90
85
|
+ SECTION_HEADING
|
|
91
86
|
+ rb'.*?'
|
|
92
|
-
+
|
|
93
|
-
+ rb'
|
|
94
|
-
rb')*',
|
|
87
|
+
+ SUB_SECTION
|
|
88
|
+
+ rb')*',
|
|
95
89
|
DOTALL | MULTILINE | VERBOSE,
|
|
96
90
|
).fullmatch
|
|
97
91
|
|
|
@@ -113,11 +107,7 @@ TABLE_FINDITER = rc(
|
|
|
113
107
|
DOTALL | MULTILINE | VERBOSE,
|
|
114
108
|
).finditer
|
|
115
109
|
|
|
116
|
-
substitute_apostrophes = rc(
|
|
117
|
-
rb"('\0*+){2,}+(?=[^']|$)",
|
|
118
|
-
MULTILINE | VERBOSE,
|
|
119
|
-
).sub
|
|
120
|
-
find_lines = rc(rb'(.*?)$').finditer
|
|
110
|
+
substitute_apostrophes = rc(rb"('\0*+){2,}+(?=[^']|$)", MULTILINE).sub
|
|
121
111
|
|
|
122
112
|
BOLD_FINDITER = rc(
|
|
123
113
|
rb"""
|
|
@@ -213,7 +203,7 @@ class WikiText:
|
|
|
213
203
|
# The following class attribute acts as a default value.
|
|
214
204
|
_type = 'WikiText'
|
|
215
205
|
|
|
216
|
-
__slots__ = '_type_to_spans', '_lststr', '_span_data'
|
|
206
|
+
__slots__ = '_type_to_spans', '_lststr', '_span_data'
|
|
217
207
|
|
|
218
208
|
def __init__(
|
|
219
209
|
self,
|
|
@@ -241,7 +231,6 @@ class WikiText:
|
|
|
241
231
|
if _type not in SPAN_PARSER_TYPES:
|
|
242
232
|
type_to_spans = self._type_to_spans = parse_to_spans(byte_array)
|
|
243
233
|
type_to_spans[_type] = [span]
|
|
244
|
-
self._shadow_cache = string, byte_array
|
|
245
234
|
else:
|
|
246
235
|
# In SPAN_PARSER_TYPES, we can't pass the original byte_array to
|
|
247
236
|
# parser to generate the shadow because it will replace the whole
|
|
@@ -259,7 +248,6 @@ class WikiText:
|
|
|
259
248
|
byte_array[0] = 3
|
|
260
249
|
byte_array[-1] = 32
|
|
261
250
|
type_to_spans = parse_to_spans(byte_array)
|
|
262
|
-
self._shadow_cache = string, byte_array
|
|
263
251
|
type_to_spans[_type].insert(0, span)
|
|
264
252
|
self._type_to_spans = type_to_spans
|
|
265
253
|
if type(self) is Parameter:
|
|
@@ -443,10 +431,7 @@ class WikiText:
|
|
|
443
431
|
@property
|
|
444
432
|
def span(self) -> tuple:
|
|
445
433
|
"""Return the span of self relative to the start of the root node."""
|
|
446
|
-
|
|
447
|
-
# and return statements requires enclosing parentheses:
|
|
448
|
-
# https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
|
|
449
|
-
return (*self._span_data[:2],) # noqa
|
|
434
|
+
return (*self._span_data[:2],)
|
|
450
435
|
|
|
451
436
|
@property
|
|
452
437
|
def string(self) -> str:
|
|
@@ -1072,12 +1057,11 @@ class WikiText:
|
|
|
1072
1057
|
s = starts[1]
|
|
1073
1058
|
append_bold_start(s)
|
|
1074
1059
|
return b'_' * (s - starts[0]) + m.string[s : m.end()]
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
raise # execution should never reach here
|
|
1060
|
+
# more than 5 apostrophes -> hide the prior ones
|
|
1061
|
+
odd_bold_italics ^= True
|
|
1062
|
+
odd_italics ^= True
|
|
1063
|
+
s = starts[-5]
|
|
1064
|
+
return b'_' * (s - starts[0]) + m.string[s : m.end()]
|
|
1081
1065
|
|
|
1082
1066
|
return bytearray(b'\n').join(
|
|
1083
1067
|
[
|
|
@@ -1206,7 +1190,7 @@ class WikiText:
|
|
|
1206
1190
|
)
|
|
1207
1191
|
|
|
1208
1192
|
@property
|
|
1209
|
-
def _ext_link_shadow(self):
|
|
1193
|
+
def _ext_link_shadow(self) -> bytearray:
|
|
1210
1194
|
"""Replace the invalid chars of SPAN_PARSER_TYPES with b'_'.
|
|
1211
1195
|
|
|
1212
1196
|
For comments, all characters are replaced, but for ('Template',
|
|
@@ -1216,12 +1200,14 @@ class WikiText:
|
|
|
1216
1200
|
byte_array = bytearray(self._lststr[0][ss:se], 'ascii', 'replace')
|
|
1217
1201
|
subspans = self._subspans
|
|
1218
1202
|
for s, e, _, _ in subspans('Comment'):
|
|
1219
|
-
byte_array[s:e] = (e - s) * b'_'
|
|
1203
|
+
byte_array[s - ss : e - ss] = (e - s) * b'_'
|
|
1220
1204
|
for s, e, _, _ in subspans('WikiLink'):
|
|
1221
|
-
byte_array[s:e] = (e - s) * b' '
|
|
1205
|
+
byte_array[s - ss : e - ss] = (e - s) * b' '
|
|
1222
1206
|
for type_ in 'Template', 'ParserFunction', 'Parameter':
|
|
1223
1207
|
for s, e, _, _ in subspans(type_):
|
|
1224
|
-
byte_array[s:e] = INVALID_EXT_CHARS_SUB(
|
|
1208
|
+
byte_array[s - ss : e - ss] = INVALID_EXT_CHARS_SUB(
|
|
1209
|
+
b' ', byte_array[s:e]
|
|
1210
|
+
)
|
|
1225
1211
|
return byte_array
|
|
1226
1212
|
|
|
1227
1213
|
@property
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|