wikitextparser 0.56.2__tar.gz → 0.56.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wikitextparser-0.56.3/.vscode/settings.json +7 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/CHANGELOG.rst +5 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/PKG-INFO +1 -1
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/pyproject.toml +10 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/__init__.py +1 -1
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_argument.py +13 -10
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_cell.py +14 -13
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_comment_bold_italic.py +19 -13
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_externallink.py +3 -3
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_parameter.py +4 -4
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_parser_function.py +8 -6
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_section.py +2 -2
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_spans.py +10 -4
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_table.py +13 -11
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_tag.py +8 -6
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_template.py +15 -13
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_wikilink.py +8 -10
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_wikilist.py +23 -20
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_wikitext.py +121 -102
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/.coveragerc +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/.github/workflows/tests.yml +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/.gitignore +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/.readthedocs.yaml +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/LICENSE.md +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/README.rst +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/CHANGELOG.rst +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/Makefile +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/README.rst +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/conf.py +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/index.rst +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/docs/make.bat +0 -0
- {wikitextparser-0.56.2 → wikitextparser-0.56.3}/wikitextparser/_config.py +0 -0
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
v0.56.3 (2024-10-18)
|
|
2
|
+
--------------------
|
|
3
|
+
* Fixed a bug in detecting HTML tags nested in wiki markup. (#140)
|
|
4
|
+
* Improved type hints.
|
|
5
|
+
|
|
1
6
|
v0.56.2
|
|
2
7
|
-------
|
|
3
8
|
* Fixed a bug in ``external_links`` property where ``|`` was recognized as part of the link by mistake. (#139)
|
|
@@ -50,6 +50,7 @@ line-length = 79
|
|
|
50
50
|
format.quote-style = 'single'
|
|
51
51
|
lint.isort.combine-as-imports = true
|
|
52
52
|
lint.extend-select = [
|
|
53
|
+
'FA', # flake8-future-annotations
|
|
53
54
|
'I', # isort
|
|
54
55
|
'UP', # pyupgrade
|
|
55
56
|
]
|
|
@@ -60,3 +61,12 @@ lint.ignore = [
|
|
|
60
61
|
|
|
61
62
|
[tool.pytest.ini_options]
|
|
62
63
|
addopts = '--quiet --tb=short'
|
|
64
|
+
|
|
65
|
+
[tool.pyright]
|
|
66
|
+
typeCheckingMode = 'standard'
|
|
67
|
+
reportDeprecated = "warning"
|
|
68
|
+
reportPropertyTypeMismatch = "warning"
|
|
69
|
+
reportUnnecessaryCast = "warning"
|
|
70
|
+
reportUnnecessaryContains = "warning"
|
|
71
|
+
reportUnnecessaryIsInstance = "warning"
|
|
72
|
+
reportUnnecessaryTypeIgnoreComment = true
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from typing import MutableSequence
|
|
4
4
|
|
|
5
|
+
from regex import DOTALL, MULTILINE, Match
|
|
6
|
+
|
|
7
|
+
from ._spans import TypeToSpans
|
|
5
8
|
from ._wikitext import SECTION_HEADING, SubWikiText, rc
|
|
6
9
|
|
|
7
10
|
ARG_SHADOW_FULLMATCH = rc(
|
|
@@ -25,28 +28,28 @@ class Argument(SubWikiText):
|
|
|
25
28
|
|
|
26
29
|
def __init__(
|
|
27
30
|
self,
|
|
28
|
-
string:
|
|
29
|
-
_type_to_spans:
|
|
30
|
-
_span:
|
|
31
|
-
_type:
|
|
32
|
-
_parent:
|
|
31
|
+
string: str | MutableSequence[str],
|
|
32
|
+
_type_to_spans: TypeToSpans | None = None,
|
|
33
|
+
_span: list[int] | None = None,
|
|
34
|
+
_type: str | int | None = None,
|
|
35
|
+
_parent: SubWikiTextWithArgs | None = None,
|
|
33
36
|
):
|
|
34
37
|
super().__init__(string, _type_to_spans, _span, _type)
|
|
35
38
|
self._parent = _parent or self
|
|
36
39
|
self._shadow_match_cache = None, None
|
|
37
40
|
|
|
38
41
|
@property
|
|
39
|
-
def _shadow_match(self):
|
|
42
|
+
def _shadow_match(self) -> Match[bytes]:
|
|
40
43
|
cached_shadow_match, cache_string = self._shadow_match_cache
|
|
41
44
|
self_string = str(self)
|
|
42
45
|
if cache_string == self_string:
|
|
43
|
-
return cached_shadow_match
|
|
46
|
+
return cached_shadow_match # type: ignore
|
|
44
47
|
ss, se, _, _ = self._span_data
|
|
45
48
|
parent = self._parent
|
|
46
49
|
ps = parent._span_data[0]
|
|
47
50
|
shadow_match = ARG_SHADOW_FULLMATCH(parent._shadow[ss - ps : se - ps])
|
|
48
51
|
self._shadow_match_cache = shadow_match, self_string
|
|
49
|
-
return shadow_match
|
|
52
|
+
return shadow_match # type: ignore
|
|
50
53
|
|
|
51
54
|
@property
|
|
52
55
|
def name(self) -> str:
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from typing import MutableSequence
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from regex import DOTALL, VERBOSE, Match
|
|
6
|
+
|
|
7
|
+
from ._spans import ATTRS_MATCH, TypeToSpans
|
|
6
8
|
from ._tag import SubWikiTextWithAttrs
|
|
7
9
|
from ._wikitext import rc
|
|
8
10
|
|
|
@@ -144,15 +146,14 @@ class Cell(SubWikiTextWithAttrs):
|
|
|
144
146
|
|
|
145
147
|
def __init__(
|
|
146
148
|
self,
|
|
147
|
-
string:
|
|
149
|
+
string: str | MutableSequence[str],
|
|
148
150
|
header: bool = False,
|
|
149
|
-
_type_to_spans:
|
|
150
|
-
_span:
|
|
151
|
-
_type: int = None,
|
|
152
|
-
_match: Match = None,
|
|
153
|
-
_attrs_match: Match = None,
|
|
151
|
+
_type_to_spans: TypeToSpans | None = None,
|
|
152
|
+
_span: list | None = None,
|
|
153
|
+
_type: int | None = None,
|
|
154
|
+
_match: Match | None = None,
|
|
155
|
+
_attrs_match: Match | None = None,
|
|
154
156
|
) -> None:
|
|
155
|
-
"""Initialize the object."""
|
|
156
157
|
super().__init__(string, _type_to_spans, _span, _type)
|
|
157
158
|
self._header = header
|
|
158
159
|
if _match:
|
|
@@ -175,7 +176,7 @@ class Cell(SubWikiTextWithAttrs):
|
|
|
175
176
|
self._attrs_match_cache = self._match_cache = None, None
|
|
176
177
|
|
|
177
178
|
@property
|
|
178
|
-
def _match(self):
|
|
179
|
+
def _match(self) -> Match[bytes]:
|
|
179
180
|
"""Return the match object for the current tag. Cache the result.
|
|
180
181
|
|
|
181
182
|
Be extra careful when using this property. The position of match
|
|
@@ -185,7 +186,7 @@ class Cell(SubWikiTextWithAttrs):
|
|
|
185
186
|
cache_match, cache_string = self._match_cache
|
|
186
187
|
string = self.string
|
|
187
188
|
if cache_string == string:
|
|
188
|
-
return cache_match
|
|
189
|
+
return cache_match # type: ignore
|
|
189
190
|
shadow = self._shadow
|
|
190
191
|
if shadow[0] == 10: # ord('\n')
|
|
191
192
|
m = NEWLINE_CELL_MATCH(shadow)
|
|
@@ -196,7 +197,7 @@ class Cell(SubWikiTextWithAttrs):
|
|
|
196
197
|
m = INLINE_NONHAEDER_CELL_MATCH(shadow)
|
|
197
198
|
self._match_cache = m, string
|
|
198
199
|
self._attrs_match_cache = None, None
|
|
199
|
-
return m
|
|
200
|
+
return m # type: ignore
|
|
200
201
|
|
|
201
202
|
@property
|
|
202
203
|
def value(self) -> str:
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from typing import MutableSequence
|
|
4
4
|
|
|
5
|
+
from regex import DOTALL, MULTILINE, Match
|
|
6
|
+
|
|
7
|
+
from ._spans import TypeToSpans
|
|
5
8
|
from ._wikitext import SubWikiText, rc
|
|
6
9
|
|
|
7
10
|
COMMENT_PATTERN = r'<!--[\s\S]*?(?>-->|\Z)'
|
|
@@ -29,13 +32,16 @@ class Comment(SubWikiText):
|
|
|
29
32
|
return s[4:]
|
|
30
33
|
|
|
31
34
|
@property
|
|
32
|
-
def comments(self) ->
|
|
35
|
+
def comments(self) -> list[Comment]:
|
|
33
36
|
return []
|
|
34
37
|
|
|
35
38
|
|
|
36
39
|
class BoldItalic(SubWikiText):
|
|
37
40
|
__slots__ = ()
|
|
38
41
|
|
|
42
|
+
@property
|
|
43
|
+
def _match(self) -> Match[str]: ...
|
|
44
|
+
|
|
39
45
|
@property
|
|
40
46
|
def text(self) -> str:
|
|
41
47
|
"""Return text value of self (without triple quotes)."""
|
|
@@ -49,7 +55,7 @@ class BoldItalic(SubWikiText):
|
|
|
49
55
|
self[b:e] = s
|
|
50
56
|
|
|
51
57
|
@property
|
|
52
|
-
def _content_span(self) ->
|
|
58
|
+
def _content_span(self) -> tuple[int, int]:
|
|
53
59
|
# noinspection PyUnresolvedReferences
|
|
54
60
|
return self._match.span(1)
|
|
55
61
|
|
|
@@ -58,8 +64,8 @@ class Bold(BoldItalic):
|
|
|
58
64
|
__slots__ = ()
|
|
59
65
|
|
|
60
66
|
@property
|
|
61
|
-
def _match(self):
|
|
62
|
-
return BOLD_FULLMATCH(self.string)
|
|
67
|
+
def _match(self) -> Match[str]:
|
|
68
|
+
return BOLD_FULLMATCH(self.string) # type: ignore
|
|
63
69
|
|
|
64
70
|
|
|
65
71
|
class Italic(BoldItalic):
|
|
@@ -67,10 +73,10 @@ class Italic(BoldItalic):
|
|
|
67
73
|
|
|
68
74
|
def __init__(
|
|
69
75
|
self,
|
|
70
|
-
string:
|
|
71
|
-
_type_to_spans:
|
|
72
|
-
_span:
|
|
73
|
-
_type:
|
|
76
|
+
string: str | MutableSequence[str],
|
|
77
|
+
_type_to_spans: TypeToSpans | None = None,
|
|
78
|
+
_span: list[int] | None = None,
|
|
79
|
+
_type: str | int | None = None,
|
|
74
80
|
end_token: bool = True,
|
|
75
81
|
):
|
|
76
82
|
"""Initialize the Italic object.
|
|
@@ -82,7 +88,7 @@ class Italic(BoldItalic):
|
|
|
82
88
|
self.end_token = end_token
|
|
83
89
|
|
|
84
90
|
@property
|
|
85
|
-
def _match(self):
|
|
91
|
+
def _match(self) -> Match[str]:
|
|
86
92
|
if self.end_token:
|
|
87
|
-
return ITALIC_FULLMATCH(self.string)
|
|
88
|
-
return ITALIC_NOEND_FULLMATCH(self.string)
|
|
93
|
+
return ITALIC_FULLMATCH(self.string) # type: ignore
|
|
94
|
+
return ITALIC_NOEND_FULLMATCH(self.string) # type: ignore
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from ._wikitext import BRACKET_EXTERNAL_LINK_URL, IGNORECASE, SubWikiText, rc
|
|
4
4
|
|
|
@@ -28,7 +28,7 @@ class ExternalLink(SubWikiText):
|
|
|
28
28
|
self[0 : len(self.url)] = newurl
|
|
29
29
|
|
|
30
30
|
@property
|
|
31
|
-
def text(self) ->
|
|
31
|
+
def text(self) -> str | None:
|
|
32
32
|
"""The text part (the part after the url).
|
|
33
33
|
|
|
34
34
|
getter: Return None if this is a bare link or has no associated text.
|
|
@@ -74,5 +74,5 @@ class ExternalLink(SubWikiText):
|
|
|
74
74
|
return self(0) == '['
|
|
75
75
|
|
|
76
76
|
@property
|
|
77
|
-
def external_links(self) ->
|
|
77
|
+
def external_links(self) -> list[ExternalLink]:
|
|
78
78
|
return []
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from ._wikitext import WS, SubWikiText
|
|
4
4
|
|
|
@@ -35,7 +35,7 @@ class Parameter(SubWikiText):
|
|
|
35
35
|
return '|' if self._shadow.find(124) != -1 else ''
|
|
36
36
|
|
|
37
37
|
@property
|
|
38
|
-
def default(self) ->
|
|
38
|
+
def default(self) -> str | None:
|
|
39
39
|
"""The default value of current parameter.
|
|
40
40
|
|
|
41
41
|
getter: Return None if there is no default.
|
|
@@ -100,9 +100,9 @@ class Parameter(SubWikiText):
|
|
|
100
100
|
] = '{{{' + new_default_name + '|' + innermost_default + '}}}'
|
|
101
101
|
|
|
102
102
|
@property
|
|
103
|
-
def parameters(self) ->
|
|
103
|
+
def parameters(self) -> list[Parameter]:
|
|
104
104
|
return super().parameters[1:]
|
|
105
105
|
|
|
106
106
|
@property
|
|
107
|
-
def _content_span(self) ->
|
|
107
|
+
def _content_span(self) -> tuple[int, int]:
|
|
108
108
|
return 3, -3
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from bisect import insort
|
|
2
|
-
from typing import Iterable
|
|
4
|
+
from typing import Iterable
|
|
3
5
|
|
|
4
6
|
from ._argument import Argument
|
|
5
7
|
from ._wikilist import WikiList
|
|
@@ -19,7 +21,7 @@ class SubWikiTextWithArgs(SubWikiText):
|
|
|
19
21
|
_first_arg_sep = 0
|
|
20
22
|
|
|
21
23
|
@property
|
|
22
|
-
def _content_span(self) ->
|
|
24
|
+
def _content_span(self) -> tuple[int, int]:
|
|
23
25
|
return 2, -2
|
|
24
26
|
|
|
25
27
|
@property
|
|
@@ -32,7 +34,7 @@ class SubWikiTextWithArgs(SubWikiText):
|
|
|
32
34
|
return self._nesting_level(('Template', 'ParserFunction'))
|
|
33
35
|
|
|
34
36
|
@property
|
|
35
|
-
def arguments(self) ->
|
|
37
|
+
def arguments(self) -> list[Argument]:
|
|
36
38
|
"""Parse template content. Create self.name and self.arguments."""
|
|
37
39
|
shadow = self._shadow
|
|
38
40
|
split_spans = self._name_args_matcher(shadow, 2, -2).spans('arg')
|
|
@@ -65,8 +67,8 @@ class SubWikiTextWithArgs(SubWikiText):
|
|
|
65
67
|
return arguments
|
|
66
68
|
|
|
67
69
|
def get_lists(
|
|
68
|
-
self, pattern:
|
|
69
|
-
) ->
|
|
70
|
+
self, pattern: str | Iterable[str] = (r'\#', r'\*', '[:;]')
|
|
71
|
+
) -> list[WikiList]:
|
|
70
72
|
"""Return the lists in all arguments.
|
|
71
73
|
|
|
72
74
|
For performance reasons it is usually preferred to get a specific
|
|
@@ -103,5 +105,5 @@ class ParserFunction(SubWikiTextWithArgs):
|
|
|
103
105
|
_first_arg_sep = 58
|
|
104
106
|
|
|
105
107
|
@property
|
|
106
|
-
def parser_functions(self) ->
|
|
108
|
+
def parser_functions(self) -> list[ParserFunction]:
|
|
107
109
|
return super().parser_functions[1:]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from ._wikitext import SubWikiText, rc
|
|
4
4
|
|
|
@@ -50,7 +50,7 @@ class Section(SubWikiText):
|
|
|
50
50
|
del self[m.end(2) : m.end(2) + level_diff]
|
|
51
51
|
|
|
52
52
|
@property
|
|
53
|
-
def title(self) ->
|
|
53
|
+
def title(self) -> str | None:
|
|
54
54
|
"""The title of this section.
|
|
55
55
|
|
|
56
56
|
getter: Return the title or None for lead sections or sections that
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Define the functions required for parsing wikitext into spans."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from functools import partial
|
|
4
|
-
from typing import Callable, Dict,
|
|
6
|
+
from typing import Callable, Dict, List, Union
|
|
5
7
|
|
|
6
8
|
from regex import DOTALL, IGNORECASE, REVERSE, Match, compile as rc
|
|
7
9
|
|
|
@@ -210,7 +212,11 @@ HTML_END_TAG_FINDITER = rc(
|
|
|
210
212
|
).finditer
|
|
211
213
|
|
|
212
214
|
|
|
213
|
-
|
|
215
|
+
# [span_start: int, span_end: int, Match, byte_array]
|
|
216
|
+
TypeToSpans = Dict[Union[str, int], List[List]]
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def parse_to_spans(byte_array: bytearray) -> TypeToSpans:
|
|
214
220
|
"""Calculate and set self._type_to_spans.
|
|
215
221
|
|
|
216
222
|
Extracted spans will be removed from byte_array.
|
|
@@ -321,7 +327,7 @@ def extract_tag_extensions(
|
|
|
321
327
|
def _parse_sub_spans(
|
|
322
328
|
byte_array: bytearray,
|
|
323
329
|
start: int,
|
|
324
|
-
end:
|
|
330
|
+
end: int | None,
|
|
325
331
|
pms_append: Callable,
|
|
326
332
|
pfs_append: Callable,
|
|
327
333
|
tls_append: Callable,
|
|
@@ -336,7 +342,7 @@ def _parse_sub_spans(
|
|
|
336
342
|
byte_array[ms:me] = byte_array[ms:me].translate(BRACKETS)
|
|
337
343
|
while True:
|
|
338
344
|
while True:
|
|
339
|
-
match:
|
|
345
|
+
match: Match | None = None
|
|
340
346
|
for match in WIKILINK_PARAM_FINDITER(byte_array, start, end):
|
|
341
347
|
ms, me = match.span()
|
|
342
348
|
if match[1] is None:
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from bisect import insort_right
|
|
2
4
|
from collections.abc import Mapping
|
|
3
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, TypeVar
|
|
4
6
|
|
|
5
7
|
from regex import DOTALL, VERBOSE
|
|
6
8
|
|
|
@@ -86,7 +88,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
86
88
|
return shadow
|
|
87
89
|
|
|
88
90
|
@property
|
|
89
|
-
def _match_table(self) ->
|
|
91
|
+
def _match_table(self) -> list[list[Any]]:
|
|
90
92
|
"""Return match_table."""
|
|
91
93
|
table_shadow = self._table_shadow
|
|
92
94
|
# Remove table-start and table-end marks.
|
|
@@ -141,7 +143,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
141
143
|
strip: bool = True,
|
|
142
144
|
row: int = None,
|
|
143
145
|
column: int = None,
|
|
144
|
-
) ->
|
|
146
|
+
) -> list[list[str]] | list[str] | str:
|
|
145
147
|
"""Return a list containing lists of row values.
|
|
146
148
|
|
|
147
149
|
:param span: If true, calculate rows according to rowspans and colspans
|
|
@@ -209,7 +211,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
209
211
|
row: int = None,
|
|
210
212
|
column: int = None,
|
|
211
213
|
span: bool = True,
|
|
212
|
-
) ->
|
|
214
|
+
) -> list[list[Cell]] | list[Cell] | Cell:
|
|
213
215
|
"""Return a list of lists containing Cell objects.
|
|
214
216
|
|
|
215
217
|
:param span: If is True, rearrange the result according to colspan and
|
|
@@ -283,7 +285,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
283
285
|
return table_cells[row][column]
|
|
284
286
|
|
|
285
287
|
@property
|
|
286
|
-
def caption(self) ->
|
|
288
|
+
def caption(self) -> str | None:
|
|
287
289
|
"""Caption of the table. Support get and set."""
|
|
288
290
|
m = CAPTION_MATCH(self._shadow)
|
|
289
291
|
if m:
|
|
@@ -317,7 +319,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
317
319
|
return attrs_match
|
|
318
320
|
|
|
319
321
|
@property
|
|
320
|
-
def caption_attrs(self) ->
|
|
322
|
+
def caption_attrs(self) -> str | None:
|
|
321
323
|
"""Caption attributes. Support get and set operations."""
|
|
322
324
|
m = CAPTION_MATCH(self._shadow)
|
|
323
325
|
if m:
|
|
@@ -339,7 +341,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
339
341
|
self[m.end('preattrs') : end] = attrs
|
|
340
342
|
|
|
341
343
|
@property
|
|
342
|
-
def row_attrs(self) ->
|
|
344
|
+
def row_attrs(self) -> list[dict]:
|
|
343
345
|
"""Row attributes.
|
|
344
346
|
|
|
345
347
|
Use the setter of this property to set attributes for all rows.
|
|
@@ -363,7 +365,7 @@ class Table(SubWikiTextWithAttrs):
|
|
|
363
365
|
return attrs
|
|
364
366
|
|
|
365
367
|
@row_attrs.setter
|
|
366
|
-
def row_attrs(self, attrs:
|
|
368
|
+
def row_attrs(self, attrs: list[Mapping]):
|
|
367
369
|
for row_match, attrs_dict in reversed(
|
|
368
370
|
[*zip(FIND_ROWS(self._table_shadow), attrs)]
|
|
369
371
|
):
|
|
@@ -381,8 +383,8 @@ class Table(SubWikiTextWithAttrs):
|
|
|
381
383
|
|
|
382
384
|
|
|
383
385
|
def _apply_attr_spans(
|
|
384
|
-
table_attrs:
|
|
385
|
-
) ->
|
|
386
|
+
table_attrs: list[list[dict[str, str]]], table_data: list[list[T]]
|
|
387
|
+
) -> list[list[T]]:
|
|
386
388
|
"""Apply row and column spans and return table_data."""
|
|
387
389
|
# The following code is based on the table forming algorithm described
|
|
388
390
|
# at http://www.w3.org/TR/html5/tabular-data.html#processing-model-1
|
|
@@ -399,7 +401,7 @@ def _apply_attr_spans(
|
|
|
399
401
|
# if not table_data:
|
|
400
402
|
# return table_data
|
|
401
403
|
# 11
|
|
402
|
-
downward_growing_cells:
|
|
404
|
+
downward_growing_cells: list[tuple[T | None, int, int]] = []
|
|
403
405
|
# 13, 18
|
|
404
406
|
# Algorithm for processing rows
|
|
405
407
|
for attrs_row, row in zip(table_attrs, table_data):
|
|
@@ -8,7 +8,9 @@ For more info see:
|
|
|
8
8
|
* https://www.mediawiki.org/wiki/HTML_restriction
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
12
14
|
|
|
13
15
|
from regex import DOTALL, VERBOSE
|
|
14
16
|
|
|
@@ -55,7 +57,7 @@ class SubWikiTextWithAttrs(SubWikiText):
|
|
|
55
57
|
__slots__ = '_attrs_match'
|
|
56
58
|
|
|
57
59
|
@property
|
|
58
|
-
def attrs(self) ->
|
|
60
|
+
def attrs(self) -> dict[str, str]:
|
|
59
61
|
"""Return self attributes as a dictionary."""
|
|
60
62
|
spans = self._attrs_match.spans
|
|
61
63
|
string = self.string
|
|
@@ -73,7 +75,7 @@ class SubWikiTextWithAttrs(SubWikiText):
|
|
|
73
75
|
string[s:e] for s, e in self._attrs_match.spans('attr_name')
|
|
74
76
|
)
|
|
75
77
|
|
|
76
|
-
def get_attr(self, attr_name: str) ->
|
|
78
|
+
def get_attr(self, attr_name: str) -> str | None:
|
|
77
79
|
"""Return the value of the last attribute with the given name.
|
|
78
80
|
|
|
79
81
|
Return None if the attr_name does not exist in self.
|
|
@@ -163,7 +165,7 @@ class Tag(SubWikiTextWithAttrs):
|
|
|
163
165
|
self[start:end] = name
|
|
164
166
|
|
|
165
167
|
@property
|
|
166
|
-
def contents(self) ->
|
|
168
|
+
def contents(self) -> str | None:
|
|
167
169
|
"""Tag contents. Support both get and set operations.
|
|
168
170
|
|
|
169
171
|
setter:
|
|
@@ -210,10 +212,10 @@ class Tag(SubWikiTextWithAttrs):
|
|
|
210
212
|
def _extension_tags(self):
|
|
211
213
|
return super()._extension_tags[1:]
|
|
212
214
|
|
|
213
|
-
def get_tags(self, name=None) ->
|
|
215
|
+
def get_tags(self, name=None) -> list[Tag]:
|
|
214
216
|
return super().get_tags(name)[1:]
|
|
215
217
|
|
|
216
218
|
@property
|
|
217
|
-
def _content_span(self) ->
|
|
219
|
+
def _content_span(self) -> tuple[int, int]:
|
|
218
220
|
s = self.string
|
|
219
221
|
return s.find('>') + 1, s.rfind('<')
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, TypeVar
|
|
2
4
|
|
|
3
5
|
from regex import REVERSE
|
|
4
6
|
|
|
@@ -29,14 +31,14 @@ class Template(SubWikiTextWithArgs):
|
|
|
29
31
|
_first_arg_sep = 124
|
|
30
32
|
|
|
31
33
|
@property
|
|
32
|
-
def _content_span(self) ->
|
|
34
|
+
def _content_span(self) -> tuple[int, int]:
|
|
33
35
|
return 2, -2
|
|
34
36
|
|
|
35
37
|
def normal_name(
|
|
36
38
|
self,
|
|
37
39
|
rm_namespaces=('Template',),
|
|
38
40
|
*,
|
|
39
|
-
code: str = None,
|
|
41
|
+
code: str | None = None,
|
|
40
42
|
capitalize=False,
|
|
41
43
|
) -> str:
|
|
42
44
|
"""Return normal form of self.name.
|
|
@@ -110,7 +112,7 @@ class Template(SubWikiTextWithArgs):
|
|
|
110
112
|
else:
|
|
111
113
|
names.add(name)
|
|
112
114
|
|
|
113
|
-
def rm_dup_args_safe(self, tag: str = None) -> None:
|
|
115
|
+
def rm_dup_args_safe(self, tag: str | None = None) -> None:
|
|
114
116
|
"""Remove duplicate arguments in a safe manner.
|
|
115
117
|
|
|
116
118
|
Remove the duplicate arguments only in the following situations:
|
|
@@ -129,7 +131,7 @@ class Template(SubWikiTextWithArgs):
|
|
|
129
131
|
|
|
130
132
|
Also see `rm_first_of_dup_args` function.
|
|
131
133
|
"""
|
|
132
|
-
name_to_lastarg_vals:
|
|
134
|
+
name_to_lastarg_vals: dict[str, tuple[Argument, list[str]]] = {}
|
|
133
135
|
# Removing positional args affects their name. By reversing the list
|
|
134
136
|
# we avoid encountering those kind of args.
|
|
135
137
|
for arg in reversed(self.arguments):
|
|
@@ -171,9 +173,9 @@ class Template(SubWikiTextWithArgs):
|
|
|
171
173
|
self,
|
|
172
174
|
name: str,
|
|
173
175
|
value: str,
|
|
174
|
-
positional: bool = None,
|
|
175
|
-
before: str = None,
|
|
176
|
-
after: str = None,
|
|
176
|
+
positional: bool | None = None,
|
|
177
|
+
before: str | None = None,
|
|
178
|
+
after: str | None = None,
|
|
177
179
|
preserve_spacing=False,
|
|
178
180
|
) -> None:
|
|
179
181
|
"""Set the value for `name` argument. Add it if it doesn't exist.
|
|
@@ -272,14 +274,14 @@ class Template(SubWikiTextWithArgs):
|
|
|
272
274
|
# positional AND is to be added at the end of the template.
|
|
273
275
|
self.insert(-2, addstring)
|
|
274
276
|
|
|
275
|
-
def get_arg(self, name: str) ->
|
|
277
|
+
def get_arg(self, name: str) -> Argument | None:
|
|
276
278
|
"""Return the last argument with the given name.
|
|
277
279
|
|
|
278
280
|
Return None if no argument with that name is found.
|
|
279
281
|
"""
|
|
280
282
|
return get_arg(name, reversed(self.arguments))
|
|
281
283
|
|
|
282
|
-
def has_arg(self, name: str, value: str = None) -> bool:
|
|
284
|
+
def has_arg(self, name: str, value: str | None = None) -> bool:
|
|
283
285
|
"""Return true if the is an arg named `name`.
|
|
284
286
|
|
|
285
287
|
Also check equality of values if `value` is provided.
|
|
@@ -308,11 +310,11 @@ class Template(SubWikiTextWithArgs):
|
|
|
308
310
|
del arg[:]
|
|
309
311
|
|
|
310
312
|
@property
|
|
311
|
-
def templates(self) ->
|
|
313
|
+
def templates(self) -> list[Template]:
|
|
312
314
|
return super().templates[1:]
|
|
313
315
|
|
|
314
316
|
|
|
315
|
-
def mode(list_:
|
|
317
|
+
def mode(list_: list[T]) -> T:
|
|
316
318
|
"""Return the most common item in the list.
|
|
317
319
|
|
|
318
320
|
Return the first one if there are more than one most common items.
|
|
@@ -330,7 +332,7 @@ def mode(list_: List[T]) -> T:
|
|
|
330
332
|
return max(set(list_), key=list_.count)
|
|
331
333
|
|
|
332
334
|
|
|
333
|
-
def get_arg(name: str, args: Iterable[Argument]) ->
|
|
335
|
+
def get_arg(name: str, args: Iterable[Argument]) -> Argument | None:
|
|
334
336
|
"""Return the first argument in the args that has the given name.
|
|
335
337
|
|
|
336
338
|
Return None if no such argument is found.
|