wikitextparser 0.55.12__tar.gz → 0.56.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.github/workflows/tests.yml +1 -1
  2. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/CHANGELOG.rst +16 -0
  3. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/PKG-INFO +3 -3
  4. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/README.rst +1 -1
  5. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/pyproject.toml +1 -1
  6. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/__init__.py +1 -1
  7. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_parser_function.py +1 -4
  8. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_section.py +2 -2
  9. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikitext.py +17 -31
  10. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.coveragerc +0 -0
  11. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.gitignore +0 -0
  12. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/.readthedocs.yaml +0 -0
  13. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/LICENSE.md +0 -0
  14. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/CHANGELOG.rst +0 -0
  15. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/Makefile +0 -0
  16. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/README.rst +0 -0
  17. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/conf.py +0 -0
  18. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/index.rst +0 -0
  19. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/docs/make.bat +0 -0
  20. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_argument.py +0 -0
  21. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_cell.py +0 -0
  22. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_comment_bold_italic.py +0 -0
  23. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_config.py +0 -0
  24. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_externallink.py +0 -0
  25. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_parameter.py +0 -0
  26. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_spans.py +0 -0
  27. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_table.py +0 -0
  28. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_tag.py +0 -0
  29. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_template.py +0 -0
  30. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikilink.py +0 -0
  31. {wikitextparser-0.55.12 → wikitextparser-0.56.1}/wikitextparser/_wikilist.py +0 -0
@@ -13,7 +13,7 @@ jobs:
13
13
  strategy:
14
14
  matrix:
15
15
  os: [ubuntu-latest, windows-latest, macos-latest]
16
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
16
+ python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
17
17
  env:
18
18
  OS: ${{ matrix.os }}
19
19
  PYTHON: ${{ matrix.python-version }}
@@ -1,3 +1,19 @@
1
+ v0.56.1
2
+ -------
3
+ * Fixed a bug in ``get_sections`` when ``top_levels_only`` was ``True``.
4
+
5
+ v0.56.0
6
+ -------
7
+ * Drop Python 3.7 support.
8
+
9
+ v0.55.14
10
+ --------
11
+ * Fixed a bug in detecting the text of an external link. (#137)
12
+
13
+ v0.55.13
14
+ --------
15
+ * Fixed a bug in ``Section.level`` resulting in malformed section titles when multiple levels are added (#135)
16
+
1
17
  v0.55.12
2
18
  --------
3
19
  * Performance improvements in extracting bold and italic nodes. (#133)
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wikitextparser
3
- Version: 0.55.12
3
+ Version: 0.56.1
4
4
  Summary: A simple parsing tool for MediaWiki's wikitext markup.
5
5
  Keywords: MediaWiki,wikitext,parser
6
6
  Author-email: 5j9 <5j9@users.noreply.github.com>
7
- Requires-Python: >= 3.7
7
+ Requires-Python: >= 3.8
8
8
  Description-Content-Type: text/x-rst
9
9
  Classifier: Programming Language :: Python
10
10
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
@@ -38,7 +38,7 @@ The purpose is to allow users easily extract and/or manipulate templates, templa
38
38
  Installation
39
39
  ============
40
40
 
41
- - Python 3.7+ is required
41
+ - Python 3.8+ is required
42
42
  - ``pip install wikitextparser``
43
43
 
44
44
  Usage
@@ -19,7 +19,7 @@ The purpose is to allow users easily extract and/or manipulate templates, templa
19
19
  Installation
20
20
  ============
21
21
 
22
- - Python 3.7+ is required
22
+ - Python 3.8+ is required
23
23
  - ``pip install wikitextparser``
24
24
 
25
25
  Usage
@@ -19,7 +19,7 @@ classifiers = [
19
19
  "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
20
20
  "Topic :: Text Processing",
21
21
  ]
22
- requires-python = ">= 3.7"
22
+ requires-python = ">= 3.8"
23
23
  dependencies = [
24
24
  "regex >= 2022.9.11",
25
25
  "wcwidth",
@@ -1,5 +1,5 @@
1
1
  # Scheme: [N!]N(.N)*[{a|b|rc}N][.postN][.devN]
2
- __version__ = '0.55.12'
2
+ __version__ = '0.56.1'
3
3
 
4
4
  from . import _wikitext
5
5
  from ._argument import Argument # noqa: F401
@@ -60,10 +60,7 @@ class SubWikiTextWithArgs(SubWikiText):
60
60
  else:
61
61
  arg_span = old_span
62
62
  arg = Argument(lststr, type_to_spans, arg_span, type_, self)
63
- arg._shadow_cache = (
64
- lststr[0][s:e],
65
- shadow[arg_self_start:arg_self_end],
66
- )
63
+ arg._span_data[3] = shadow[arg_self_start:arg_self_end]
67
64
  arguments_append(arg)
68
65
  return arguments
69
66
 
@@ -42,9 +42,9 @@ class Section(SubWikiText):
42
42
  if level_diff == 0:
43
43
  return
44
44
  if level_diff < 0:
45
- new_equals = '=' * abs(level_diff)
45
+ new_equals = '=' * -level_diff
46
46
  self.insert(0, new_equals)
47
- self.insert(m.end(2) + 1, new_equals)
47
+ self.insert(m.end(2) - level_diff, new_equals)
48
48
  return
49
49
  del self[:level_diff]
50
50
  del self[m.end(2) : m.end(2) + level_diff]
@@ -40,7 +40,6 @@ from ._spans import (
40
40
  END_TAG_PATTERN,
41
41
  EXTERNAL_LINK_URL_TAIL,
42
42
  INVALID_URL_CHARS,
43
- PARSABLE_TAG_EXTENSION_NAME,
44
43
  START_TAG_PATTERN,
45
44
  parse_to_spans,
46
45
  rc,
@@ -52,10 +51,6 @@ NAME_CAPTURING_HTML_START_TAG_FINDITER = rc(
52
51
  )
53
52
  ).finditer
54
53
 
55
- PARSABLE_TAG_EXTENSIONS_MATCH = rc(
56
- rb'<' + PARSABLE_TAG_EXTENSION_NAME + rb'\b', IGNORECASE
57
- ).match
58
-
59
54
  # External links
60
55
  BRACKET_EXTERNAL_LINK_SCHEMES = regex_pattern(
61
56
  _bare_external_link_schemes | {'//'}
@@ -74,7 +69,7 @@ INVALID_EXT_CHARS_SUB = rc( # the [:-4] slice allows \[ and \]
74
69
 
75
70
  # Sections
76
71
  SECTION_HEADING = rb'^(?<equals>={1,6})[^\n]+?(?P=equals)[ \t]*+$'
77
- SUBSECTION_HEADING = rb'^(?P=equals)=[^\n]+?(?P=equals)=[ \t]*+$'
72
+ SUB_SECTION = rb'(?:^(?P=equals)=[^\n]+?(?P=equals)=[ \t]*+$.*?)*'
78
73
  LEAD_SECTION = rb'(?<section>(?<equals>).*?)'
79
74
  SECTIONS_FULLMATCH = rc(
80
75
  LEAD_SECTION
@@ -89,9 +84,8 @@ SECTIONS_TOP_LEVELS_ONLY = rc(
89
84
  + rb'(?<section>'
90
85
  + SECTION_HEADING
91
86
  + rb'.*?'
92
- + SUBSECTION_HEADING
93
- + rb'.*?'
94
- rb')*',
87
+ + SUB_SECTION
88
+ + rb')*',
95
89
  DOTALL | MULTILINE | VERBOSE,
96
90
  ).fullmatch
97
91
 
@@ -113,11 +107,7 @@ TABLE_FINDITER = rc(
113
107
  DOTALL | MULTILINE | VERBOSE,
114
108
  ).finditer
115
109
 
116
- substitute_apostrophes = rc( # bold-italic, bold, or italic tokens
117
- rb"('\0*+){2,}+(?=[^']|$)",
118
- MULTILINE | VERBOSE,
119
- ).sub
120
- find_lines = rc(rb'(.*?)$').finditer
110
+ substitute_apostrophes = rc(rb"('\0*+){2,}+(?=[^']|$)", MULTILINE).sub
121
111
 
122
112
  BOLD_FINDITER = rc(
123
113
  rb"""
@@ -213,7 +203,7 @@ class WikiText:
213
203
  # The following class attribute acts as a default value.
214
204
  _type = 'WikiText'
215
205
 
216
- __slots__ = '_type_to_spans', '_lststr', '_span_data', '_shadow_cache'
206
+ __slots__ = '_type_to_spans', '_lststr', '_span_data'
217
207
 
218
208
  def __init__(
219
209
  self,
@@ -241,7 +231,6 @@ class WikiText:
241
231
  if _type not in SPAN_PARSER_TYPES:
242
232
  type_to_spans = self._type_to_spans = parse_to_spans(byte_array)
243
233
  type_to_spans[_type] = [span]
244
- self._shadow_cache = string, byte_array
245
234
  else:
246
235
  # In SPAN_PARSER_TYPES, we can't pass the original byte_array to
247
236
  # parser to generate the shadow because it will replace the whole
@@ -259,7 +248,6 @@ class WikiText:
259
248
  byte_array[0] = 3
260
249
  byte_array[-1] = 32
261
250
  type_to_spans = parse_to_spans(byte_array)
262
- self._shadow_cache = string, byte_array
263
251
  type_to_spans[_type].insert(0, span)
264
252
  self._type_to_spans = type_to_spans
265
253
  if type(self) is Parameter:
@@ -443,10 +431,7 @@ class WikiText:
443
431
  @property
444
432
  def span(self) -> tuple:
445
433
  """Return the span of self relative to the start of the root node."""
446
- # In Python 3.7 and earlier, generalized iterable unpacking in yield
447
- # and return statements requires enclosing parentheses:
448
- # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
449
- return (*self._span_data[:2],) # noqa
434
+ return (*self._span_data[:2],)
450
435
 
451
436
  @property
452
437
  def string(self) -> str:
@@ -1072,12 +1057,11 @@ class WikiText:
1072
1057
  s = starts[1]
1073
1058
  append_bold_start(s)
1074
1059
  return b'_' * (s - starts[0]) + m.string[s : m.end()]
1075
- if n > 5: # more than 5 apostrophes -> hide the prior ones
1076
- odd_bold_italics ^= True
1077
- odd_italics ^= True
1078
- s = starts[-5]
1079
- return b'_' * (s - starts[0]) + m.string[s : m.end()]
1080
- raise # execution should never reach here
1060
+ # more than 5 apostrophes -> hide the prior ones
1061
+ odd_bold_italics ^= True
1062
+ odd_italics ^= True
1063
+ s = starts[-5]
1064
+ return b'_' * (s - starts[0]) + m.string[s : m.end()]
1081
1065
 
1082
1066
  return bytearray(b'\n').join(
1083
1067
  [
@@ -1206,7 +1190,7 @@ class WikiText:
1206
1190
  )
1207
1191
 
1208
1192
  @property
1209
- def _ext_link_shadow(self):
1193
+ def _ext_link_shadow(self) -> bytearray:
1210
1194
  """Replace the invalid chars of SPAN_PARSER_TYPES with b'_'.
1211
1195
 
1212
1196
  For comments, all characters are replaced, but for ('Template',
@@ -1216,12 +1200,14 @@ class WikiText:
1216
1200
  byte_array = bytearray(self._lststr[0][ss:se], 'ascii', 'replace')
1217
1201
  subspans = self._subspans
1218
1202
  for s, e, _, _ in subspans('Comment'):
1219
- byte_array[s:e] = (e - s) * b'_'
1203
+ byte_array[s - ss : e - ss] = (e - s) * b'_'
1220
1204
  for s, e, _, _ in subspans('WikiLink'):
1221
- byte_array[s:e] = (e - s) * b' '
1205
+ byte_array[s - ss : e - ss] = (e - s) * b' '
1222
1206
  for type_ in 'Template', 'ParserFunction', 'Parameter':
1223
1207
  for s, e, _, _ in subspans(type_):
1224
- byte_array[s:e] = INVALID_EXT_CHARS_SUB(b' ', byte_array[s:e])
1208
+ byte_array[s - ss : e - ss] = INVALID_EXT_CHARS_SUB(
1209
+ b' ', byte_array[s:e]
1210
+ )
1225
1211
  return byte_array
1226
1212
 
1227
1213
  @property