wikitextparser 0.56.3__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. wikitextparser-1.0.0/PKG-INFO +413 -0
  2. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/README.rst +20 -1
  3. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/pyproject.toml +29 -13
  4. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/__init__.py +1 -1
  5. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_argument.py +1 -1
  6. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_cell.py +6 -6
  7. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_config.py +1 -1
  8. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_externallink.py +2 -2
  9. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_section.py +3 -1
  10. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_table.py +92 -32
  11. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_template.py +13 -14
  12. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_wikilist.py +4 -6
  13. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_wikitext.py +18 -20
  14. {wikitextparser-0.56.3 → wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info}/PKG-INFO +396 -393
  15. wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info/SOURCES.txt +23 -0
  16. wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info/dependency_links.txt +1 -0
  17. wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info/requires.txt +10 -0
  18. wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info/top_level.txt +1 -0
  19. wikitextparser-1.0.0/wikitextparser/wikitextparser.egg-info/zip-safe +1 -0
  20. wikitextparser-0.56.3/.coveragerc +0 -4
  21. wikitextparser-0.56.3/.github/workflows/tests.yml +0 -42
  22. wikitextparser-0.56.3/.gitignore +0 -1
  23. wikitextparser-0.56.3/.readthedocs.yaml +0 -25
  24. wikitextparser-0.56.3/.vscode/settings.json +0 -7
  25. wikitextparser-0.56.3/CHANGELOG.rst +0 -553
  26. wikitextparser-0.56.3/LICENSE.md +0 -674
  27. wikitextparser-0.56.3/docs/CHANGELOG.rst +0 -3
  28. wikitextparser-0.56.3/docs/Makefile +0 -20
  29. wikitextparser-0.56.3/docs/README.rst +0 -5
  30. wikitextparser-0.56.3/docs/conf.py +0 -189
  31. wikitextparser-0.56.3/docs/index.rst +0 -152
  32. wikitextparser-0.56.3/docs/make.bat +0 -36
  33. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_comment_bold_italic.py +0 -0
  34. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_parameter.py +0 -0
  35. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_parser_function.py +0 -0
  36. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_spans.py +0 -0
  37. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_tag.py +0 -0
  38. {wikitextparser-0.56.3 → wikitextparser-1.0.0}/wikitextparser/_wikilink.py +0 -0
@@ -0,0 +1,413 @@
1
+ Metadata-Version: 2.3
2
+ Name: wikitextparser
3
+ Version: 1.0.0
4
+ Summary: A simple parsing tool for MediaWiki's wikitext markup.
5
+ Keywords: MediaWiki,wikitext,parser
6
+ Author: 5j9
7
+ Author-email: 5j9 <5j9@users.noreply.github.com>
8
+ License: GNU General Public License v3 (GPLv3)
9
+ Classifier: Programming Language :: Python
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
+ Classifier: Topic :: Text Processing
12
+ Requires-Dist: regex>=2022.9.11
13
+ Requires-Dist: wcwidth
14
+ Requires-Dist: coverage ; extra == 'dev'
15
+ Requires-Dist: pytest ; extra == 'tests'
16
+ Requires-Python: >=3.8
17
+ Project-URL: Homepage, https://github.com/5j9/wikitextparser
18
+ Provides-Extra: dev
19
+ Provides-Extra: tests
20
+ Description-Content-Type: text/x-rst
21
+
22
+ .. image:: https://github.com/5j9/wikitextparser/actions/workflows/tests.yml/badge.svg
23
+ :target: https://github.com/5j9/wikitextparser/actions/workflows/tests.yml
24
+ .. image:: https://codecov.io/github/5j9/wikitextparser/coverage.svg?branch=master
25
+ :target: https://codecov.io/github/5j9/wikitextparser
26
+ .. image:: https://readthedocs.org/projects/wikitextparser/badge/?version=latest
27
+ :target: http://wikitextparser.readthedocs.io/en/latest/?badge=latest
28
+
29
+ ==============
30
+ WikiTextParser
31
+ ==============
32
+ .. Quick Start Guide
33
+
34
+ A simple to use WikiText parsing library for `MediaWiki <https://www.mediawiki.org/wiki/MediaWiki>`_.
35
+
36
+ The purpose is to allow users to easily extract and/or manipulate templates, template parameters, parser functions, tables, external links, wikilinks, lists, etc. found in wikitexts.
37
+
38
+ .. contents:: Table of Contents
39
+
40
+ Installation
41
+ ============
42
+
43
+ - Python 3.8+ is required
44
+ - ``pip install wikitextparser``
45
+
46
+ Usage
47
+ =====
48
+
49
+ .. code:: python
50
+
51
+ >>> import wikitextparser as wtp
52
+
53
+ WikiTextParser can detect sections, parser functions, templates, wiki links, external links, arguments, tables, wiki lists, and comments in your wikitext. The following sections are a quick overview of some of these functionalities.
54
+
55
+ You may also want to have a look at the test modules for more examples and probable pitfalls (expected failures).
56
+
57
+ Templates
58
+ ---------
59
+
60
+ .. code:: python
61
+
62
+ >>> parsed = wtp.parse("{{text|value1{{text|value2}}}}")
63
+ >>> parsed.templates
64
+ [Template('{{text|value1{{text|value2}}}}'), Template('{{text|value2}}')]
65
+ >>> parsed.templates[0].arguments
66
+ [Argument("|value1{{text|value2}}")]
67
+ >>> parsed.templates[0].arguments[0].value = 'value3'
68
+ >>> print(parsed)
69
+ {{text|value3}}
70
+
71
+ The ``pformat`` method returns a pretty-print formatted string for templates:
72
+
73
+ .. code:: python
74
+
75
+ >>> parsed = wtp.parse('{{t1 |b=b|c=c| d={{t2|e=e|f=f}} }}')
76
+ >>> t1, t2 = parsed.templates
77
+ >>> print(t2.pformat())
78
+ {{t2
79
+ | e = e
80
+ | f = f
81
+ }}
82
+ >>> print(t1.pformat())
83
+ {{t1
84
+ | b = b
85
+ | c = c
86
+ | d = {{t2
87
+ | e = e
88
+ | f = f
89
+ }}
90
+ }}
91
+
92
+ ``Template.rm_dup_args_safe`` and ``Template.rm_first_of_dup_args`` methods can be used to clean-up `pages using duplicate arguments in template calls <https://en.wikipedia.org/wiki/Category:Pages_using_duplicate_arguments_in_template_calls>`_:
93
+
94
+ .. code:: python
95
+
96
+ >>> t = wtp.Template('{{t|a=a|a=b|a=a}}')
97
+ >>> t.rm_dup_args_safe()
98
+ >>> t
99
+ Template('{{t|a=b|a=a}}')
100
+ >>> t = wtp.Template('{{t|a=a|a=b|a=a}}')
101
+ >>> t.rm_first_of_dup_args()
102
+ >>> t
103
+ Template('{{t|a=a}}')
104
+
105
+ Template parameters:
106
+
107
+ .. code:: python
108
+
109
+ >>> param = wtp.parse('{{{a|b}}}').parameters[0]
110
+ >>> param.name
111
+ 'a'
112
+ >>> param.default
113
+ 'b'
114
+ >>> param.default = 'c'
115
+ >>> param
116
+ Parameter('{{{a|c}}}')
117
+ >>> param.append_default('d')
118
+ >>> param
119
+ Parameter('{{{a|{{{d|c}}}}}}')
120
+
121
+
122
+ WikiLinks
123
+ ---------
124
+
125
+ .. code:: python
126
+
127
+ >>> wl = wtp.parse('... [[title#fragmet|text]] ...').wikilinks[0]
128
+ >>> wl.title = 'new_title'
129
+ >>> wl.fragment = 'new_fragmet'
130
+ >>> wl.text = 'X'
131
+ >>> wl
132
+ WikiLink('[[new_title#new_fragmet|X]]')
133
+ >>> del wl.text
134
+ >>> wl
135
+ WikiLink('[[new_title#new_fragmet]]')
136
+
137
+ All WikiLink properties support get, set, and delete operations. Categories are special cases of WikiLinks, in that they are prefixed with the category namespace, which is case insensitive and may be internationalized:
138
+
139
+ .. code:: python
140
+
141
+ >>> parsed = wtp.parse("""
142
+ [[Category:Foo]]
143
+ [[Κατηγορία:Bar]]
144
+ [[Other link]]
145
+ """)
146
+ >>> categories = [
147
+ wl
148
+ for wl
149
+ in parsed.wikilinks
150
+ if wl.title.partition(':')[0]
151
+ .strip()
152
+ .lower()
153
+ in ["category", "κατηγορία"]
154
+ ]
155
+ >>> categories
156
+ [WikiLink('[[Category:Foo]]'), WikiLink('[[Κατηγορία:Bar]]')]
157
+
158
+ Sections
159
+ --------
160
+
161
+ .. code:: python
162
+
163
+ >>> parsed = wtp.parse("""
164
+ ... == h2 ==
165
+ ... t2
166
+ ... === h3 ===
167
+ ... t3
168
+ ... === h3 ===
169
+ ... t3
170
+ ... == h22 ==
171
+ ... t22
172
+ ... {{text|value3}}
173
+ ... [[Z|X]]
174
+ ... """)
175
+ >>> parsed.sections
176
+ [Section('\n'),
177
+ Section('== h2 ==\nt2\n=== h3 ===\nt3\n=== h3 ===\nt3\n'),
178
+ Section('=== h3 ===\nt3\n'),
179
+ Section('=== h3 ===\nt3\n'),
180
+ Section('== h22 ==\nt22\n{{text|value3}}\n[[Z|X]]\n')]
181
+ >>> parsed.sections[1].title = 'newtitle'
182
+ >>> print(parsed)
183
+
184
+ ==newtitle==
185
+ t2
186
+ === h3 ===
187
+ t3
188
+ === h3 ===
189
+ t3
190
+ == h22 ==
191
+ t22
192
+ {{text|value3}}
193
+ [[Z|X]]
194
+ >>> del parsed.sections[1].title
195
+ >>> print(parsed)
196
+
197
+ t2
198
+ === h3 ===
199
+ t3
200
+ === h3 ===
201
+ t3
202
+ == h22 ==
203
+ t22
204
+ {{text|value3}}
205
+ [[Z|X]]
206
+
207
+ Tables
208
+ ------
209
+
210
+ Extracting cell values of a table:
211
+
212
+ .. code:: python
213
+
214
+ >>> p = wtp.parse("""{|
215
+ ... | Orange || Apple || more
216
+ ... |-
217
+ ... | Bread || Pie || more
218
+ ... |-
219
+ ... | Butter || Ice cream || and more
220
+ ... |}""")
221
+ >>> p.tables[0].data()
222
+ [['Orange', 'Apple', 'more'],
223
+ ['Bread', 'Pie', 'more'],
224
+ ['Butter', 'Ice cream', 'and more']]
225
+
226
+ By default, values are arranged according to ``colspan`` and ``rowspan`` attributes:
227
+
228
+ .. code:: python
229
+
230
+ >>> t = wtp.Table("""{| class="wikitable sortable"
231
+ ... |-
232
+ ... ! a !! b !! c
233
+ ... |-
234
+ ... !colspan = "2" | d || e
235
+ ... |-
236
+ ... |}""")
237
+ >>> t.data()
238
+ [['a', 'b', 'c'], ['d', 'd', 'e']]
239
+ >>> t.data(span=False)
240
+ [['a', 'b', 'c'], ['d', 'e']]
241
+
242
+ Calling the ``cells`` method of a ``Table`` returns table cells as ``Cell`` objects. Cell objects provide methods for getting or setting each cell's attributes or values individually:
243
+
244
+ .. code:: python
245
+
246
+ >>> cell = t.cells(row=1, column=1)
247
+ >>> cell.attrs
248
+ {'colspan': '2'}
249
+ >>> cell.set('colspan', '3')
250
+ >>> print(t)
251
+ {| class="wikitable sortable"
252
+ |-
253
+ ! a !! b !! c
254
+ |-
255
+ !colspan = "3" | d || e
256
+ |-
257
+ |}
258
+
259
+ HTML attributes of Table, Cell, and Tag objects are accessible via
260
+ ``get_attr``, ``set_attr``, ``has_attr``, and ``del_attr`` methods.
261
+
262
+ Lists
263
+ -----
264
+
265
+ The ``get_lists`` method provides access to lists within the wikitext.
266
+
267
+ .. code:: python
268
+
269
+ >>> parsed = wtp.parse(
270
+ ... 'text\n'
271
+ ... '* list item a\n'
272
+ ... '* list item b\n'
273
+ ... '** sub-list of b\n'
274
+ ... '* list item c\n'
275
+ ... '** sub-list of b\n'
276
+ ... 'text'
277
+ ... )
278
+ >>> wikilist = parsed.get_lists()[0]
279
+ >>> wikilist.items
280
+ [' list item a', ' list item b', ' list item c']
281
+
282
+ The ``sublists`` method can be used to get all sub-lists of the current list or just sub-lists of specific items:
283
+
284
+ .. code:: python
285
+
286
+ >>> wikilist.sublists()
287
+ [WikiList('** sub-list of b\n'), WikiList('** sub-list of b\n')]
288
+ >>> wikilist.sublists(1)[0].items
289
+ [' sub-list of b']
290
+
291
+ It also has an optional ``pattern`` argument that works similarly to ``lists``, except that the current list pattern will be automatically added to it as a prefix:
292
+
293
+ .. code:: python
294
+
295
+ >>> wikilist = wtp.WikiList('#a\n#b\n##ba\n#*bb\n#:bc\n#c', '\#')
296
+ >>> wikilist.sublists()
297
+ [WikiList('##ba\n'), WikiList('#*bb\n'), WikiList('#:bc\n')]
298
+ >>> wikilist.sublists(pattern='\*')
299
+ [WikiList('#*bb\n')]
300
+
301
+
302
+ Convert one type of list to another using the convert method. Specifying the starting pattern of the desired lists can facilitate finding them and improves the performance:
303
+
304
+ .. code:: python
305
+
306
+ >>> wl = wtp.WikiList(
307
+ ... ':*A1\n:*#B1\n:*#B2\n:*:continuing A1\n:*A2',
308
+ ... pattern=':\*'
309
+ ... )
310
+ >>> print(wl)
311
+ :*A1
312
+ :*#B1
313
+ :*#B2
314
+ :*:continuing A1
315
+ :*A2
316
+ >>> wl.convert('#')
317
+ >>> print(wl)
318
+ #A1
319
+ ##B1
320
+ ##B2
321
+ #:continuing A1
322
+ #A2
323
+
324
+ Tags
325
+ ----
326
+
327
+ Accessing HTML tags:
328
+
329
+ .. code:: python
330
+
331
+ >>> p = wtp.parse('text<ref name="c">citation</ref>\n<references/>')
332
+ >>> ref, references = p.get_tags()
333
+ >>> ref.name = 'X'
334
+ >>> ref
335
+ Tag('<X name="c">citation</X>')
336
+ >>> references
337
+ Tag('<references/>')
338
+
339
+ WikiTextParser is able to handle common usages of HTML and extension tags. However it is not a fully-fledged HTML parser and may fail on edge cases or malformed HTML input. Please open an issue on github if you encounter bugs.
340
+
341
+ Miscellaneous
342
+ -------------
343
+ ``parent`` and ``ancestors`` methods can be used to access a node's parent or ancestors respectively:
344
+
345
+ .. code:: python
346
+
347
+ >>> template_d = parse("{{a|{{b|{{c|{{d}}}}}}}}").templates[3]
348
+ >>> template_d.ancestors()
349
+ [Template('{{c|{{d}}}}'),
350
+ Template('{{b|{{c|{{d}}}}}}'),
351
+ Template('{{a|{{b|{{c|{{d}}}}}}}}')]
352
+ >>> template_d.parent()
353
+ Template('{{c|{{d}}}}')
354
+ >>> _.parent()
355
+ Template('{{b|{{c|{{d}}}}}}')
356
+ >>> _.parent()
357
+ Template('{{a|{{b|{{c|{{d}}}}}}}}')
358
+ >>> _.parent() # Returns None
359
+
360
+ Use the optional ``type_`` argument if looking for ancestors of a specific type:
361
+
362
+ .. code:: python
363
+
364
+ >>> parsed = parse('{{a|{{#if:{{b{{c<!---->}}}}}}}}')
365
+ >>> comment = parsed.comments[0]
366
+ >>> comment.ancestors(type_='ParserFunction')
367
+ [ParserFunction('{{#if:{{b{{c<!---->}}}}}}')]
368
+
369
+
370
+ To delete/remove any object from its parents use ``del object[:]`` or ``del object.string``.
371
+
372
+ The ``remove_markup`` function or ``plain_text`` method can be used to remove wiki markup:
373
+
374
+ .. code:: python
375
+
376
+ >>> from wikitextparser import remove_markup, parse
377
+ >>> s = "'''a'''<!--comment--> [[b|c]] [[d]]"
378
+ >>> remove_markup(s)
379
+ 'a c d'
380
+ >>> parse(s).plain_text()
381
+ 'a c d'
382
+
383
+ Compared with mwparserfromhell
384
+ ==============================
385
+
386
+ `mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ is a mature and widely used library with nearly the same purposes as ``wikitextparser``. The main reason leading me to create ``wikitextparser`` was that ``mwparserfromhell`` could not parse wikitext in certain situations that I needed it for. See mwparserfromhell's issues `40 <https://github.com/earwig/mwparserfromhell/issues/40>`_, `42 <https://github.com/earwig/mwparserfromhell/issues/42>`_, `88 <https://github.com/earwig/mwparserfromhell/issues/88>`_, and other related issues. In many of those situations ``wikitextparser`` may be able to give you more acceptable results.
387
+
388
+ Also note that ``wikitextparser`` is still using 0.x.y version `meaning <https://semver.org/>`_ that the API is not stable and may change in the future versions.
389
+
390
+ The tokenizer in ``mwparserfromhell`` is written in C. Tokenization in ``wikitextparser`` is mostly done using the ``regex`` library which is also in C.
391
+ I have not rigorously compared the two libraries in terms of performance, i.e. execution time and memory usage. In my limited experience, ``wikitextparser`` has a decent performance in realistic cases and should be able to compete and may even have slight performance benefits in some situations.
392
+
393
+ If you have had a chance to compare these libraries in terms of performance or capabilities please share your experience by opening an issue on github.
394
+
395
+ Some of the unique features of ``wikitextparser`` are: Providing access to individual cells of each table, pretty-printing templates, a WikiList class with rudimentary methods to work with `lists <https://www.mediawiki.org/wiki/Help:Lists>`_, and a few other functions.
396
+
397
+ Known issues and limitations
398
+ ============================
399
+
400
+ * The contents of templates/parameters are not known to offline parsers. For example an offline parser cannot know if the markup ``[[{{z|a}}]]`` should be treated as wikilink or not, it depends on the inner-workings of the ``{{z}}`` template. In these situations ``wikitextparser`` tries to use a best guess. ``[[{{z|a}}]]`` is treated as a wikilink (why else would anyone call a template inside wikilink markup, and even if it is not a wikilink, usually no harm is done).
401
+ * Localized namespace names are unknown, so for example ``[[File:...]]`` links are treated as normal wikilinks. ``mwparserfromhell`` has a similar issue, see `#87 <https://github.com/earwig/mwparserfromhell/issues/87>`_ and `#136 <https://github.com/earwig/mwparserfromhell/issues/136>`_. As a workaround, `Pywikibot <https://www.mediawiki.org/wiki/Manual:Pywikibot>`_ can be used for determining the namespace.
402
+ * `Linktrails <https://www.mediawiki.org/wiki/Help:Links>`_ are language-dependent and are not supported. `Also not supported by mwparserfromhell <https://github.com/earwig/mwparserfromhell/issues/82>`_. However given the trail pattern and knowing that ``wikilink.span[1]`` is the ending position of a wikilink, it is possible to compute a WikiLink's linktrail.
403
+ * Templates adjacent to external links are never considered part of the link. In reality, this depends on the contents of the template. Example: ``parse('http://example.com{{dead link}}').external_links[0].url == 'http://example.com'``
404
+ * List of valid `extension tags <https://www.mediawiki.org/wiki/Parser_extension_tags>`_ depends on the extensions installed on the wiki. The ``tags`` method currently only supports the ones on English Wikipedia. A configuration option might be added in the future to address this issue.
405
+ * ``wikitextparser`` currently does not provide an `ast.walk <https://docs.python.org/3/library/ast.html#ast.walk>`_-like method yielding all descendant nodes.
406
+ * `Parser functions <https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions>`_ and `magic words <https://www.mediawiki.org/wiki/Help:Magic_words>`_ are not evaluated.
407
+
408
+
409
+ Credits
410
+ =======
411
+ * `python <https://www.python.org/>`_
412
+ * `regex <https://github.com/mrabarnett/mrab-regex>`_
413
+ * `wcwidth <https://github.com/jquast/wcwidth>`_
@@ -113,7 +113,26 @@ WikiLinks
113
113
  >>> wl
114
114
  WikiLink('[[new_title#new_fragmet]]')
115
115
 
116
- All WikiLink properties support get, set, and delete operations.
116
+ All WikiLink properties support get, set, and delete operations. Categories are special cases of WikiLinks, in that they are prefixed with the category namespace, which is case insensitive and may be internationalized:
117
+
118
+ .. code:: python
119
+
120
+ >>> parsed = wtp.parse("""
121
+ [[Category:Foo]]
122
+ [[Κατηγορία:Bar]]
123
+ [[Other link]]
124
+ """)
125
+ >>> categories = [
126
+ wl
127
+ for wl
128
+ in parsed.wikilinks
129
+ if wl.title.partition(':')[0]
130
+ .strip()
131
+ .lower()
132
+ in ["category", "κατηγορία"]
133
+ ]
134
+ >>> categories
135
+ [WikiLink('[[Category:Foo]]'), WikiLink('[[Κατηγορία:Bar]]')]
117
136
 
118
137
  Sections
119
138
  --------
@@ -1,6 +1,6 @@
1
1
  [build-system]
2
- requires = ['flit_core >=3.2,<4']
3
- build-backend = 'flit_core.buildapi'
2
+ requires = ['uv_build>=0.8.3,<0.9.0']
3
+ build-backend = 'uv_build'
4
4
 
5
5
  [project]
6
6
  name = "wikitextparser"
@@ -24,9 +24,7 @@ dependencies = [
24
24
  "regex >= 2022.9.11",
25
25
  "wcwidth",
26
26
  ]
27
- dynamic = [
28
- "version",
29
- ]
27
+ version = "1.0.0"
30
28
 
31
29
  [project.license]
32
30
  text = "GNU General Public License v3 (GPLv3)"
@@ -42,21 +40,24 @@ tests = [
42
40
  "pytest",
43
41
  ]
44
42
 
45
- [tool.flit.sdist]
46
- exclude = ['tests/', 'doc/', 'dev/']
47
-
48
43
  [tool.ruff]
49
44
  line-length = 79
50
45
  format.quote-style = 'single'
51
46
  lint.isort.combine-as-imports = true
52
47
  lint.extend-select = [
53
- 'FA', # flake8-future-annotations
54
- 'I', # isort
55
- 'UP', # pyupgrade
48
+ 'W605', # invalid-escape-sequence
49
+ 'FA', # flake8-future-annotations
50
+ 'I', # isort
51
+ 'UP', # pyupgrade
52
+ 'RUF', # Ruff-specific rules (RUF)
56
53
  ]
57
54
  lint.ignore = [
58
- 'UP027', # list comprehensions are faster than generator expressions
59
- 'E721', # Do not compare types, use `isinstance()`
55
+ 'E721', # Do not compare types, use `isinstance()`
56
+ 'RUF001', # ambiguous-unicode-character-string
57
+ 'RUF002', # ambiguous-unicode-character-docstring
58
+ 'RUF003', # ambiguous-unicode-character-comment
59
+ 'RUF012', # mutable-class-default
60
+ 'RUF059', # Unpacked variable never used
60
61
  ]
61
62
 
62
63
  [tool.pytest.ini_options]
@@ -70,3 +71,18 @@ reportUnnecessaryCast = "warning"
70
71
  reportUnnecessaryContains = "warning"
71
72
  reportUnnecessaryIsInstance = "warning"
72
73
  reportUnnecessaryTypeIgnoreComment = true
74
+ reportInvalidStringEscapeSequence = false
75
+ reportConstantRedefinition = 'error'
76
+ reportTypeCommentUsage = 'warning'
77
+ reportUnnecessaryComparison = 'warning'
78
+ venvPath = "."
79
+ venv = ".venv"
80
+
81
+ [tool.uv.build-backend]
82
+ module-root = ""
83
+ module-name = "wikitextparser"
84
+
85
+ [dependency-groups]
86
+ dev = [
87
+ "pytest>=8.3.5",
88
+ ]
@@ -1,5 +1,5 @@
1
1
  # Scheme: [N!]N(.N)*[{a|b|rc}N][.postN][.devN]
2
- __version__ = '0.56.3'
2
+ __version__ = '1.0.0'
3
3
 
4
4
  from . import _wikitext
5
5
  from ._argument import Argument # noqa: F401
@@ -24,7 +24,7 @@ class Argument(SubWikiText):
24
24
  See https://www.mediawiki.org/wiki/Help:Templates for more information.
25
25
  """
26
26
 
27
- __slots__ = '_shadow_match_cache', '_parent'
27
+ __slots__ = '_parent', '_shadow_match_cache'
28
28
 
29
29
  def __init__(
30
30
  self,
@@ -142,7 +142,7 @@ INLINE_NONHAEDER_CELL_MATCH = rc(
142
142
 
143
143
 
144
144
  class Cell(SubWikiTextWithAttrs):
145
- __slots__ = '_header', '_match_cache', '_attrs_match_cache'
145
+ __slots__ = '_attrs_match_cache', '_header', '_match_cache'
146
146
 
147
147
  def __init__(
148
148
  self,
@@ -189,15 +189,15 @@ class Cell(SubWikiTextWithAttrs):
189
189
  return cache_match # type: ignore
190
190
  shadow = self._shadow
191
191
  if shadow[0] == 10: # ord('\n')
192
- m = NEWLINE_CELL_MATCH(shadow)
192
+ m: Match[bytes] = NEWLINE_CELL_MATCH(shadow) # type: ignore
193
193
  self._header = m['sep'] == 33 # ord('!')
194
194
  elif self._header:
195
- m = INLINE_HAEDER_CELL_MATCH(shadow)
195
+ m = INLINE_HAEDER_CELL_MATCH(shadow) # type: ignore
196
196
  else:
197
- m = INLINE_NONHAEDER_CELL_MATCH(shadow)
197
+ m = INLINE_NONHAEDER_CELL_MATCH(shadow) # type: ignore
198
198
  self._match_cache = m, string
199
199
  self._attrs_match_cache = None, None
200
- return m # type: ignore
200
+ return m
201
201
 
202
202
  @property
203
203
  def value(self) -> str:
@@ -246,7 +246,7 @@ class Cell(SubWikiTextWithAttrs):
246
246
  attrs_start, attrs_end = cell_match.span('attrs')
247
247
  if attrs_start != -1:
248
248
  encoded_attr_name = attr_name.encode()
249
- attrs_m = ATTRS_MATCH(shadow, attrs_start, attrs_end)
249
+ attrs_m: Match[bytes] = ATTRS_MATCH(shadow, attrs_start, attrs_end) # type: ignore
250
250
  for i, n in enumerate(reversed(attrs_m.captures('attr_name'))):
251
251
  if n == encoded_attr_name:
252
252
  vs, ve = attrs_m.spans('attr_value')[-i - 1]
@@ -14,7 +14,7 @@ def _plant_trie(strings: _Iterable[str]) -> dict:
14
14
  for string in strings:
15
15
  d = trie
16
16
  for char in string:
17
- d[char] = char in d and d[char] or {}
17
+ d[char] = (char in d and d[char]) or {}
18
18
  d = d[char]
19
19
  d[''] = None # EOS
20
20
  return trie
@@ -17,7 +17,7 @@ class ExternalLink(SubWikiText):
17
17
  external links.
18
18
  """
19
19
  if self(0) == '[':
20
- return self(1, URL_MATCH(self._ext_link_shadow, 1).end())
20
+ return self(1, URL_MATCH(self._ext_link_shadow, 1).end()) # type: ignore
21
21
  return self.string
22
22
 
23
23
  @url.setter
@@ -38,7 +38,7 @@ class ExternalLink(SubWikiText):
38
38
  """
39
39
  string = self.string
40
40
  if string[0] == '[':
41
- url_end = URL_MATCH(self._ext_link_shadow, 1).end()
41
+ url_end = URL_MATCH(self._ext_link_shadow, 1).end() # type: ignore
42
42
  end_char = string[url_end]
43
43
  if end_char == ']':
44
44
  return None
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from regex import Match
4
+
3
5
  from ._wikitext import SubWikiText, rc
4
6
 
5
7
  HEADER_MATCH = rc(rb'(={1,6})([^\n]+?)\1[ \t]*(\n|\Z)').match
@@ -37,7 +39,7 @@ class Section(SubWikiText):
37
39
 
38
40
  @level.setter
39
41
  def level(self, value: int) -> None:
40
- m = self._header_match
42
+ m: Match[bytes] = self._header_match # type: ignore
41
43
  level_diff = len(m[1]) - value
42
44
  if level_diff == 0:
43
45
  return