zensical-0.0.9-cp310-abi3-musllinux_1_2_i686.whl → zensical-0.0.12-cp310-abi3-musllinux_1_2_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zensical/__init__.py +4 -4
- zensical/bootstrap/.github/workflows/docs.yml +1 -0
- zensical/bootstrap/zensical.toml +3 -3
- zensical/config.py +130 -185
- zensical/extensions/emoji.py +20 -25
- zensical/extensions/links.py +19 -23
- zensical/extensions/preview.py +27 -39
- zensical/extensions/search.py +81 -81
- zensical/extensions/utilities/filter.py +3 -8
- zensical/main.py +33 -46
- zensical/markdown.py +18 -17
- zensical/templates/assets/javascripts/workers/{search.5e1f2129.min.js → search.5df7522c.min.js} +1 -1
- zensical/templates/assets/stylesheets/classic/main.6f483be1.min.css +1 -0
- zensical/templates/assets/stylesheets/modern/main.09f707be.min.css +1 -0
- zensical/templates/base.html +3 -3
- zensical/zensical.abi3.so +0 -0
- zensical/zensical.pyi +5 -11
- {zensical-0.0.9.dist-info → zensical-0.0.12.dist-info}/METADATA +6 -2
- {zensical-0.0.9.dist-info → zensical-0.0.12.dist-info}/RECORD +23 -23
- zensical.libs/libgcc_s-f5fcfe20.so.1 +0 -0
- zensical/templates/assets/stylesheets/classic/main.6eec86b3.min.css +0 -1
- zensical/templates/assets/stylesheets/modern/main.2644c6b7.min.css +0 -1
- zensical.libs/libgcc_s-27e5a392.so.1 +0 -0
- {zensical-0.0.9.dist-info → zensical-0.0.12.dist-info}/WHEEL +0 -0
- {zensical-0.0.9.dist-info → zensical-0.0.12.dist-info}/entry_points.txt +0 -0
- {zensical-0.0.9.dist-info → zensical-0.0.12.dist-info}/licenses/LICENSE.md +0 -0
zensical/extensions/emoji.py
CHANGED
@@ -26,21 +26,22 @@ from __future__ import annotations
 import codecs
 import functools
 import os
-
 from glob import iglob
-from
-from pymdownx import emoji, twemoji_db
+from typing import TYPE_CHECKING
 from xml.etree.ElementTree import Element
 
+from pymdownx import emoji, twemoji_db
+
+if TYPE_CHECKING:
+    from markdown import Markdown
+
 # -----------------------------------------------------------------------------
 # Functions
 # -----------------------------------------------------------------------------
 
 
-def twemoji(options:
-    """
-    Create twemoji index.
-    """
+def twemoji(options: dict, md: Markdown) -> dict:  # noqa: ARG001
+    """Create twemoji index."""
     paths = options.get("custom_icons", [])[:]
     return _load_twemoji_index(tuple(paths))
 
@@ -53,14 +54,12 @@ def to_svg(
     alt: str,
     title: str,
     category: str,
-    options:
+    options: dict,
     md: Markdown,
-):
-    """
-    Load icon.
-    """
+) -> Element[str]:
+    """Load icon."""
     if not uc:
-        icons = md.inlinePatterns["emoji"].emoji_index["emoji"]
+        icons = md.inlinePatterns["emoji"].emoji_index["emoji"]  # type: ignore[attr-defined]
 
         # Create and return element to host icon
         el = Element("span", {"class": options.get("classes", index)})
@@ -78,20 +77,16 @@ def to_svg(
 # -----------------------------------------------------------------------------
 
 
-@functools.
-def _load(file: str):
-    """
-    Load icon from file.
-    """
+@functools.cache
+def _load(file: str) -> str:
+    """Load icon from file."""
     with codecs.open(file, encoding="utf-8") as f:
         return f.read()
 
 
-@functools.
-def _load_twemoji_index(paths):
-    """
-    Load twemoji index and add icons.
-    """
+@functools.cache
+def _load_twemoji_index(paths: tuple[str, ...]) -> dict:
+    """Load twemoji index and add icons."""
     index = {
         "name": "twemoji",
         "emoji": twemoji_db.emoji,
@@ -106,8 +101,8 @@ def _load_twemoji_index(paths):
 
     # Index icons provided by the theme and via custom icons
     glob = os.path.join(base, "**", "*.svg")
-
-    for file in
+    svgs = iglob(os.path.normpath(glob), recursive=True)
+    for file in svgs:
         icon = file[len(base) + 1 : -4].replace(os.path.sep, "-")
 
         # Add icon to index
zensical/extensions/links.py
CHANGED
@@ -23,12 +23,17 @@
 
 from __future__ import annotations
 
+from pathlib import PurePosixPath
+from typing import TYPE_CHECKING
+from urllib.parse import urlparse
+
 from markdown import Extension, Markdown
 from markdown.treeprocessors import Treeprocessor
 from markdown.util import AMP_SUBSTITUTE
-
-
-from
+
+if TYPE_CHECKING:
+    from xml.etree.ElementTree import Element
+
 
 # -----------------------------------------------------------------------------
 # Classes
@@ -36,8 +41,7 @@ from urllib.parse import urlparse
 
 
 class LinksProcessor(Treeprocessor):
-    """
-    Tree processor to replace links in Markdown with URLs.
+    """Tree processor to replace links in Markdown with URLs.
 
     Note that we view this as a bandaid until we can do processing on proper
     HTML ASTs in Rust. In the meantime, we just replace them as we find them.
@@ -50,7 +54,7 @@ class LinksProcessor(Treeprocessor):
         self.path = path  # Current page
         self.use_directory_urls = use_directory_urls
 
-    def run(self, root: Element):
+    def run(self, root: Element) -> None:
         # Now, we determine whether the current page is an index page, as we
         # must apply slightly different handling in case of directory URLs
         current_is_index = get_name(self.path) in ("index.md", "README.md")
@@ -64,7 +68,7 @@ class LinksProcessor(Treeprocessor):
             # Extract value - Python Markdown does some weird stuff where it
             # replaces mailto: links with double encoded entities. MkDocs just
             # skips if it detects that, so we do the same.
-            value = el.get(key)
+            value = el.get(key, "")
             if AMP_SUBSTITUTE in value:
                 continue
 
@@ -101,21 +105,15 @@ class LinksProcessor(Treeprocessor):
 
 
 class LinksExtension(Extension):
-    """
-    A Markdown extension to resolve links to other Markdown files.
-    """
+    """A Markdown extension to resolve links to other Markdown files."""
 
     def __init__(self, path: str, use_directory_urls: bool):
-        """
-        Initialize the extension.
-        """
+        """Initialize the extension."""
         self.path = path  # Current page
         self.use_directory_urls = use_directory_urls
 
-    def extendMarkdown(self, md: Markdown):
-        """
-        Register Markdown extension.
-        """
+    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802
+        """Register Markdown extension."""
         md.registerExtension(self)
 
         # Create and register treeprocessor - we use the same priority as the
@@ -123,7 +121,7 @@ class LinksExtension(Extension):
         # after our treeprocessor, so we can check the original Markdown URIs
         # before they are resolved to URLs.
         processor = LinksProcessor(md, self.path, self.use_directory_urls)
-        md.treeprocessors.register(processor, "
+        md.treeprocessors.register(processor, "zrelpath", 0)
 
 
 # -----------------------------------------------------------------------------
@@ -132,8 +130,6 @@ class LinksExtension(Extension):
 
 
 def get_name(path: str) -> str:
-    """
-
-
-    path = PurePosixPath(path)
-    return path.name
+    """Get the name of a file from a given path."""
+    pure_path = PurePosixPath(path)
+    return pure_path.name
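The rename to "zrelpath" above matters because other code looks the treeprocessor up by its registered name (preview.py below calls get_index_for_name("zrelpath")). A rough, self-contained sketch of how Python-Markdown's registry is used for this; NoopProcessor is a stand-in, not zensical's processor:

from markdown import Markdown
from markdown.treeprocessors import Treeprocessor


class NoopProcessor(Treeprocessor):
    def run(self, root):
        # A real processor would rewrite link hrefs in the element tree here.
        return None


md = Markdown()
md.treeprocessors.register(NoopProcessor(md), "zrelpath", 0)

# Looking the processor up by name only works if both sides agree on it,
# which is why the registration name and the lookup name must match.
print(md.treeprocessors.get_index_for_name("zrelpath"))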
zensical/extensions/preview.py
CHANGED
@@ -24,14 +24,17 @@
 from __future__ import annotations
 
 import posixpath
+from typing import TYPE_CHECKING, Any
+from urllib.parse import urlparse
 
 from markdown import Extension, Markdown
 from markdown.treeprocessors import Treeprocessor
-from urllib.parse import urlparse
-from xml.etree.ElementTree import Element
 
-from .links import LinksProcessor
-from .utilities.filter import Filter
+from zensical.extensions.links import LinksProcessor
+from zensical.extensions.utilities.filter import Filter
+
+if TYPE_CHECKING:
+    from xml.etree.ElementTree import Element
 
 # -----------------------------------------------------------------------------
 # Classes
@@ -39,25 +42,20 @@ from .utilities.filter import Filter
 
 
 class PreviewProcessor(Treeprocessor):
-    """
-    A Markdown treeprocessor to enable instant previews on links.
+    """A Markdown treeprocessor to enable instant previews on links.
 
     Note that this treeprocessor is dependent on the `links` treeprocessor
     registered programmatically before rendering a page.
     """
 
     def __init__(self, md: Markdown, config: dict):
-        """
-        Initialize the treeprocessor.
-        """
+        """Initialize the treeprocessor."""
         super().__init__(md)
         self.config = config
 
-    def run(self, root: Element):
-        """
-
-        """
-        at = self.md.treeprocessors.get_index_for_name("relpath")
+    def run(self, root: Element) -> None:
+        """Run the treeprocessor."""
+        at = self.md.treeprocessors.get_index_for_name("zrelpath")
 
         # Hack: Python Markdown has no notion of where it is, i.e., which file
         # is being processed. This seems to be a deliberate design decision, as
@@ -84,9 +82,10 @@ class PreviewProcessor(Treeprocessor):
         # Walk through all configurations - @todo refactor so that we don't
         # iterate multiple times over the same elements
         for configuration in configurations:
-            if not configuration.get("sources")
-
-
+            if not configuration.get("sources") and not configuration.get(
+                "targets"
+            ):
+                continue
 
             # Skip if page should not be considered
             filter = get_filter(configuration, "sources")
@@ -123,8 +122,7 @@ class PreviewProcessor(Treeprocessor):
 
 
 class PreviewExtension(Extension):
-    """
-    A Markdown extension to enable instant previews on links.
+    """A Markdown extension to enable instant previews on links.
 
     This extensions allows to automatically add the `data-preview` attribute to
     internal links matching specific criteria, so Material for MkDocs renders a
@@ -132,10 +130,8 @@ class PreviewExtension(Extension):
     add previews to links in a programmatic way.
     """
 
-    def __init__(self, *args, **kwargs):
-        """
-        Initialize the extension.
-        """
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """Initialize the extension."""
         self.config = {
             "configurations": [[], "Filter configurations"],
             "sources": [{}, "Link sources"],
@@ -143,10 +139,8 @@ class PreviewExtension(Extension):
         }
         super().__init__(*args, **kwargs)
 
-    def extendMarkdown(self, md: Markdown):
-        """
-        Register Markdown extension.
-        """
+    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802
+        """Register Markdown extension."""
         md.registerExtension(self)
 
         # Create and register treeprocessor - we use the same priority as the
@@ -162,17 +156,13 @@ class PreviewExtension(Extension):
 # -----------------------------------------------------------------------------
 
 
-def get_filter(settings: dict, key: str):
-    """
-
-    """
-    return Filter(config=settings.get(key))  # type: ignore
+def get_filter(settings: dict, key: str) -> Filter:
+    """Get file filter from settings."""
+    return Filter(config=settings.get(key, {}))
 
 
 def resolve(processor_path: str, url_path: str) -> str:
-    """
-    Resolve a relative URL path against the processor path.
-    """
+    """Resolve a relative URL path against the processor path."""
     # Remove the file name from the processor path to get the directory
     base_path = posixpath.dirname(processor_path)
 
@@ -194,8 +184,6 @@ def resolve(processor_path: str, url_path: str) -> str:
     return posixpath.join(*base_segments)
 
 
-def makeExtension(**kwargs):
-    """
-    Register Markdown extension.
-    """
+def makeExtension(**kwargs: Any) -> PreviewExtension:  # noqa: N802
+    """Register Markdown extension."""
     return PreviewExtension(**kwargs)
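The resolve() helper above joins a relative URL path against the directory of the page being processed using posixpath. As a rough illustration of that idea only (an assumption for the example, not zensical's exact algorithm, and the paths are made up):

import posixpath


def resolve_relative(page_path: str, url_path: str) -> str:
    # Resolve a relative link against the directory of the current page.
    base_path = posixpath.dirname(page_path)
    return posixpath.normpath(posixpath.join(base_path, url_path))


print(resolve_relative("guides/setup.md", "../reference/config.md"))
# reference/config.md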
zensical/extensions/search.py
CHANGED
@@ -23,8 +23,9 @@
 
 from html import escape
 from html.parser import HTMLParser
+from typing import Any
 
-from markdown import Extension
+from markdown import Extension, Markdown
 from markdown.postprocessors import Postprocessor
 
 # -----------------------------------------------------------------------------
@@ -33,17 +34,14 @@ from markdown.postprocessors import Postprocessor
 
 
 class SearchProcessor(Postprocessor):
-    """
-    Post processor that extracts searchable content from the rendered HTML.
-    """
+    """Post processor that extracts searchable content from the rendered HTML."""
 
-    def __init__(self, md):
+    def __init__(self, md: Markdown) -> None:
         super().__init__(md)
-        self.data = []
+        self.data: list[dict[str, Any]] = []
 
-    def run(self, html):
+    def run(self, html: str) -> str:
         """Process the rendered HTML and extract text length."""
-
         # Divide page content into sections
         parser = Parser()
         parser.feed(html)
@@ -76,17 +74,17 @@
 class SearchExtension(Extension):
     """Markdown extension for search indexing."""
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Any) -> None:
         self.config = {"keep": [set(), "Set of HTML tags to keep in output"]}
         super().__init__(**kwargs)
 
-    def extendMarkdown(self, md):
+    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802
         """Register the PostProcessor with Markdown."""
         processor = SearchProcessor(md)
         md.postprocessors.register(processor, "search", 0)
 
 
-def makeExtension(**kwargs):
+def makeExtension(**kwargs: Any) -> SearchExtension:  # noqa: N802
     """Factory function for creating the extension."""
     return SearchExtension(**kwargs)
 
@@ -96,13 +94,16 @@ def makeExtension(**kwargs):
 
 # HTML element
 class Element:
-    """
+    """HTML element.
+
     An element with attributes, essentially a small wrapper object for the
     parser to access attributes in other callbacks than handle_starttag.
     """
 
     # Initialize HTML element
-    def __init__(
+    def __init__(
+        self, tag: str, attrs: dict[str, str | None] | None = None
+    ) -> None:
         self.tag = tag
         self.attrs = attrs or {}
 
@@ -111,18 +112,17 @@ class Element:
         return self.tag
 
     # Support comparison (compare by tag only)
-    def __eq__(self, other):
-        if other
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, Element):
             return self.tag == other.tag
-
-        return self.tag == other
+        return self.tag == other
 
     # Support set operations
     def __hash__(self):
         return hash(self.tag)
 
     # Check whether the element should be excluded
-    def is_excluded(self):
+    def is_excluded(self) -> bool:
         return "data-search-exclude" in self.attrs
 
 
@@ -131,31 +131,31 @@ class Element:
 
 # HTML section
 class Section:
-    """
+    """HTML section.
+
     A block of text with markup, preceded by a title (with markup), i.e., a
     headline with a certain level (h1-h6). Internally used by the parser.
     """
 
     # Initialize HTML section
-    def __init__(self, el, level, depth=0):
+    def __init__(self, el: Element, level: int, depth: int = 0) -> None:
         self.el = el
-        self.depth = depth
+        self.depth: int | float = depth
         self.level = level
 
         # Initialize section data
-        self.text = []
-        self.title = []
-        self.id = None
+        self.text: list[str] = []
+        self.title: list[str] = []
+        self.id: str | None = None
 
     # String representation
     def __repr__(self):
        if self.id:
-            return "
-
-        return self.el.tag
+            return f"{self.el.tag}#{self.id}"
+        return self.el.tag
 
     # Check whether the section should be excluded
-    def is_excluded(self):
+    def is_excluded(self) -> bool:
         return self.el.is_excluded()
 
 
@@ -164,7 +164,8 @@ class Section:
 
 # HTML parser
 class Parser(HTMLParser):
-    """
+    """Section divider.
+
     This parser divides the given string of HTML into a list of sections, each
     of which are preceded by a h1-h6 level heading. A white- and blacklist of
     tags dictates which tags should be preserved as part of the index, and
@@ -172,31 +173,31 @@ class Parser(HTMLParser):
     """
 
     # Initialize HTML parser
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
         # Tags to skip
-        self.skip =
-
-
-
-
-        ]
-        )
+        self.skip: set[str | Element] = {
+            "object",  # Objects
+            "script",  # Scripts
+            "style",  # Styles
+        }
 
         # Current context and section
-        self.context = []
-        self.section = None
+        self.context: list[Element] = []
+        self.section: Section | None = None
 
         # All parsed sections
-        self.data = []
+        self.data: list[Section] = []
 
     # Called at the start of every HTML tag
-    def handle_starttag(
-        attrs
+    def handle_starttag(
+        self, tag: str, attrs: list[tuple[str, str | None]]
+    ) -> None:
+        attrs_dict = dict(attrs)
 
         # Ignore self-closing tags
-        el = Element(tag,
+        el = Element(tag, attrs_dict)
         if tag not in void:
             self.context.append(el)
         else:
@@ -205,7 +206,7 @@ class Parser(HTMLParser):
         # Handle heading
         if tag in ([f"h{x}" for x in range(1, 7)]):
             depth = len(self.context)
-            if "id" in
+            if "id" in attrs_dict:
                 # Ensure top-level section
                 if tag != "h1" and not self.data:
                     self.section = Section(Element("hx"), 1, depth)
@@ -214,7 +215,7 @@ class Parser(HTMLParser):
                 # Set identifier, if not first section
                 self.section = Section(el, int(tag[1:2]), depth)
                 if self.data:
-                    self.section.id =
+                    self.section.id = attrs_dict["id"]
 
                 # Append section to list
                 self.data.append(self.section)
@@ -225,7 +226,7 @@ class Parser(HTMLParser):
             self.data.append(self.section)
 
         # Handle special cases to skip
-        for key, value in
+        for key, value in attrs_dict.items():
             # Skip block if explicitly excluded from search
             if key == "data-search-exclude":
                 self.skip.add(el)
@@ -247,7 +248,7 @@ class Parser(HTMLParser):
            data.append(f"<{tag}>")
 
     # Called at the end of every HTML tag
-    def handle_endtag(self, tag):
+    def handle_endtag(self, tag: str) -> None:
        if not self.context or self.context[-1] != tag:
            return
 
@@ -255,6 +256,7 @@ class Parser(HTMLParser):
         # a headline is nested in another element. In that case, we close the
         # current section, continuing to append data to the previous section,
         # which could also be a nested section – see https://bit.ly/3IxxIJZ
+        assert self.section is not None  # noqa: S101
         if self.section.depth > len(self.context):
             for section in reversed(self.data):
                 if section.depth <= len(self.context):
@@ -295,7 +297,7 @@ class Parser(HTMLParser):
            data.append(f"</{tag}>")
 
     # Called for the text contents of each tag
-    def handle_data(self, data):
+    def handle_data(self, data: str) -> None:
         if self.skip.intersection(self.context):
             return
 
@@ -324,9 +326,11 @@ class Parser(HTMLParser):
 
         # Collapse adjacent whitespace
         elif data.isspace():
-            if
-                self.section.text
-
+            if (
+                not self.section.text
+                or not self.section.text[-1].isspace()
+                or "pre" in self.context
+            ):
                 self.section.text.append(data)
 
         # Handle everything else
@@ -339,35 +343,31 @@ class Parser(HTMLParser):
 # -----------------------------------------------------------------------------
 
 # Tags to keep
-keep =
-
-
-
-
-
-
-
-
-
-    ]
-)
+keep = {
+    "p",
+    "code",
+    "pre",
+    "li",
+    "ol",
+    "ul",
+    "sub",
+    "sup",
+}
 
 # Tags that are self-closing
-void =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    ]
-)
+void = {
+    "area",
+    "base",
+    "br",
+    "col",
+    "embed",
+    "hr",
+    "img",
+    "input",
+    "link",
+    "meta",
+    "param",
+    "source",
+    "track",
+    "wbr",
+}
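The typed handle_starttag above keeps the signature that Python's html.parser.HTMLParser actually calls, where attributes arrive as a list of (name, value) tuples, and converts them to a dict once (attrs_dict = dict(attrs)). A small standalone sketch of that callback shape; HeadingParser is illustrative, not the package's parser:

from __future__ import annotations

from html.parser import HTMLParser


class HeadingParser(HTMLParser):
    def __init__(self) -> None:
        super().__init__()
        self.headings: list[tuple[str, str | None]] = []

    def handle_starttag(
        self, tag: str, attrs: list[tuple[str, str | None]]
    ) -> None:
        # HTMLParser hands attributes over as (name, value) tuples;
        # converting once keeps the rest of the logic dict-based.
        attrs_dict = dict(attrs)
        if tag in {f"h{x}" for x in range(1, 7)}:
            self.headings.append((tag, attrs_dict.get("id")))


parser = HeadingParser()
parser.feed('<h1 id="intro">Intro</h1><p>Text</p><h2 id="usage">Usage</h2>')
print(parser.headings)  # [('h1', 'intro'), ('h2', 'usage')]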
zensical/extensions/utilities/filter.py
CHANGED
@@ -31,13 +31,10 @@ from fnmatch import fnmatch
 
 
 class Filter:
-    """
-    A filter.
-    """
+    """A filter."""
 
     def __init__(self, config: dict):
-        """
-        Initialize the filter.
+        """Initialize the filter.
 
         Arguments:
             config: The filter configuration.
@@ -45,8 +42,7 @@ class Filter:
         self.config = config
 
     def __call__(self, value: str) -> bool:
-        """
-        Filter a value.
+        """Filter a value.
 
         First, the inclusion patterns are checked. Regardless of whether they
         are present, the exclusion patterns are checked afterwards. This allows
@@ -59,7 +55,6 @@ class Filter:
         Returns:
             Whether the value should be included.
         """
-
         # Check if value matches one of the inclusion patterns
         if "include" in self.config:
             for pattern in self.config["include"]: