webwidgets 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ name: "CD: Publish to PyPI (or TestPyPI)"
14
+
15
+ on:
16
+ push:
17
+ tags:
18
+ - "[0-9]+.[0-9]+.[0-9]+"
19
+ - "[0-9]+.[0-9]+.[0-9]+.dev[0-9]+"
20
+ - "[0-9]+.[0-9]+.[0-9]+[ab][0-9]+"
21
+ - "[0-9]+.[0-9]+.[0-9]+[ab][0-9]+.dev[0-9]+"
22
+ - "[0-9]+.[0-9]+.[0-9].dev[0-9]+"
23
+ - "[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
24
+ - "[0-9]+.[0-9]+.[0-9]+rc[0-9]+.dev[0-9]+"
25
+ - "[0-9]+.[0-9]+.[0-9]+post[0-9]+"
26
+ - "[0-9]+.[0-9]+.[0-9]+post[0-9]+.dev[0-9]+"
27
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+"
28
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+.dev[0-9]+"
29
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+[ab][0-9]+"
30
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+[ab][0-9]+.dev[0-9]+"
31
+ - "testpypi/[0-9]+.[0-9]+.[0-9].dev[0-9]+"
32
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
33
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+rc[0-9]+.dev[0-9]+"
34
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+post[0-9]+"
35
+ - "testpypi/[0-9]+.[0-9]+.[0-9]+post[0-9]+.dev[0-9]+"
36
+
37
+ jobs:
38
+ ensure-main:
39
+ if: github.event.base_ref == 'refs/heads/main'
40
+ name: Ensure tag was pushed to main
41
+ runs-on: ubuntu-latest
42
+ steps:
43
+ - name: Log that tag was pushed to main
44
+ run: echo "Tag was pushed to main branch. Starting CD workflow."
45
+
46
+ build:
47
+ name: Build package
48
+ needs: ensure-main
49
+ runs-on: ubuntu-latest
50
+ steps:
51
+ - uses: actions/checkout@v4
52
+ with:
53
+ persist-credentials: false
54
+ - name: Set up Python
55
+ uses: actions/setup-python@v5
56
+ with:
57
+ python-version: "3.x"
58
+ - name: Install pypa/build
59
+ run: >-
60
+ python3 -m
61
+ pip install
62
+ build
63
+ --user
64
+ - name: Install pypa/hatch
65
+ run: python3 -m pip install hatch
66
+ - name: Set version with hatch
67
+ run: |
68
+ # Using variable instead of GitHub-specific contexts
69
+ TAG=$(git describe --tags --abbrev=0)
70
+ echo "Tag is: $TAG"
71
+ # Removing testpypi/ from tag before setting version
72
+ VERSION=$(echo $TAG | awk '{gsub(/testpypi\//,"")}1')
73
+ echo "Setting version from tag: $VERSION"
74
+ hatch version $VERSION
75
+ - name: Build a binary wheel and a source tarball
76
+ run: python3 -m build
77
+ - name: Store the distribution packages
78
+ uses: actions/upload-artifact@v4
79
+ with:
80
+ name: python-package-distributions
81
+ path: dist/
82
+
83
+ publish-to-pypi:
84
+ if: ${{ !startsWith(github.ref_name, 'testpypi/') }}
85
+ name: Publish to PyPI
86
+ needs:
87
+ - build
88
+ runs-on: ubuntu-latest
89
+ environment:
90
+ name: pypi
91
+ url: https://pypi.org/p/webwidgets
92
+ permissions:
93
+ id-token: write # IMPORTANT: mandatory for trusted publishing
94
+ steps:
95
+ - name: Download all the dists
96
+ uses: actions/download-artifact@v4
97
+ with:
98
+ name: python-package-distributions
99
+ path: dist/
100
+ - name: Publish package to PyPI
101
+ uses: pypa/gh-action-pypi-publish@release/v1
102
+
103
+ publish-to-testpypi:
104
+ if: startsWith(github.ref_name, 'testpypi/')
105
+ name: Publish to TestPyPI
106
+ needs:
107
+ - build
108
+ runs-on: ubuntu-latest
109
+ environment:
110
+ name: testpypi
111
+ url: https://test.pypi.org/p/webwidgets
112
+ permissions:
113
+ id-token: write # IMPORTANT: mandatory for trusted publishing
114
+ steps:
115
+ - name: Download all the dists
116
+ uses: actions/download-artifact@v4
117
+ with:
118
+ name: python-package-distributions
119
+ path: dist/
120
+ - name: Publish package to TestPyPI
121
+ uses: pypa/gh-action-pypi-publish@release/v1
122
+ with:
123
+ repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,80 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ name: "Full CI: Python 3.9-13 on all OSes"
14
+
15
+ on:
16
+ push:
17
+ branches: main
18
+ pull_request:
19
+ branches: main
20
+
21
+ permissions:
22
+ contents: read
23
+
24
+ jobs:
25
+ lint_source:
26
+ strategy:
27
+ matrix:
28
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
29
+ os: [ubuntu-latest, windows-latest, macos-latest]
30
+ name: Lint source on ${{ matrix.os }} Python ${{ matrix.python-version }}
31
+ runs-on: ${{ matrix.os }}
32
+ steps:
33
+ - name: Checkout repository
34
+ uses: actions/checkout@v4
35
+       - name: Set up Python ${{ matrix.python-version }}
36
+         uses: actions/setup-python@v5
37
+         with:
38
+           python-version: ${{ matrix.python-version }}
39
+ - name: Install flake8
40
+ run: |
41
+ python -c "import platform; print('OS', platform.system())"
42
+ python -c "import sys; print('Python version', sys.version)"
43
+ python -m pip install --upgrade pip
44
+ pip install flake8
45
+ - name: Lint with flake8
46
+ run: |
47
+ # stop the build if there are Python syntax errors or undefined names
48
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
49
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
50
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
51
+
52
+ test_build:
53
+ needs: lint_source
54
+ strategy:
55
+ matrix:
56
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
57
+ os: [ubuntu-latest, windows-latest, macos-latest]
58
+ name: Test build on ${{ matrix.os }} Python ${{ matrix.python-version }}
59
+ runs-on: ${{ matrix.os }}
60
+ steps:
61
+ - name: Checkout repository
62
+ uses: actions/checkout@v4
63
+       - name: Set up Python ${{ matrix.python-version }}
64
+         uses: actions/setup-python@v5
65
+         with:
66
+           python-version: ${{ matrix.python-version }}
67
+ - name: Install pytest
68
+ run: |
69
+ python -c "import platform; print('OS', platform.system())"
70
+ python -c "import sys; print('Python version', sys.version)"
71
+ python -m pip install --upgrade pip
72
+ pip install pytest
73
+ - name: Build and install
74
+ run: |
75
+ pip install .
76
+ # Removing webwidgets directory so imports come from build
77
+ rm -r webwidgets
78
+ - name: Test with pytest
79
+ run: |
80
+ pytest tests
@@ -0,0 +1,78 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ name: "Quick CI: Python 3.9-11 on Ubuntu"
14
+
15
+ on:
16
+ push:
17
+ branches: '*'
18
+
19
+ permissions:
20
+ contents: read
21
+
22
+ jobs:
23
+ lint_source:
24
+ strategy:
25
+ matrix:
26
+ python-version: ["3.9", "3.10", "3.11"]
27
+ name: Lint source on Python ${{ matrix.python-version }}
28
+ runs-on: ubuntu-latest
29
+ steps:
30
+ - name: Checkout repository
31
+ uses: actions/checkout@v4
32
+       - name: Set up Python ${{ matrix.python-version }}
33
+         uses: actions/setup-python@v5
34
+         with:
35
+           python-version: ${{ matrix.python-version }}
36
+ - name: Install flake8
37
+ run: |
38
+ python -c "import sys; print('Python version', sys.version)"
39
+ python -m pip install --upgrade pip
40
+ pip install flake8
41
+ - name: Lint with flake8
42
+ run: |
43
+ # stop the build if there are Python syntax errors or undefined names
44
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
45
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
46
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
47
+
48
+ test_build:
49
+ needs: lint_source
50
+ strategy:
51
+ matrix:
52
+ python-version: ["3.9", "3.10", "3.11"]
53
+ name: Test build on Python ${{ matrix.python-version }}
54
+ runs-on: ubuntu-latest
55
+ steps:
56
+ - name: Checkout repository
57
+ uses: actions/checkout@v4
58
+       - name: Set up Python ${{ matrix.python-version }}
59
+         uses: actions/setup-python@v5
60
+         with:
61
+           python-version: ${{ matrix.python-version }}
62
+ - name: Install pytest
63
+ run: |
64
+ python -c "import sys; print('Python version', sys.version)"
65
+ python -m pip install --upgrade pip
66
+ pip install pytest
67
+ - name: Build and install
68
+ run: |
69
+ echo "Current directory:"
70
+ ls -la
71
+ pip install .
72
+ # Removing webwidgets directory so imports come from build
73
+ rm -r webwidgets
74
+ echo "Removed webwidgets directory. New content:"
75
+ ls -la
76
+ - name: Test with pytest
77
+ run: |
78
+ pytest tests
@@ -0,0 +1 @@
1
+ __pycache__
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 mlaasri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: webwidgets
3
+ Version: 0.1.0
4
+ Summary: A Python package for designing web UIs.
5
+ Project-URL: Source code, https://github.com/mlaasri/WebWidgets
6
+ Author: mlaasri
7
+ License-File: LICENSE
8
+ Keywords: design,webui
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+
14
+ # WebWidgets
15
+
16
+ ![CI Status](https://img.shields.io/github/actions/workflow/status/mlaasri/WebWidgets/ci-full.yml?branch=main)
17
+
18
+ A Python package for creating web UIs
@@ -0,0 +1,5 @@
1
+ # WebWidgets
2
+
3
+ ![CI Status](https://img.shields.io/github/actions/workflow/status/mlaasri/WebWidgets/ci-full.yml?branch=main)
4
+
5
+ A Python package for creating web UIs.
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "webwidgets"
7
+ dynamic = ["version"]
8
+ description = "A Python package for designing web UIs."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license-files = ["LICENSE"]
12
+ authors = [
13
+ { name="mlaasri" }
14
+ ]
15
+ keywords = ["webui", "design"]
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+
21
+ [project.urls]
22
+ "Source code" = "https://github.com/mlaasri/WebWidgets"
23
+
24
+ [tool.hatch.version]
25
+ path = "webwidgets/__init__.py"
26
+
27
+ [tool.hatch.build]
28
+ directory = "dist"
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,334 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import pytest
14
+ from webwidgets.compilation.html.html_node import HTMLNode, no_start_tag, no_end_tag, RawText
15
+
16
+
17
+ class TestHTMLNode:
18
+ class CustomNode(HTMLNode):
19
+ pass
20
+
21
+ @no_start_tag
22
+ class NoStartNode(HTMLNode):
23
+ pass
24
+
25
+ @no_end_tag
26
+ class NoEndNode(HTMLNode):
27
+ pass
28
+
29
+ @no_start_tag
30
+ @no_end_tag
31
+ class NoStartEndNode(HTMLNode):
32
+ pass
33
+
34
+ class OneLineNode(HTMLNode):
35
+ one_line = True
36
+
37
+ class OneLineNoStartNode(NoStartNode):
38
+ one_line = True
39
+
40
+ class KwargsReceiverNode(HTMLNode):
41
+ def to_html(self, return_lines: bool, message: str,
42
+ **kwargs):
43
+ if return_lines:
44
+ return [message]
45
+ return message
46
+
47
+ def test_basic_node(self):
48
+ node = HTMLNode()
49
+ assert node.start_tag == "<htmlnode>"
50
+ assert node.end_tag == "</htmlnode>"
51
+ assert node.to_html() == "<htmlnode></htmlnode>"
52
+
53
+ def test_custom_name(self):
54
+ node = TestHTMLNode.CustomNode()
55
+ assert node.start_tag == "<customnode>"
56
+ assert node.end_tag == "</customnode>"
57
+ assert node.to_html() == "<customnode></customnode>"
58
+
59
+ def test_attributes(self):
60
+ node = HTMLNode(attributes={'id': 'test-id', 'class': 'test-class'})
61
+ assert node.start_tag == '<htmlnode id="test-id" class="test-class">'
62
+ assert node.end_tag == '</htmlnode>'
63
+ assert node.to_html() == '<htmlnode id="test-id" class="test-class"></htmlnode>'
64
+
65
+ def test_no_start_tag(self):
66
+ node = TestHTMLNode.NoStartNode()
67
+ assert node.start_tag == ''
68
+ assert node.end_tag == '</nostartnode>'
69
+ assert node.to_html() == "</nostartnode>"
70
+
71
+ def test_no_end_tag(self):
72
+ node = TestHTMLNode.NoEndNode()
73
+ assert node.start_tag == '<noendnode>'
74
+ assert node.end_tag == ''
75
+ assert node.to_html() == "<noendnode>"
76
+
77
+ def test_no_start_end_tag(self):
78
+ node = TestHTMLNode.NoStartEndNode()
79
+ assert node.start_tag == ''
80
+ assert node.end_tag == ''
81
+ assert node.to_html() == ""
82
+
83
+ def test_one_line_rendering(self):
84
+ node = HTMLNode(children=[RawText('child1'),
85
+ RawText('child2')])
86
+ expected_html = "<htmlnode>child1child2</htmlnode>"
87
+ assert node.to_html(force_one_line=True) == expected_html
88
+
89
+ def test_no_start_tag_with_one_line(self):
90
+ node = TestHTMLNode.NoStartNode(children=[RawText('child1'),
91
+ RawText('child2')])
92
+ expected_html = "child1child2</nostartnode>"
93
+ assert node.to_html(force_one_line=True) == expected_html
94
+
95
+ def test_no_end_tag_with_one_line(self):
96
+ node = TestHTMLNode.NoEndNode(children=[RawText('child1'),
97
+ RawText('child2')])
98
+ expected_html = "<noendnode>child1child2"
99
+ assert node.to_html(force_one_line=True) == expected_html
100
+
101
+ def test_recursive_rendering(self):
102
+ inner_node = HTMLNode(children=[RawText('inner_child')])
103
+ node = TestHTMLNode.CustomNode(children=[inner_node])
104
+ expected_html = '\n'.join([
105
+ "<customnode>",
106
+ " <htmlnode>",
107
+ " inner_child",
108
+ " </htmlnode>",
109
+ "</customnode>"
110
+ ])
111
+ assert node.to_html() == expected_html
112
+ assert node.to_html(force_one_line=False) == expected_html
113
+
114
+ def test_no_start_tag_with_recursive_rendering(self):
115
+ inner_node = HTMLNode(children=[RawText('inner_child')])
116
+ node = TestHTMLNode.NoStartNode(children=[inner_node])
117
+ expected_html = '\n'.join([
118
+ " <htmlnode>",
119
+ " inner_child",
120
+ " </htmlnode>",
121
+ "</nostartnode>"
122
+ ])
123
+ assert node.to_html() == expected_html
124
+
125
+ def test_no_end_tag_with_recursive_rendering(self):
126
+ inner_node = HTMLNode(children=[RawText('inner_child')])
127
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
128
+ expected_html = '\n'.join([
129
+ "<noendnode>",
130
+ " <htmlnode>",
131
+ " inner_child",
132
+ " </htmlnode>"
133
+ ])
134
+ assert node.to_html() == expected_html
135
+
136
+ def test_recursive_rendering_one_line(self):
137
+ inner_node = HTMLNode(children=[RawText('inner_child')])
138
+ node = TestHTMLNode.CustomNode(children=[inner_node])
139
+ expected_html = "<customnode><htmlnode>inner_child</htmlnode></customnode>"
140
+ assert node.to_html(force_one_line=True) == expected_html
141
+
142
+ def test_recursive_rendering_one_line_propagation(self):
143
+ one_line = TestHTMLNode.OneLineNode(
144
+ [HTMLNode(children=[RawText('inner_child')])]
145
+ )
146
+ node = HTMLNode(children=[one_line])
147
+ expected_html = '\n'.join([
148
+ "<htmlnode>",
149
+ " <onelinenode><htmlnode>inner_child</htmlnode></onelinenode>",
150
+ "</htmlnode>"
151
+ ])
152
+ assert node.to_html() == expected_html
153
+
154
+ def test_recursive_rendering_of_tagless_mix(self):
155
+ children = [
156
+ TestHTMLNode.NoEndNode([RawText("child1")]),
157
+ TestHTMLNode.NoStartNode([RawText("child2")]),
158
+ TestHTMLNode.NoEndNode([RawText("child3")]),
159
+ ]
160
+ inner_node = TestHTMLNode.NoStartNode(children=children)
161
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
162
+ expected_html = '\n'.join([
163
+ "<noendnode>",
164
+ " <noendnode>",
165
+ " child1",
166
+ " child2",
167
+ " </nostartnode>",
168
+ " <noendnode>",
169
+ " child3",
170
+ " </nostartnode>"
171
+ ])
172
+ assert node.to_html() == expected_html
173
+
174
+ def test_recursive_rendering_of_tagless_mix_one_line(self):
175
+ children = [
176
+ TestHTMLNode.NoEndNode([RawText("child1")]),
177
+ TestHTMLNode.OneLineNoStartNode([RawText("child2")]),
178
+ TestHTMLNode.NoEndNode([RawText("child3")]),
179
+ ]
180
+ inner_node = TestHTMLNode.NoStartNode(children=children)
181
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
182
+ expected_html = '\n'.join([
183
+ "<noendnode>",
184
+ " <noendnode>",
185
+ " child1",
186
+ " child2</onelinenostartnode>",
187
+ " <noendnode>",
188
+ " child3",
189
+ " </nostartnode>"
190
+ ])
191
+ assert node.to_html() == expected_html
192
+
193
+ def test_recursive_rendering_of_tagless_mix_force_one_line(self):
194
+ children = [
195
+ TestHTMLNode.NoEndNode([RawText("child1")]),
196
+ TestHTMLNode.NoStartNode([RawText("child2")]),
197
+ TestHTMLNode.NoEndNode([RawText("child3")]),
198
+ ]
199
+ inner_node = TestHTMLNode.NoStartNode(children=children)
200
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
201
+ expected_html = "<noendnode><noendnode>child1child2</nostartnode>" + \
202
+ "<noendnode>child3</nostartnode>"
203
+ assert node.to_html(force_one_line=True) == expected_html
204
+
205
+ def test_raw_text_as_orphan_node(self):
206
+ node = HTMLNode(children=[
207
+ TestHTMLNode.CustomNode(),
208
+ RawText("raw_text")
209
+ ])
210
+ expected_html = '\n'.join([
211
+ "<htmlnode>",
212
+ " <customnode></customnode>",
213
+ " raw_text",
214
+ "</htmlnode>"
215
+ ])
216
+ assert node.to_html() == expected_html
217
+
218
+ @pytest.mark.parametrize("indent_level", [0, 1, 2])
219
+ @pytest.mark.parametrize("indent_size", [3, 4, 8])
220
+ def test_indentation(self, indent_level: int, indent_size: int):
221
+ """Test the to_html method with different indentation parameters."""
222
+
223
+ # Creating a simple HTMLNode
224
+ node = HTMLNode(children=[
225
+ RawText('child1'),
226
+ RawText('child2'),
227
+ HTMLNode(children=[
228
+ RawText('grandchild1'),
229
+ RawText('grandchild2')
230
+ ])
231
+ ])
232
+
233
+ # Expected output based on the test parameters
234
+ expected_html = "\n".join([
235
+ f"{' ' * indent_size * indent_level}<htmlnode>",
236
+ f"{' ' * indent_size * (indent_level + 1)}child1",
237
+ f"{' ' * indent_size * (indent_level + 1)}child2",
238
+ f"{' ' * indent_size * (indent_level + 1)}<htmlnode>",
239
+ f"{' ' * indent_size * (indent_level + 2)}grandchild1",
240
+ f"{' ' * indent_size * (indent_level + 2)}grandchild2",
241
+ f"{' ' * indent_size * (indent_level + 1)}</htmlnode>",
242
+ f"{' ' * indent_size * indent_level}</htmlnode>"
243
+ ])
244
+
245
+ # Calling to_html with the test parameters
246
+ actual_html = node.to_html(
247
+ indent_size=indent_size, indent_level=indent_level)
248
+ assert actual_html == expected_html
249
+
250
+ @pytest.mark.parametrize("indent_level", [0, 1, 2])
251
+ @pytest.mark.parametrize("indent_size", [3, 4, 8])
252
+ def test_indentation_empty_node(self, indent_level, indent_size):
253
+ node = HTMLNode()
254
+ expected_html = f"{' ' * indent_size * indent_level}<htmlnode></htmlnode>"
255
+ actual_html = node.to_html(
256
+ indent_size=indent_size, indent_level=indent_level)
257
+ assert actual_html == expected_html
258
+
259
+ def test_collapse_empty(self):
260
+ node = HTMLNode(children=[
261
+ TestHTMLNode.CustomNode(),
262
+ HTMLNode(children=[RawText('grandchild1')])
263
+ ])
264
+ expected_html = "\n".join([
265
+ "<htmlnode>",
266
+ " <customnode></customnode>",
267
+ " <htmlnode>",
268
+ " grandchild1",
269
+ " </htmlnode>",
270
+ "</htmlnode>"
271
+ ])
272
+ assert node.to_html() == expected_html
273
+ assert node.to_html(collapse_empty=True) == expected_html
274
+
275
+ def test_not_collapse_empty(self):
276
+ node = HTMLNode(children=[
277
+ TestHTMLNode.CustomNode(),
278
+ HTMLNode(children=[RawText('grandchild1')])
279
+ ])
280
+ expected_html = "\n".join([
281
+ "<htmlnode>",
282
+ " <customnode>",
283
+ " </customnode>",
284
+ " <htmlnode>",
285
+ " grandchild1",
286
+ " </htmlnode>",
287
+ "</htmlnode>"
288
+ ])
289
+ assert node.to_html(collapse_empty=False) == expected_html
290
+
291
+ def test_kwargs_pass_down(self):
292
+ node = HTMLNode(children=[
293
+ TestHTMLNode.CustomNode(),
294
+ TestHTMLNode.KwargsReceiverNode()
295
+ ])
296
+ expected_html = "\n".join([
297
+ "<htmlnode>",
298
+ " <customnode></customnode>",
299
+ "Message is 42",
300
+ "</htmlnode>"
301
+ ])
302
+ assert node.to_html(message="Message is 42") == expected_html
303
+
304
+ @pytest.mark.parametrize("raw, sanitized", [
305
+ ('<div>text</div>', "&lt;div&gt;text&lt;&sol;div&gt;"),
306
+ ('\"Yes?\" > \'No!\'', "&quot;Yes?&quot; &gt; &apos;No!&apos;"),
307
+ ('Yes &\nNo', "Yes &<br>No"),
308
+ ])
309
+ def test_sanitize_raw_text(self, raw, sanitized):
310
+ node = HTMLNode(children=[RawText(raw)])
311
+ expected_html = "\n".join([
312
+ "<htmlnode>",
313
+ f" {sanitized}",
314
+ "</htmlnode>"
315
+ ])
316
+ assert node.to_html() == expected_html
317
+ assert node.to_html(replace_all_entities=False) == expected_html
318
+
319
+ @pytest.mark.parametrize("raw, sanitized", [
320
+ ('<div>text</div>',
321
+ "&lt;div&gt;text&lt;&sol;div&gt;"),
322
+ ('\"Yes?\" > \'No!\'',
323
+ "&quot;Yes&quest;&quot; &gt; &apos;No&excl;&apos;"),
324
+ ('Yes &\nNo',
325
+ "Yes &amp;<br>No"),
326
+ ])
327
+ def test_sanitize_all_entities_in_raw_text(self, raw, sanitized):
328
+ node = HTMLNode(children=[RawText(raw)])
329
+ expected_html = "\n".join([
330
+ "<htmlnode>",
331
+ f" {sanitized}",
332
+ "</htmlnode>"
333
+ ])
334
+ assert node.to_html(replace_all_entities=True) == expected_html
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,105 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import pytest
14
+ from webwidgets.utility.sanitizing import HTML_ENTITIES, \
15
+ CHAR_TO_HTML_ENTITIES, sanitize_html_text
16
+
17
+
18
+ class TestSanitizingHTMLText:
19
+ def test_no_empty_html_entities(self):
20
+ assert all(e for _, e in CHAR_TO_HTML_ENTITIES.items())
21
+
22
+ @pytest.mark.parametrize("name", [
23
+ 'amp;', 'lt;', 'gt;', 'semi;', 'sol;', 'apos;', 'quot;'
24
+ ])
25
+ def test_html_entity_names(self, name):
26
+ assert name in HTML_ENTITIES
27
+
28
+ def test_html_entities_inverted(self):
29
+ assert set(CHAR_TO_HTML_ENTITIES['&']) == set((
30
+ 'amp;', 'AMP', 'amp', 'AMP;'))
31
+ assert CHAR_TO_HTML_ENTITIES['&'][0] == 'amp;'
32
+ assert set(CHAR_TO_HTML_ENTITIES['>']) == set((
33
+ 'gt;', 'GT', 'gt', 'GT;'))
34
+ assert CHAR_TO_HTML_ENTITIES['>'][0] == 'gt;'
35
+ assert CHAR_TO_HTML_ENTITIES['\u0391'] == ('Alpha;',)
36
+
37
+ @pytest.mark.parametrize("html_entity", [
38
+ '&AMP', '&lt;', '&gt;', '&sol;'
39
+ ])
40
+ def test_sanitize_html_text(self, html_entity):
41
+ text = '<div>Some text &{} and more</div>'.format(html_entity)
42
+ expected_text_partial = '&lt;div&gt;Some text &{} and more&lt;&sol;div&gt;'.format(
43
+ html_entity)
44
+ assert sanitize_html_text(text) == expected_text_partial
45
+ expected_text_full = '&lt;div&gt;Some text &amp;{} and more&lt;&sol;div&gt;'.format(
46
+ html_entity)
47
+ assert sanitize_html_text(
48
+ text, replace_all_entities=True) == expected_text_full
49
+
50
+ def test_sanitize_double_delimiting_characters(self):
51
+ text = "&&copy &&copy; &copy;; copy;;"
52
+ expected = "&amp;&copy &amp;&copy; &copy;&semi; copy&semi;&semi;"
53
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
54
+
55
+ def test_sanitize_missing_ampersand(self):
56
+ text = "copy; lt; gt;"
57
+ expected = "copy&semi; lt&semi; gt&semi;"
58
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
59
+
60
+ @pytest.mark.parametrize("text, expected", [
61
+ ("Some text abcdefghijklmnopqrstuvwxyz",
62
+ "Some text abcdefghijklmnopqrstuvwxyz"),
63
+ ("0123456789.!?#",
64
+ "0123456789.!?#"),
65
+ ("& &; &aamp; &amp &amp; &AMP;",
66
+ "& &; &aamp; &amp &amp; &AMP;"),
67
+ ("&sool; &sol;/",
68
+ "&sool; &sol;&sol;"),
69
+ ('<div>Some text &sol;</div>',
70
+ '&lt;div&gt;Some text &sol;&lt;&sol;div&gt;'),
71
+ ('Some text\nand more',
72
+ 'Some text<br>and more'),
73
+ ('<p>&nbsp;</p>',
74
+ '&lt;p&gt;&nbsp;&lt;&sol;p&gt;'),
75
+ ("This 'quote' is not \"there\".",
76
+ "This &apos;quote&apos; is not &quot;there&quot;."),
77
+ ("This is a mix < than 100% & 3/5",
78
+ "This is a mix &lt; than 100% & 3&sol;5")
79
+ ])
80
+ def test_sanitize_html_with_partial_entity_replacement(self, text, expected):
81
+ assert sanitize_html_text(text) == expected
82
+ assert sanitize_html_text(text, replace_all_entities=False) == expected
83
+
84
+ @pytest.mark.parametrize("text, expected", [
85
+ ("Some text abcdefghijklmnopqrstuvwxyz",
86
+ "Some text abcdefghijklmnopqrstuvwxyz"),
87
+ ("0123456789.!?#",
88
+ "0123456789&period;&excl;&quest;&num;"),
89
+ ("& &; &aamp; &amp &amp; &AMP;",
90
+ "&amp; &amp;&semi; &amp;aamp&semi; &amp &amp; &AMP;"),
91
+ ("&sool; &sol;/",
92
+ "&amp;sool&semi; &sol;&sol;"),
93
+ ('<div>Some text &sol;</div>',
94
+ '&lt;div&gt;Some text &sol;&lt;&sol;div&gt;'),
95
+ ('Some text\nand more',
96
+ 'Some text<br>and more'),
97
+ ('<p>&nbsp;</p>',
98
+ '&lt;p&gt;&nbsp;&lt;&sol;p&gt;'),
99
+ ("This 'quote' is not \"there\".",
100
+ "This &apos;quote&apos; is not &quot;there&quot;&period;"),
101
+ ("This is a mix < than 100% & 3/5",
102
+ "This is a mix &lt; than 100&percnt; &amp; 3&sol;5")
103
+ ])
104
+ def test_sanitize_html_with_full_entity_replacement(self, text, expected):
105
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
@@ -0,0 +1,15 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ __version__ = "0.1.0" # Dynamically set by build backend
14
+
15
+ from . import compilation
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from . import html
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from .html_node import HTMLNode, no_start_tag, no_end_tag, RawText
@@ -0,0 +1,210 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import itertools
14
+ from typing import Any, Dict, List, Union
15
+ from webwidgets.utility.sanitizing import sanitize_html_text
16
+
17
+
18
class HTMLNode:
    """Represents an HTML node (for example, a div or a span).

    A node stores a list of child nodes and a dictionary of attributes. Its
    tag name is the lowercase name of its class.
    """

    # If True, the node and its children are always rendered on a single line
    one_line: bool = False

    def __init__(self, children: Union[List['HTMLNode'], None] = None,
                 attributes: Union[Dict[str, str], None] = None):
        """Creates an HTMLNode with optional children and attributes.

        :param children: List of child HTML nodes. Defaults to a new empty
            list. A list passed by the caller is stored as is (not copied).
        :param attributes: Dictionary of attributes for the node. Defaults to
            a new empty dictionary. A dictionary passed by the caller is
            stored as is (not copied).
        """
        # Containers are created per instance. A mutable default argument is
        # evaluated only once and would be shared by every node built without
        # arguments, so calling `add` on one node would affect all the others.
        self.children = [] if children is None else children
        self.attributes = {} if attributes is None else attributes

    def _get_tag_name(self) -> str:
        """Returns the tag name of the HTML node.

        The tag name of a node object is the name of its class in lowercase.

        :return: The tag name of the HTML node.
        :rtype: str
        """
        return self.__class__.__name__.lower()

    def _render_attributes(self) -> str:
        """Renders the attributes of the HTML node into a string that can be
        added to the start tag.

        Values are written verbatim between double quotes; no escaping is
        applied here.

        :return: A string containing all attribute key-value pairs separated
            by spaces.
        :rtype: str
        """
        return ' '.join(
            f'{key}="{value}"' for key, value in self.attributes.items()
        )

    def add(self, child: 'HTMLNode') -> None:
        """Adds a child to the HTML node.

        :param child: The child to be added.
        """
        self.children.append(child)

    @property
    def start_tag(self) -> str:
        """Returns the opening tag of the HTML node, including any attributes.

        :return: A string containing the opening tag of the element with its
            attributes.
        :rtype: str
        """
        # Rendering attributes
        attributes = self._render_attributes()
        maybe_space = ' ' if attributes else ''

        # Building start tag
        return f"<{self._get_tag_name()}{maybe_space}{attributes}>"

    @property
    def end_tag(self) -> str:
        """Returns the closing tag of the HTML node.

        :return: A string containing the closing tag of the element.
        :rtype: str
        """
        return f"</{self._get_tag_name()}>"

    def to_html(self, collapse_empty: bool = True,
                indent_size: int = 4, indent_level: int = 0,
                force_one_line: bool = False, return_lines: bool = False,
                **kwargs: Any) -> Union[str, List[str]]:
        """Converts the HTML node into HTML code.

        :param collapse_empty: If True, collapses empty elements into a single
            line. Defaults to True.
        :type collapse_empty: bool
        :param indent_size: The number of spaces to use for each indentation
            level.
        :type indent_size: int
        :param indent_level: The current level of indentation in the HTML
            output.
        :type indent_level: int
        :param force_one_line: If True, forces all child elements to be
            rendered on a single line without additional indentation.
            Defaults to False.
        :type force_one_line: bool
        :param return_lines: Whether to return the lines of HTML code
            individually. Defaults to False.
        :type return_lines: bool
        :param **kwargs: Additional keyword arguments to pass down to child
            elements.
        :type **kwargs: Any
        :return: A string containing the HTML representation of the element if
            `return_lines` is `False` (default), or the list of individual
            lines from that HTML code if `return_lines` is `True`.
        :rtype: str or List[str]
        """
        # Opening the element
        indentation = "" if force_one_line else ' ' * indent_size * indent_level
        html_lines = [indentation + self.start_tag]

        # If content must be in one line
        if self.one_line or force_one_line or (collapse_empty
                                               and not self.children):
            for child in self.children:
                html_lines.extend(child.to_html(
                    collapse_empty=collapse_empty,
                    indent_level=0, force_one_line=True, return_lines=True,
                    **kwargs))
            html_lines.append(self.end_tag)
            html_lines = [''.join(html_lines)]  # Flattening the line

        # If content spans multi-line
        else:
            for child in self.children:
                html_lines.extend(child.to_html(
                    collapse_empty=collapse_empty,
                    indent_size=indent_size,
                    indent_level=indent_level + 1,
                    return_lines=True,
                    **kwargs))
            html_lines.append(indentation + self.end_tag)

            # Trimming empty lines (lines made of spaces only, or empty)
            html_lines = [line for line in html_lines
                          if any(c != ' ' for c in line)]

        # If return_lines is True, return a list of lines
        if return_lines:
            return html_lines

        # Otherwise, return a single string
        return '\n'.join(html_lines)
143
+
144
+
145
def no_start_tag(cls):
    """Class decorator that makes the start tag of a node class empty.

    The `start_tag` attribute of the class is overwritten with a read-only
    property that always evaluates to an empty string.

    :param cls: A subclass of HTMLNode whose start tag should be removed.
    :return: The same class object, now with an empty start tag.
    """
    def _empty_start_tag(_node):
        return ''

    setattr(cls, 'start_tag', property(
        _empty_start_tag, doc="This element does not have a start tag"))
    return cls
154
+
155
+
156
def no_end_tag(cls):
    """Class decorator that makes the end tag of a node class empty.

    The `end_tag` attribute of the class is overwritten with a read-only
    property that always evaluates to an empty string.

    :param cls: A subclass of HTMLNode whose end tag should be removed.
    :return: The same class object, now with an empty end tag.
    """
    def _empty_end_tag(_node):
        return ''

    setattr(cls, 'end_tag', property(
        _empty_end_tag, doc="This element does not have an end tag"))
    return cls
165
+
166
+
167
@no_start_tag
@no_end_tag
class RawText(HTMLNode):
    """A text-only node: it renders as sanitized text, with no start tag and
    no end tag around it."""

    # Text is always rendered on a single line
    one_line = True

    def __init__(self, text: str):
        """Creates a raw text node.

        :param text: The text held by the node. It is stored unchanged and
            only sanitized when :py:meth:`RawText.to_html` is called.
        :type text: str
        """
        super().__init__()
        self.text = text

    def to_html(self, indent_size: int = 4, indent_level: int = 0,
                return_lines: bool = False, replace_all_entities: bool = False,
                **kwargs: Any) -> Union[str, List[str]]:
        """Converts the raw text node to HTML.

        The stored text goes through :py:func:`sanitize_html_text` and is then
        prefixed with the indentation for the requested level.

        :param indent_size: See :py:meth:`HTMLNode.to_html`.
        :type indent_size: int
        :param indent_level: See :py:meth:`HTMLNode.to_html`.
        :type indent_level: int
        :param return_lines: See :py:meth:`HTMLNode.to_html`.
        :type return_lines: bool
        :param replace_all_entities: See :py:func:`sanitize_html_text`.
        :type replace_all_entities: bool
        :param kwargs: Other keyword arguments. These are ignored.
        :type kwargs: Any
        :return: See :py:meth:`HTMLNode.to_html`.
        :rtype: str or List[str]
        """
        padding = ' ' * indent_size * indent_level
        rendered = padding + sanitize_html_text(
            self.text, replace_all_entities=replace_all_entities)
        return [rendered] if return_lines else rendered
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from .sanitizing import *
@@ -0,0 +1,132 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from html.entities import html5 as HTML_ENTITIES
14
+ import re
15
+ from typing import Tuple
16
+
17
+
18
def _build_char_to_html_entities():
    """Builds the mapping from characters to the names of their HTML entities.

    Entities are grouped by character in a single pass over `HTML_ENTITIES`
    instead of rescanning every entity for every character. Each group is then
    sorted by length and its preferred entity is moved to the front.

    :return: A dictionary mapping each character (or character sequence) to a
        tuple of entity names, preferred name first.
    :rtype: dict
    """
    # Grouping entity names by the character they represent. Dictionary order
    # is preserved, so names appear in the same order as in HTML_ENTITIES.
    grouped = {}
    for name, char in HTML_ENTITIES.items():
        grouped.setdefault(char, []).append(name)

    mapping = {}
    for char, names in grouped.items():
        names.sort(key=len)  # Stable sort: shortest names first

        # Shortest name with a semicolon, or the shortest name if none has one
        candidate = next((n for n in names if ';' in n), names[0])

        # The lowercase spelling wins when it is a valid entity too
        lowered = candidate.lower()
        preferred = lowered if lowered in names else candidate

        # Moving the preferred name to the front by swapping
        index = names.index(preferred)
        names[index], names[0] = names[0], names[index]
        mapping[char] = tuple(names)
    return mapping


# Maps characters to their corresponding character references. If a character can be
# represented by multiple entities, the preferred one is placed first in the tuple.
# Preference is given to the shortest one with a semicolon, in lowercase if possible
# (e.g. "&amp;").
CHAR_TO_HTML_ENTITIES = _build_char_to_html_entities()
31
+
32
+
33
# Regular expression matching all isolated '&' characters that are not part of an
# HTML entity.
_REGEX_AMP = re.compile(f"&(?!({'|'.join(HTML_ENTITIES.keys())}))")


# Regular expression matching all isolated ';' characters that are not part of an
# HTML entity. The expression essentially concatenates one lookbehind per entity.
_REGEXP_SEMI = re.compile(
    ''.join(f"(?<!&{e.replace(';', '')})"
            for e in HTML_ENTITIES if ';' in e) + ';')


# Entities that are always replaced during sanitization. These are: <, >, /,
# according to rule 13.1.2.6 of the HTML5 specification, as well as single quotes
# ', double quotes ", and new line characters '\n'.
# Source: https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions
_ALWAYS_SANITIZED = ("\u003C", "\u003E", "\u002F", "'", "\"", "\n")


# Entities other than new line characters '\n' (which require special treatment)
# that are always replaced during sanitization.
_ALWAYS_SANITIZED_BUT_NEW_LINES = tuple(
    e for e in _ALWAYS_SANITIZED if e != '\n')


# Entities other than the ampersand and semicolon (which require special treatment
# because they are part of other entities) that are replaced by default during
# sanitization but can also be skipped for speed. This set of entities consists of
# all remaining entities but the ampersand and semicolon.
# The tuple is sorted (longest keys first, then by value) so that replacement order
# is the same on every run: iterating over a set directly gives an arbitrary order,
# and some keys span several characters that also have entities of their own.
_OPTIONALLY_SANITIZED_BUT_AMP_SEMI = tuple(sorted(
    set(CHAR_TO_HTML_ENTITIES.keys()) - set(_ALWAYS_SANITIZED) - {'&', ';'},
    key=lambda c: (-len(c), c)))
64
+
65
+
66
def replace_html_entities(text: str, characters: Tuple[str, ...]) -> str:
    """Replaces characters with their corresponding HTML entities in the given text.

    If a character can be represented by multiple entities, preference is given to
    the shortest one that contains a semicolon, in lowercase if possible.

    Replacements are applied one item at a time, in the order of `characters`.

    :param text: The input text in which characters are to be replaced.
    :type text: str
    :param characters: The characters to be replaced by their HTML entity. Usually
        each item in the tuple is a single character, but some entities span
        multiple characters.
    :type characters: Tuple[str, ...]
    :return: The text with the given characters replaced by HTML entities.
    :rtype: str
    :raises KeyError: If an item of `characters` has no known HTML entity.
    """
    for c in characters:
        # The preferred entity name is stored first in the tuple
        text = text.replace(c, '&' + CHAR_TO_HTML_ENTITIES[c][0])
    return text
85
+
86
+
87
def sanitize_html_text(text: str, replace_all_entities: bool = False) -> str:
    """Sanitizes raw HTML text by replacing certain characters with HTML-friendly
    equivalents.

    The following replacements are made:
    - `<`, `/`, and `>` become the HTML entities `lt;`, `sol;`, and `gt;`
      respectively, according to rule 13.1.2.6 of the HTML5 specification
      (see source:
      https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions)
    - single quotes `'` and double quotes `"` become the HTML entities `apos;`
      and `quot;` respectively
    - new line characters '\\n' become `br` tags
    - when `replace_all_entities` is True, every other character that has an HTML
      entity is replaced with it as well. If several entities exist for one
      character, the shortest one containing a semicolon is used, in lowercase
      if possible. Ampersands and semicolons that already belong to an entity
      in the input are left untouched.

    See https://html.spec.whatwg.org/multipage/named-characters.html for a list of
    all supported entities.

    :param text: The raw HTML text that needs sanitization.
    :type text: str
    :param replace_all_entities: Whether to replace every character that can be
        represented by an HTML entity. Use False to skip non-mandatory characters
        and increase speed. Default is False.
    :type replace_all_entities: bool
    :return: The sanitized HTML text.
    :rtype: str
    """
    sanitized = text

    # Optional entities go first: stray '&' and ';' must be handled before the
    # steps below insert new ones as part of entities.
    if replace_all_entities:
        # '&' is replaced ONLY where it does not start an HTML entity
        sanitized = _REGEX_AMP.sub('&amp;', sanitized)

        # ';' is replaced ONLY where it does not end an HTML entity
        sanitized = _REGEXP_SEMI.sub('&semi;', sanitized)

        # Every other optional character
        sanitized = replace_html_entities(
            sanitized, _OPTIONALLY_SANITIZED_BUT_AMP_SEMI)

    # Mandatory entities come next
    sanitized = replace_html_entities(
        sanitized, _ALWAYS_SANITIZED_BUT_NEW_LINES)

    # New lines are handled last because the tag itself contains < and >
    return sanitized.replace('\n', '<br>')