webwidgets 0.1.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,123 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
name: "CD: Publish to PyPI (or TestPyPI)"

# Runs when a version-like tag is pushed. Tags prefixed with "testpypi/" are
# routed to the TestPyPI job, every other matching tag to the PyPI job (see
# the `if:` conditions on the two publish jobs).
#
# In GitHub filter patterns `.` is a literal dot and `+` means "one or more
# of the preceding character".
on:
  push:
    tags:
      - "[0-9]+.[0-9]+.[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+.dev[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+[ab][0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+[ab][0-9]+.dev[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+rc[0-9]+.dev[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+post[0-9]+"
      - "[0-9]+.[0-9]+.[0-9]+post[0-9]+.dev[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+.dev[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+[ab][0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+[ab][0-9]+.dev[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+rc[0-9]+.dev[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+post[0-9]+"
      - "testpypi/[0-9]+.[0-9]+.[0-9]+post[0-9]+.dev[0-9]+"

jobs:
  # Gate job: only runs when the push event's base_ref is the main branch.
  # The build job depends on it.
  ensure-main:
    if: github.event.base_ref == 'refs/heads/main'
    name: Ensure tag was pushed to main
    runs-on: ubuntu-latest
    steps:
      - name: Log that tag was pushed to main
        run: echo "Tag was pushed to main branch. Starting CD workflow."

  build:
    name: Build package
    needs: ensure-main
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      - name: Install pypa/build
        run: >-
          python3 -m
          pip install
          build
          --user
      - name: Install pypa/hatch
        run: python3 -m pip install hatch
      - name: Set version with hatch
        run: |
          # Using the runner-provided variable instead of GitHub-specific
          # contexts. GITHUB_REF_NAME is the tag that triggered this run, i.e.
          # the same value the publish jobs test with github.ref_name.
          TAG="$GITHUB_REF_NAME"
          echo "Tag is: $TAG"
          # Removing testpypi/ from tag before setting version
          VERSION="${TAG#testpypi/}"
          echo "Setting version from tag: $VERSION"
          hatch version "$VERSION"
      - name: Build a binary wheel and a source tarball
        run: python3 -m build
      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

  publish-to-pypi:
    # Any tag that does not carry the testpypi/ prefix is a real release.
    if: ${{ !startsWith(github.ref_name, 'testpypi/') }}
    name: Publish to PyPI
    needs:
      - build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/webwidgets
    permissions:
      id-token: write  # IMPORTANT: mandatory for trusted publishing
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Publish package to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

  publish-to-testpypi:
    if: startsWith(github.ref_name, 'testpypi/')
    name: Publish to TestPyPI
    needs:
      - build
    runs-on: ubuntu-latest
    environment:
      name: testpypi
      url: https://test.pypi.org/p/webwidgets
    permissions:
      id-token: write  # IMPORTANT: mandatory for trusted publishing
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Publish package to TestPyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,80 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
name: "Full CI: Python 3.9-13 on all OSes"

# Runs the complete OS x Python matrix for pushes to main and for pull
# requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

permissions:
  contents: read

jobs:
  lint_source:
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, windows-latest, macos-latest]
    name: Lint source on ${{ matrix.os }} Python ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        # Same major version of the action as the CD workflow uses.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install flake8
        run: |
          python -c "import platform; print('OS', platform.system())"
          python -c "import sys; print('Python version', sys.version)"
          python -m pip install --upgrade pip
          pip install flake8
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

  test_build:
    # Tests only start once linting has passed.
    needs: lint_source
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, windows-latest, macos-latest]
    name: Test build on ${{ matrix.os }} Python ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install pytest
        run: |
          python -c "import platform; print('OS', platform.system())"
          python -c "import sys; print('Python version', sys.version)"
          python -m pip install --upgrade pip
          pip install pytest
      - name: Build and install
        run: |
          pip install .
          # Removing webwidgets directory so imports come from build
          rm -r webwidgets
      - name: Test with pytest
        run: |
          pytest tests
@@ -0,0 +1,78 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
name: "Quick CI: Python 3.9-11 on Ubuntu"

# Runs on every branch push. "**" is used rather than "*" because "*" does
# not match branch names that contain a slash (e.g. feature/foo), which would
# silently leave such branches without CI.
on:
  push:
    branches:
      - "**"

permissions:
  contents: read

jobs:
  lint_source:
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11"]
    name: Lint source on Python ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        # Same major version of the action as the CD workflow uses.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install flake8
        run: |
          python -c "import sys; print('Python version', sys.version)"
          python -m pip install --upgrade pip
          pip install flake8
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

  test_build:
    # Tests only start once linting has passed.
    needs: lint_source
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]
    name: Test build on Python ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install pytest
        run: |
          python -c "import sys; print('Python version', sys.version)"
          python -m pip install --upgrade pip
          pip install pytest
      - name: Build and install
        run: |
          echo "Current directory:"
          ls -la
          pip install .
          # Removing webwidgets directory so imports come from build
          rm -r webwidgets
          echo "Removed webwidgets directory. New content:"
          ls -la
      - name: Test with pytest
        run: |
          pytest tests
@@ -0,0 +1 @@
1
+ __pycache__
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 mlaasri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: webwidgets
3
+ Version: 0.1.0
4
+ Summary: A Python package for designing web UIs.
5
+ Project-URL: Source code, https://github.com/mlaasri/WebWidgets
6
+ Author: mlaasri
7
+ License-File: LICENSE
8
+ Keywords: design,webui
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+
14
+ # WebWidgets
15
+
16
+ ![CI Status](https://img.shields.io/github/actions/workflow/status/mlaasri/WebWidgets/ci-full.yml?branch=main)
17
+
18
+ A Python package for creating web UIs
@@ -0,0 +1,5 @@
1
+ # WebWidgets
2
+
3
+ ![CI Status](https://img.shields.io/github/actions/workflow/status/mlaasri/WebWidgets/ci-full.yml?branch=main)
4
+
5
+ A Python package for creating web UIs
@@ -0,0 +1,28 @@
1
# Build backend declaration.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core package metadata.
[project]
name = "webwidgets"
# The version is not written here; it is read from the file named under
# [tool.hatch.version] below.
dynamic = ["version"]
description = "A Python package for designing web UIs."
readme = "README.md"
requires-python = ">=3.9"
license-files = { paths = ["LICENSE"] }
authors = [{ name = "mlaasri" }]
keywords = ["webui", "design"]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]

[project.urls]
"Source code" = "https://github.com/mlaasri/WebWidgets"

# File holding the version string that the `dynamic` entry above refers to.
[tool.hatch.version]
path = "webwidgets/__init__.py"

# Output directory for built distributions.
[tool.hatch.build]
directory = "dist"
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,334 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import pytest
14
+ from webwidgets.compilation.html.html_node import HTMLNode, no_start_tag, no_end_tag, RawText
15
+
16
+
17
+ class TestHTMLNode:
18
+ class CustomNode(HTMLNode):
19
+ pass
20
+
21
+ @no_start_tag
22
+ class NoStartNode(HTMLNode):
23
+ pass
24
+
25
+ @no_end_tag
26
+ class NoEndNode(HTMLNode):
27
+ pass
28
+
29
+ @no_start_tag
30
+ @no_end_tag
31
+ class NoStartEndNode(HTMLNode):
32
+ pass
33
+
34
+ class OneLineNode(HTMLNode):
35
+ one_line = True
36
+
37
+ class OneLineNoStartNode(NoStartNode):
38
+ one_line = True
39
+
40
+ class KwargsReceiverNode(HTMLNode):
41
+ def to_html(self, return_lines: bool, message: str,
42
+ **kwargs):
43
+ if return_lines:
44
+ return [message]
45
+ return message
46
+
47
+ def test_basic_node(self):
48
+ node = HTMLNode()
49
+ assert node.start_tag == "<htmlnode>"
50
+ assert node.end_tag == "</htmlnode>"
51
+ assert node.to_html() == "<htmlnode></htmlnode>"
52
+
53
+ def test_custom_name(self):
54
+ node = TestHTMLNode.CustomNode()
55
+ assert node.start_tag == "<customnode>"
56
+ assert node.end_tag == "</customnode>"
57
+ assert node.to_html() == "<customnode></customnode>"
58
+
59
+ def test_attributes(self):
60
+ node = HTMLNode(attributes={'id': 'test-id', 'class': 'test-class'})
61
+ assert node.start_tag == '<htmlnode id="test-id" class="test-class">'
62
+ assert node.end_tag == '</htmlnode>'
63
+ assert node.to_html() == '<htmlnode id="test-id" class="test-class"></htmlnode>'
64
+
65
+ def test_no_start_tag(self):
66
+ node = TestHTMLNode.NoStartNode()
67
+ assert node.start_tag == ''
68
+ assert node.end_tag == '</nostartnode>'
69
+ assert node.to_html() == "</nostartnode>"
70
+
71
+ def test_no_end_tag(self):
72
+ node = TestHTMLNode.NoEndNode()
73
+ assert node.start_tag == '<noendnode>'
74
+ assert node.end_tag == ''
75
+ assert node.to_html() == "<noendnode>"
76
+
77
+ def test_no_start_end_tag(self):
78
+ node = TestHTMLNode.NoStartEndNode()
79
+ assert node.start_tag == ''
80
+ assert node.end_tag == ''
81
+ assert node.to_html() == ""
82
+
83
+ def test_one_line_rendering(self):
84
+ node = HTMLNode(children=[RawText('child1'),
85
+ RawText('child2')])
86
+ expected_html = "<htmlnode>child1child2</htmlnode>"
87
+ assert node.to_html(force_one_line=True) == expected_html
88
+
89
+ def test_no_start_tag_with_one_line(self):
90
+ node = TestHTMLNode.NoStartNode(children=[RawText('child1'),
91
+ RawText('child2')])
92
+ expected_html = "child1child2</nostartnode>"
93
+ assert node.to_html(force_one_line=True) == expected_html
94
+
95
+ def test_no_end_tag_with_one_line(self):
96
+ node = TestHTMLNode.NoEndNode(children=[RawText('child1'),
97
+ RawText('child2')])
98
+ expected_html = "<noendnode>child1child2"
99
+ assert node.to_html(force_one_line=True) == expected_html
100
+
101
+ def test_recursive_rendering(self):
102
+ inner_node = HTMLNode(children=[RawText('inner_child')])
103
+ node = TestHTMLNode.CustomNode(children=[inner_node])
104
+ expected_html = '\n'.join([
105
+ "<customnode>",
106
+ " <htmlnode>",
107
+ " inner_child",
108
+ " </htmlnode>",
109
+ "</customnode>"
110
+ ])
111
+ assert node.to_html() == expected_html
112
+ assert node.to_html(force_one_line=False) == expected_html
113
+
114
+ def test_no_start_tag_with_recursive_rendering(self):
115
+ inner_node = HTMLNode(children=[RawText('inner_child')])
116
+ node = TestHTMLNode.NoStartNode(children=[inner_node])
117
+ expected_html = '\n'.join([
118
+ " <htmlnode>",
119
+ " inner_child",
120
+ " </htmlnode>",
121
+ "</nostartnode>"
122
+ ])
123
+ assert node.to_html() == expected_html
124
+
125
+ def test_no_end_tag_with_recursive_rendering(self):
126
+ inner_node = HTMLNode(children=[RawText('inner_child')])
127
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
128
+ expected_html = '\n'.join([
129
+ "<noendnode>",
130
+ " <htmlnode>",
131
+ " inner_child",
132
+ " </htmlnode>"
133
+ ])
134
+ assert node.to_html() == expected_html
135
+
136
+ def test_recursive_rendering_one_line(self):
137
+ inner_node = HTMLNode(children=[RawText('inner_child')])
138
+ node = TestHTMLNode.CustomNode(children=[inner_node])
139
+ expected_html = "<customnode><htmlnode>inner_child</htmlnode></customnode>"
140
+ assert node.to_html(force_one_line=True) == expected_html
141
+
142
+ def test_recursive_rendering_one_line_propagation(self):
143
+ one_line = TestHTMLNode.OneLineNode(
144
+ [HTMLNode(children=[RawText('inner_child')])]
145
+ )
146
+ node = HTMLNode(children=[one_line])
147
+ expected_html = '\n'.join([
148
+ "<htmlnode>",
149
+ " <onelinenode><htmlnode>inner_child</htmlnode></onelinenode>",
150
+ "</htmlnode>"
151
+ ])
152
+ assert node.to_html() == expected_html
153
+
154
+ def test_recursive_rendering_of_tagless_mix(self):
155
+ children = [
156
+ TestHTMLNode.NoEndNode([RawText("child1")]),
157
+ TestHTMLNode.NoStartNode([RawText("child2")]),
158
+ TestHTMLNode.NoEndNode([RawText("child3")]),
159
+ ]
160
+ inner_node = TestHTMLNode.NoStartNode(children=children)
161
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
162
+ expected_html = '\n'.join([
163
+ "<noendnode>",
164
+ " <noendnode>",
165
+ " child1",
166
+ " child2",
167
+ " </nostartnode>",
168
+ " <noendnode>",
169
+ " child3",
170
+ " </nostartnode>"
171
+ ])
172
+ assert node.to_html() == expected_html
173
+
174
+ def test_recursive_rendering_of_tagless_mix_one_line(self):
175
+ children = [
176
+ TestHTMLNode.NoEndNode([RawText("child1")]),
177
+ TestHTMLNode.OneLineNoStartNode([RawText("child2")]),
178
+ TestHTMLNode.NoEndNode([RawText("child3")]),
179
+ ]
180
+ inner_node = TestHTMLNode.NoStartNode(children=children)
181
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
182
+ expected_html = '\n'.join([
183
+ "<noendnode>",
184
+ " <noendnode>",
185
+ " child1",
186
+ " child2</onelinenostartnode>",
187
+ " <noendnode>",
188
+ " child3",
189
+ " </nostartnode>"
190
+ ])
191
+ assert node.to_html() == expected_html
192
+
193
+ def test_recursive_rendering_of_tagless_mix_force_one_line(self):
194
+ children = [
195
+ TestHTMLNode.NoEndNode([RawText("child1")]),
196
+ TestHTMLNode.NoStartNode([RawText("child2")]),
197
+ TestHTMLNode.NoEndNode([RawText("child3")]),
198
+ ]
199
+ inner_node = TestHTMLNode.NoStartNode(children=children)
200
+ node = TestHTMLNode.NoEndNode(children=[inner_node])
201
+ expected_html = "<noendnode><noendnode>child1child2</nostartnode>" + \
202
+ "<noendnode>child3</nostartnode>"
203
+ assert node.to_html(force_one_line=True) == expected_html
204
+
205
+ def test_raw_text_as_orphan_node(self):
206
+ node = HTMLNode(children=[
207
+ TestHTMLNode.CustomNode(),
208
+ RawText("raw_text")
209
+ ])
210
+ expected_html = '\n'.join([
211
+ "<htmlnode>",
212
+ " <customnode></customnode>",
213
+ " raw_text",
214
+ "</htmlnode>"
215
+ ])
216
+ assert node.to_html() == expected_html
217
+
218
+ @pytest.mark.parametrize("indent_level", [0, 1, 2])
219
+ @pytest.mark.parametrize("indent_size", [3, 4, 8])
220
+ def test_indentation(self, indent_level: int, indent_size: int):
221
+ """Test the to_html method with different indentation parameters."""
222
+
223
+ # Creating a simple HTMLNode
224
+ node = HTMLNode(children=[
225
+ RawText('child1'),
226
+ RawText('child2'),
227
+ HTMLNode(children=[
228
+ RawText('grandchild1'),
229
+ RawText('grandchild2')
230
+ ])
231
+ ])
232
+
233
+ # Expected output based on the test parameters
234
+ expected_html = "\n".join([
235
+ f"{' ' * indent_size * indent_level}<htmlnode>",
236
+ f"{' ' * indent_size * (indent_level + 1)}child1",
237
+ f"{' ' * indent_size * (indent_level + 1)}child2",
238
+ f"{' ' * indent_size * (indent_level + 1)}<htmlnode>",
239
+ f"{' ' * indent_size * (indent_level + 2)}grandchild1",
240
+ f"{' ' * indent_size * (indent_level + 2)}grandchild2",
241
+ f"{' ' * indent_size * (indent_level + 1)}</htmlnode>",
242
+ f"{' ' * indent_size * indent_level}</htmlnode>"
243
+ ])
244
+
245
+ # Calling to_html with the test parameters
246
+ actual_html = node.to_html(
247
+ indent_size=indent_size, indent_level=indent_level)
248
+ assert actual_html == expected_html
249
+
250
+ @pytest.mark.parametrize("indent_level", [0, 1, 2])
251
+ @pytest.mark.parametrize("indent_size", [3, 4, 8])
252
+ def test_indentation_empty_node(self, indent_level, indent_size):
253
+ node = HTMLNode()
254
+ expected_html = f"{' ' * indent_size * indent_level}<htmlnode></htmlnode>"
255
+ actual_html = node.to_html(
256
+ indent_size=indent_size, indent_level=indent_level)
257
+ assert actual_html == expected_html
258
+
259
+ def test_collapse_empty(self):
260
+ node = HTMLNode(children=[
261
+ TestHTMLNode.CustomNode(),
262
+ HTMLNode(children=[RawText('grandchild1')])
263
+ ])
264
+ expected_html = "\n".join([
265
+ "<htmlnode>",
266
+ " <customnode></customnode>",
267
+ " <htmlnode>",
268
+ " grandchild1",
269
+ " </htmlnode>",
270
+ "</htmlnode>"
271
+ ])
272
+ assert node.to_html() == expected_html
273
+ assert node.to_html(collapse_empty=True) == expected_html
274
+
275
+ def test_not_collapse_empty(self):
276
+ node = HTMLNode(children=[
277
+ TestHTMLNode.CustomNode(),
278
+ HTMLNode(children=[RawText('grandchild1')])
279
+ ])
280
+ expected_html = "\n".join([
281
+ "<htmlnode>",
282
+ " <customnode>",
283
+ " </customnode>",
284
+ " <htmlnode>",
285
+ " grandchild1",
286
+ " </htmlnode>",
287
+ "</htmlnode>"
288
+ ])
289
+ assert node.to_html(collapse_empty=False) == expected_html
290
+
291
+ def test_kwargs_pass_down(self):
292
+ node = HTMLNode(children=[
293
+ TestHTMLNode.CustomNode(),
294
+ TestHTMLNode.KwargsReceiverNode()
295
+ ])
296
+ expected_html = "\n".join([
297
+ "<htmlnode>",
298
+ " <customnode></customnode>",
299
+ "Message is 42",
300
+ "</htmlnode>"
301
+ ])
302
+ assert node.to_html(message="Message is 42") == expected_html
303
+
304
+ @pytest.mark.parametrize("raw, sanitized", [
305
+ ('<div>text</div>', "&lt;div&gt;text&lt;&sol;div&gt;"),
306
+ ('\"Yes?\" > \'No!\'', "&quot;Yes?&quot; &gt; &apos;No!&apos;"),
307
+ ('Yes &\nNo', "Yes &<br>No"),
308
+ ])
309
+ def test_sanitize_raw_text(self, raw, sanitized):
310
+ node = HTMLNode(children=[RawText(raw)])
311
+ expected_html = "\n".join([
312
+ "<htmlnode>",
313
+ f" {sanitized}",
314
+ "</htmlnode>"
315
+ ])
316
+ assert node.to_html() == expected_html
317
+ assert node.to_html(replace_all_entities=False) == expected_html
318
+
319
+ @pytest.mark.parametrize("raw, sanitized", [
320
+ ('<div>text</div>',
321
+ "&lt;div&gt;text&lt;&sol;div&gt;"),
322
+ ('\"Yes?\" > \'No!\'',
323
+ "&quot;Yes&quest;&quot; &gt; &apos;No&excl;&apos;"),
324
+ ('Yes &\nNo',
325
+ "Yes &amp;<br>No"),
326
+ ])
327
+ def test_sanitize_all_entities_in_raw_text(self, raw, sanitized):
328
+ node = HTMLNode(children=[RawText(raw)])
329
+ expected_html = "\n".join([
330
+ "<htmlnode>",
331
+ f" {sanitized}",
332
+ "</htmlnode>"
333
+ ])
334
+ assert node.to_html(replace_all_entities=True) == expected_html
@@ -0,0 +1,11 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
@@ -0,0 +1,105 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import pytest
14
+ from webwidgets.utility.sanitizing import HTML_ENTITIES, \
15
+ CHAR_TO_HTML_ENTITIES, sanitize_html_text
16
+
17
+
18
+ class TestSanitizingHTMLText:
19
+ def test_no_empty_html_entities(self):
20
+ assert all(e for _, e in CHAR_TO_HTML_ENTITIES.items())
21
+
22
+ @pytest.mark.parametrize("name", [
23
+ 'amp;', 'lt;', 'gt;', 'semi;', 'sol;', 'apos;', 'quot;'
24
+ ])
25
+ def test_html_entity_names(self, name):
26
+ assert name in HTML_ENTITIES
27
+
28
+ def test_html_entities_inverted(self):
29
+ assert set(CHAR_TO_HTML_ENTITIES['&']) == set((
30
+ 'amp;', 'AMP', 'amp', 'AMP;'))
31
+ assert CHAR_TO_HTML_ENTITIES['&'][0] == 'amp;'
32
+ assert set(CHAR_TO_HTML_ENTITIES['>']) == set((
33
+ 'gt;', 'GT', 'gt', 'GT;'))
34
+ assert CHAR_TO_HTML_ENTITIES['>'][0] == 'gt;'
35
+ assert CHAR_TO_HTML_ENTITIES['\u0391'] == ('Alpha;',)
36
+
37
+ @pytest.mark.parametrize("html_entity", [
38
+ '&AMP', '&lt;', '&gt;', '&sol;'
39
+ ])
40
+ def test_sanitize_html_text(self, html_entity):
41
+ text = '<div>Some text &{} and more</div>'.format(html_entity)
42
+ expected_text_partial = '&lt;div&gt;Some text &{} and more&lt;&sol;div&gt;'.format(
43
+ html_entity)
44
+ assert sanitize_html_text(text) == expected_text_partial
45
+ expected_text_full = '&lt;div&gt;Some text &amp;{} and more&lt;&sol;div&gt;'.format(
46
+ html_entity)
47
+ assert sanitize_html_text(
48
+ text, replace_all_entities=True) == expected_text_full
49
+
50
+ def test_sanitize_double_delimiting_characters(self):
51
+ text = "&&copy &&copy; &copy;; copy;;"
52
+ expected = "&amp;&copy &amp;&copy; &copy;&semi; copy&semi;&semi;"
53
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
54
+
55
+ def test_sanitize_missing_ampersand(self):
56
+ text = "copy; lt; gt;"
57
+ expected = "copy&semi; lt&semi; gt&semi;"
58
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
59
+
60
+ @pytest.mark.parametrize("text, expected", [
61
+ ("Some text abcdefghijklmnopqrstuvwxyz",
62
+ "Some text abcdefghijklmnopqrstuvwxyz"),
63
+ ("0123456789.!?#",
64
+ "0123456789.!?#"),
65
+ ("& &; &aamp; &amp &amp; &AMP;",
66
+ "& &; &aamp; &amp &amp; &AMP;"),
67
+ ("&sool; &sol;/",
68
+ "&sool; &sol;&sol;"),
69
+ ('<div>Some text &sol;</div>',
70
+ '&lt;div&gt;Some text &sol;&lt;&sol;div&gt;'),
71
+ ('Some text\nand more',
72
+ 'Some text<br>and more'),
73
+ ('<p>&nbsp;</p>',
74
+ '&lt;p&gt;&nbsp;&lt;&sol;p&gt;'),
75
+ ("This 'quote' is not \"there\".",
76
+ "This &apos;quote&apos; is not &quot;there&quot;."),
77
+ ("This is a mix < than 100% & 3/5",
78
+ "This is a mix &lt; than 100% & 3&sol;5")
79
+ ])
80
+ def test_sanitize_html_with_partial_entity_replacement(self, text, expected):
81
+ assert sanitize_html_text(text) == expected
82
+ assert sanitize_html_text(text, replace_all_entities=False) == expected
83
+
84
+ @pytest.mark.parametrize("text, expected", [
85
+ ("Some text abcdefghijklmnopqrstuvwxyz",
86
+ "Some text abcdefghijklmnopqrstuvwxyz"),
87
+ ("0123456789.!?#",
88
+ "0123456789&period;&excl;&quest;&num;"),
89
+ ("& &; &aamp; &amp &amp; &AMP;",
90
+ "&amp; &amp;&semi; &amp;aamp&semi; &amp &amp; &AMP;"),
91
+ ("&sool; &sol;/",
92
+ "&amp;sool&semi; &sol;&sol;"),
93
+ ('<div>Some text &sol;</div>',
94
+ '&lt;div&gt;Some text &sol;&lt;&sol;div&gt;'),
95
+ ('Some text\nand more',
96
+ 'Some text<br>and more'),
97
+ ('<p>&nbsp;</p>',
98
+ '&lt;p&gt;&nbsp;&lt;&sol;p&gt;'),
99
+ ("This 'quote' is not \"there\".",
100
+ "This &apos;quote&apos; is not &quot;there&quot;&period;"),
101
+ ("This is a mix < than 100% & 3/5",
102
+ "This is a mix &lt; than 100&percnt; &amp; 3&sol;5")
103
+ ])
104
+ def test_sanitize_html_with_full_entity_replacement(self, text, expected):
105
+ assert sanitize_html_text(text, replace_all_entities=True) == expected
@@ -0,0 +1,15 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ __version__ = "0.1.0" # Dynamically set by build backend
14
+
15
+ from . import compilation
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from . import html
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from .html_node import HTMLNode, no_start_tag, no_end_tag, RawText
@@ -0,0 +1,210 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ import itertools
14
+ from typing import Any, Dict, List, Union
15
+ from webwidgets.utility.sanitizing import sanitize_html_text
16
+
17
+
18
class HTMLNode:
    """Represents an HTML node (for example, a div or a span).

    The tag name used when rendering is the lowercase name of the concrete
    class (see :py:meth:`HTMLNode._get_tag_name`).
    """

    # When True, the node and its children are always rendered on one line.
    one_line: bool = False

    def __init__(self, children: Union[List['HTMLNode'], None] = None,
                 attributes: Union[Dict[str, str], None] = None):
        """Creates an HTMLNode with optional children and attributes.

        :param children: List of child HTML nodes. Defaults to a new empty
            list. A list passed explicitly is stored as is (not copied).
        :param attributes: Dictionary of attributes for the node. Defaults to
            a new empty dictionary. A dictionary passed explicitly is stored
            as is (not copied).
        """
        # Fresh containers are created per instance. Using `[]` / `{}` as
        # default values would make every node share the same list and
        # dictionary, so adding a child to one node would add it to all.
        self.children = [] if children is None else children
        self.attributes = {} if attributes is None else attributes

    def _get_tag_name(self) -> str:
        """Returns the tag name of the HTML node.

        The tag name of a node object is the name of its class in lowercase.

        :return: The tag name of the HTML node.
        :rtype: str
        """
        return self.__class__.__name__.lower()

    def _render_attributes(self) -> str:
        """Renders the attributes of the HTML node into a string that can be
        added to the start tag.

        :return: A string containing all attribute key-value pairs separated
            by spaces, each formatted as `key="value"`.
        :rtype: str
        """
        # NOTE(review): values are written verbatim, so a value containing a
        # double quote produces malformed markup. Callers must pre-escape.
        return ' '.join(
            f'{key}="{value}"' for key, value in self.attributes.items()
        )

    def add(self, child: 'HTMLNode') -> None:
        """Adds a child to the HTML node.

        :param child: The child to be added. It is appended after the
            existing children.
        """
        self.children.append(child)

    @property
    def start_tag(self) -> str:
        """Returns the opening tag of the HTML node, including any attributes.

        :return: A string containing the opening tag of the element with its
            attributes.
        :rtype: str
        """
        # Rendering attributes; the separating space is only emitted when
        # there is at least one attribute to show.
        attributes = self._render_attributes()
        maybe_space = ' ' if attributes else ''

        # Building start tag
        return f"<{self._get_tag_name()}{maybe_space}{attributes}>"

    @property
    def end_tag(self) -> str:
        """Returns the closing tag of the HTML node.

        :return: A string containing the closing tag of the element.
        :rtype: str
        """
        return f"</{self._get_tag_name()}>"

    def to_html(self, collapse_empty: bool = True,
                indent_size: int = 4, indent_level: int = 0,
                force_one_line: bool = False, return_lines: bool = False,
                **kwargs: Any) -> Union[str, List[str]]:
        """Converts the HTML node into HTML code.

        :param collapse_empty: If True, collapses empty elements into a
            single line. Defaults to True.
        :type collapse_empty: bool
        :param indent_size: The number of spaces to use for each indentation
            level.
        :type indent_size: int
        :param indent_level: The current level of indentation in the HTML
            output.
        :type indent_level: int
        :param force_one_line: If True, forces all child elements to be
            rendered on a single line without additional indentation.
            Defaults to False.
        :type force_one_line: bool
        :param return_lines: Whether to return the lines of HTML code
            individually. Defaults to False.
        :type return_lines: bool
        :param **kwargs: Additional keyword arguments to pass down to child
            elements.
        :type **kwargs: Any
        :return: A string containing the HTML representation of the element
            if `return_lines` is `False` (default), or the list of individual
            lines from that HTML code if `return_lines` is `True`.
        :rtype: str or List[str]
        """
        # Opening the element
        indentation = "" if force_one_line else ' ' * indent_size * indent_level
        html_lines = [indentation + self.start_tag]

        # If content must be in one line
        if self.one_line or force_one_line or (collapse_empty
                                               and not self.children):
            # Children are rendered without indentation and forced onto one
            # line themselves, then everything is concatenated.
            html_lines += list(itertools.chain.from_iterable(
                child.to_html(collapse_empty=collapse_empty,
                              indent_level=0, force_one_line=True,
                              return_lines=True, **kwargs)
                for child in self.children))
            html_lines += [self.end_tag]
            html_lines = [''.join(html_lines)]  # Flattening the line

        # If content spans multi-line
        else:
            # Each child is rendered one indentation level deeper.
            html_lines += list(itertools.chain.from_iterable(
                child.to_html(collapse_empty=collapse_empty,
                              indent_size=indent_size,
                              indent_level=indent_level + 1,
                              return_lines=True,
                              **kwargs)
                for child in self.children))
            html_lines += [indentation + self.end_tag]

            # Trimming empty lines: a line is kept only if it contains at
            # least one character other than a space.
            html_lines = [line for line in html_lines if line.strip(' ')]

        # If return_lines is True, return a list of lines
        if return_lines:
            return html_lines

        # Otherwise, return a single string
        return '\n'.join(html_lines)
143
+
144
+
145
def no_start_tag(cls):
    """Class decorator that blanks out the start tag of an HTMLNode subclass.

    The `start_tag` attribute of the class is overwritten with a read-only
    property that always evaluates to the empty string.

    :param cls: A subclass of HTMLNode whose start tag should be removed.
    :return: The same class object, now with an empty start tag.
    """
    def _empty_tag(_node):
        return ''

    cls.start_tag = property(
        fget=_empty_tag, doc="This element does not have a start tag")
    return cls
154
+
155
+
156
def no_end_tag(cls):
    """Class decorator that blanks out the end tag of an HTMLNode subclass.

    The `end_tag` attribute of the class is overwritten with a read-only
    property that always evaluates to the empty string.

    :param cls: A subclass of HTMLNode whose end tag should be removed.
    :return: The same class object, now with an empty end tag.
    """
    def _empty_tag(_node):
        return ''

    cls.end_tag = property(
        fget=_empty_tag, doc="This element does not have an end tag")
    return cls
165
+
166
+
167
@no_start_tag
@no_end_tag
class RawText(HTMLNode):
    """A node holding plain text, rendered without any surrounding tags."""

    # Raw text is always rendered on a single line.
    one_line = True

    def __init__(self, text: str):
        """Creates a raw text node.

        :param text: The text content of the node. It is stored unmodified
            and only sanitized when :py:meth:`RawText.to_html` is called.
        :type text: str
        """
        super().__init__()
        self.text = text

    def to_html(self, indent_size: int = 4, indent_level: int = 0,
                return_lines: bool = False, replace_all_entities: bool = False,
                **kwargs: Any) -> Union[str, List[str]]:
        """Converts the raw text node to HTML.

        The stored text is passed through :py:func:`sanitize_html_text` and
        prefixed with the indentation for the requested level.

        :param indent_size: See :py:meth:`HTMLNode.to_html`.
        :type indent_size: int
        :param indent_level: See :py:meth:`HTMLNode.to_html`.
        :type indent_level: int
        :param return_lines: See :py:meth:`HTMLNode.to_html`.
        :type return_lines: bool
        :param replace_all_entities: See :py:func:`sanitize_html_text`.
        :type replace_all_entities: bool
        :param kwargs: Other keyword arguments. These are ignored.
        :type kwargs: Any
        :return: The indented, sanitized text, wrapped in a one-element list
            if `return_lines` is True.
        :rtype: str or List[str]
        """
        padding = (' ' * indent_size) * indent_level
        rendered = padding + sanitize_html_text(
            self.text, replace_all_entities=replace_all_entities)
        return [rendered] if return_lines else rendered
@@ -0,0 +1,13 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from .sanitizing import *
@@ -0,0 +1,132 @@
1
+ # =======================================================================
2
+ #
3
+ # This file is part of WebWidgets, a Python package for designing web
4
+ # UIs.
5
+ #
6
+ # You should have received a copy of the MIT License along with
7
+ # WebWidgets. If not, see <https://opensource.org/license/mit>.
8
+ #
9
+ # Copyright(C) 2025, mlaasri
10
+ #
11
+ # =======================================================================
12
+
13
+ from html.entities import html5 as HTML_ENTITIES
14
+ import re
15
+ from typing import Tuple
16
+
17
+
18
def _build_char_to_html_entities():
    """Builds the mapping from characters to their entity names.

    Entity names are grouped by the character (or character sequence) they
    represent in a single pass over the entity table. For each character the
    names are sorted by length and the preferred name is then swapped into
    first position.

    :return: A dictionary mapping each character to a tuple of entity names,
        preferred name first.
    :rtype: dict
    """
    # Grouping in one pass instead of rescanning the whole table for every
    # value. Names keep the order in which they appear in the table.
    grouped = {}
    for name, char in HTML_ENTITIES.items():
        grouped.setdefault(char, []).append(name)

    table = {}
    for char, names in grouped.items():
        names.sort(key=len)  # Stable: ties keep their table order

        # Shortest name with a semicolon, or the shortest name overall if no
        # name has one.
        candidate = next((n for n in names if ';' in n), names[0])

        # The lowercase variant is preferred when it is also a valid name
        # for this character.
        preferred = candidate.lower() if candidate.lower() in names else candidate

        i = names.index(preferred)
        names[i], names[0] = names[0], names[i]
        table[char] = tuple(names)
    return table


# Maps characters to their corresponding character references. If a character can be
# represented by multiple entities, the preferred one is placed first in the tuple.
# Preference is given to the shortest one with a semicolon, in lowercase if possible
# (e.g. "&amp;").
CHAR_TO_HTML_ENTITIES = _build_char_to_html_entities()


# Regular expression matching all isolated '&' characters that are not part of an
# HTML entity, i.e. every '&' that is not immediately followed by a known entity
# name.
_REGEX_AMP = re.compile(f"&(?!({'|'.join(HTML_ENTITIES.keys())}))")


# Regular expression matching all isolated ';' characters that are not part of an
# HTML entity. The expression essentially concatenates one lookbehind per entity.
_REGEXP_SEMI = re.compile(
    ''.join(f"(?<!&{e.replace(';', '')})"
            for e in HTML_ENTITIES if ';' in e) + ';')


# Entities that are always replaced during sanitization. These are: <, >, /,
# according to rule 13.1.2.6 of the HTML5 specification, as well as single quotes
# ', double quotes ", and new line characters '\n'.
# Source: https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions
_ALWAYS_SANITIZED = ("\u003C", "\u003E", "\u002F", "'", "\"", "\n")


# Entities other than new line characters '\n' (which require special treatment)
# that are always replaced during sanitization.
_ALWAYS_SANITIZED_BUT_NEW_LINES = tuple(
    e for e in _ALWAYS_SANITIZED if e != '\n')


# Entities other than the ampersand and semicolon (which require special treatment
# because they are part of other entities) that are replaced by default during
# sanitization but can also be skipped for speed. This set of entities consists of
# all remaining entities but the ampersand and semicolon.
# The tuple is sorted longest-first so that multi-character sequences are replaced
# before any single character they contain, and so that the replacement order (and
# therefore the output) does not depend on set iteration order.
_OPTIONALLY_SANITIZED_BUT_AMP_SEMI = tuple(sorted(
    set(CHAR_TO_HTML_ENTITIES.keys()) - set(_ALWAYS_SANITIZED) - set({'&', ';'}),
    key=lambda c: (-len(c), c)))
64
+
65
+
66
def replace_html_entities(text: str, characters: Tuple[str, ...]) -> str:
    """Replaces characters with their corresponding HTML entities in the given text.

    If a character can be represented by multiple entities, preference is given to
    the shortest one that contains a semicolon, in lowercase if possible.

    Replacements are applied one after the other, in the order given by
    `characters`, each on the result of the previous one.

    :param text: The input text in which characters should be replaced.
    :type text: str
    :param characters: The characters to be replaced by their HTML entity. Usually
        each item in the tuple is a single character, but some entities span
        multiple characters.
    :type characters: Tuple[str, ...]
    :return: The text with every occurrence of the given characters replaced by
        `&` followed by the preferred entity name.
    :rtype: str
    :raises KeyError: If an item of `characters` has no known HTML entity.
    """
    for c in characters:
        preferred = CHAR_TO_HTML_ENTITIES[c][0]  # Preferred is first
        text = text.replace(c, '&' + preferred)
    return text
85
+
86
+
87
def sanitize_html_text(text: str, replace_all_entities: bool = False) -> str:
    """Sanitizes raw HTML text by replacing certain characters with HTML-friendly equivalents.

    The following replacements are always performed:
    - `<`, `>`, and `/` become the HTML entities `lt;`, `gt;`, and `sol;`,
      following rule 13.1.2.6 of the HTML5 specification (see source:
      https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions)
    - single quotes `'` and double quotes `"` become the HTML entities `apos;`
      and `quot;`
    - new line characters '\\n' become `br` tags

    In addition, when `replace_all_entities` is True, every character that can be
    represented by an HTML entity is replaced with that entity. If a character
    can be represented by multiple entities, preference is given to the shortest
    one that contains a semicolon, in lowercase if possible. In that mode, `&` and
    `;` characters that already belong to an HTML entity are left untouched.

    See https://html.spec.whatwg.org/multipage/named-characters.html for a list of
    all supported entities.

    :param text: The raw HTML text that needs sanitization.
    :type text: str
    :param replace_all_entities: Whether to replace every character that can be
        represented by an HTML entity. Use False to skip non-mandatory characters
        and increase speed. Default is False.
    :type replace_all_entities: bool
    :return: The sanitized HTML text.
    :rtype: str
    """
    if replace_all_entities:
        # Optional entities go first: stray '&' and ';' have to be escaped
        # before the steps below start inserting new ones into the text.
        # Only '&' and ';' that are NOT part of an HTML entity are touched.
        without_stray_amp = _REGEX_AMP.sub('&amp;', text)
        without_stray_semi = _REGEXP_SEMI.sub('&semi;', without_stray_amp)

        # Every other optional character
        text = replace_html_entities(
            without_stray_semi, _OPTIONALLY_SANITIZED_BUT_AMP_SEMI)

    # Mandatory entities come next
    escaped = replace_html_entities(text, _ALWAYS_SANITIZED_BUT_NEW_LINES)

    # New lines are converted last, otherwise the '<' and '>' of the `br` tag
    # would themselves be escaped.
    return escaped.replace('\n', '<br>')