thonny-html-highlight 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.3
2
+ Name: thonny-html-highlight
3
+ Version: 0.1.0
4
+ Summary: Thonny plugin: syntax highlighting for HTML files
5
+ Author: Geoff Matheson
6
+ Author-email: Geoff Matheson <geoff.matheson@education.vic.gov.au>
7
+ Requires-Dist: thonny>=5.0
8
+ Requires-Python: >=3.13
9
+ Description-Content-Type: text/markdown
10
+
11
+ # thonny-html-highlight
12
+
13
+ Syntax highlighting for HTML files in the [Thonny](https://thonny.org) IDE.
14
+
15
+ By default Thonny treats `.html` and `.htm` files as plain text. This plugin adds colour highlighting for:
16
+
17
+ - Tag names (`div`, `span`, `a`, …)
18
+ - Attribute names (`class`, `href`, `data-value`, …)
19
+ - Attribute values (`"foo"`, `'bar'`, …)
20
+ - HTML comments (`<!-- … -->`)
21
+ - DOCTYPE declarations (`<!DOCTYPE html>`)
22
+ - Entity references (`&amp;`, `&#169;`, `&#xA9;`, …)
23
+ - Angle-bracket punctuation
24
+
25
+ ## Requirements
26
+
27
+ - Thonny 5.0 or later
28
+ - Python 3.13 or later (bundled with Thonny 5)
29
+
30
+ ## Installation
31
+
32
+ ### Via Thonny's built-in package manager (recommended)
33
+
34
+ 1. Open Thonny.
35
+ 2. Go to **Tools → Manage packages…**
36
+ 3. Search for `thonny-html-highlight`.
37
+ 4. Click **Install**.
38
+ 5. Restart Thonny.
39
+
40
+ ### Via pip / uv (command line)
41
+
42
+ ```bash
43
+ # Using pip
44
+ pip install thonny-html-highlight
45
+
46
+ # Using uv
47
+ uv pip install thonny-html-highlight
48
+ ```
49
+
50
+ Then restart Thonny.
51
+
52
+ ### From source (development)
53
+
54
+ ```bash
55
+ git clone https://github.com/mrmatho/thonny-html-highlight.git
56
+ cd thonny-html-highlight
57
+ uv pip install -e .
58
+ ```
59
+
60
+ Restart Thonny after installing.
61
+
62
+ ## Usage
63
+
64
+ Open any `.html` or `.htm` file in Thonny — highlighting is applied automatically. No configuration is required.
65
+
66
+ ## Known limitations
67
+
68
+ - Content inside `<script>` and `<style>` tags is treated as HTML, which may produce incorrect highlighting for JavaScript or CSS within those blocks.
69
+ - Colours follow Thonny's default light theme. Integration with custom syntax themes is planned for a future release.
70
+
71
+ ## Development
72
+
73
+ ```bash
74
+ # Run tests
75
+ uv run pytest
76
+
77
+ # Run tests with verbose output
78
+ uv run pytest -v
79
+ ```
80
+
81
+ ## Licence
82
+
83
+ MIT
@@ -0,0 +1,73 @@
1
+ # thonny-html-highlight
2
+
3
+ Syntax highlighting for HTML files in the [Thonny](https://thonny.org) IDE.
4
+
5
+ By default Thonny treats `.html` and `.htm` files as plain text. This plugin adds colour highlighting for:
6
+
7
+ - Tag names (`div`, `span`, `a`, …)
8
+ - Attribute names (`class`, `href`, `data-value`, …)
9
+ - Attribute values (`"foo"`, `'bar'`, …)
10
+ - HTML comments (`<!-- … -->`)
11
+ - DOCTYPE declarations (`<!DOCTYPE html>`)
12
+ - Entity references (`&amp;`, `&#169;`, `&#xA9;`, …)
13
+ - Angle-bracket punctuation
14
+
15
+ ## Requirements
16
+
17
+ - Thonny 5.0 or later
18
+ - Python 3.13 or later (bundled with Thonny 5)
19
+
20
+ ## Installation
21
+
22
+ ### Via Thonny's built-in package manager (recommended)
23
+
24
+ 1. Open Thonny.
25
+ 2. Go to **Tools → Manage packages…**
26
+ 3. Search for `thonny-html-highlight`.
27
+ 4. Click **Install**.
28
+ 5. Restart Thonny.
29
+
30
+ ### Via pip / uv (command line)
31
+
32
+ ```bash
33
+ # Using pip
34
+ pip install thonny-html-highlight
35
+
36
+ # Using uv
37
+ uv pip install thonny-html-highlight
38
+ ```
39
+
40
+ Then restart Thonny.
41
+
42
+ ### From source (development)
43
+
44
+ ```bash
45
+ git clone https://github.com/mrmatho/thonny-html-highlight.git
46
+ cd thonny-html-highlight
47
+ uv pip install -e .
48
+ ```
49
+
50
+ Restart Thonny after installing.
51
+
52
+ ## Usage
53
+
54
+ Open any `.html` or `.htm` file in Thonny — highlighting is applied automatically. No configuration is required.
55
+
56
+ ## Known limitations
57
+
58
+ - Content inside `<script>` and `<style>` tags is treated as HTML, which may produce incorrect highlighting for JavaScript or CSS within those blocks.
59
+ - Colours follow Thonny's default light theme. Integration with custom syntax themes is planned for a future release.
60
+
61
+ ## Development
62
+
63
+ ```bash
64
+ # Run tests
65
+ uv run pytest
66
+
67
+ # Run tests with verbose output
68
+ uv run pytest -v
69
+ ```
70
+
71
+ ## Licence
72
+
73
+ MIT
@@ -0,0 +1,24 @@
1
+ [project]
2
+ name = "thonny-html-highlight"
3
+ version = "0.1.0"
4
+ description = "Thonny plugin: syntax highlighting for HTML files"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Geoff Matheson", email = "geoff.matheson@education.vic.gov.au" }
8
+ ]
9
+ requires-python = ">=3.13"
10
+ dependencies = [
11
+ "thonny>=5.0",
12
+ ]
13
+
14
+ [build-system]
15
+ requires = ["uv_build>=0.9.7,<0.10.0"]
16
+ build-backend = "uv_build"
17
+
18
+ [tool.uv.build-backend]
19
+ module-name = "thonnycontrib.html_highlight"
20
+ module-root = "src"
21
+
22
+ [tool.pytest.ini_options]
23
+ testpaths = ["tests"]
24
+ pythonpath = ["src"]
@@ -0,0 +1,112 @@
1
+ """Thonny plugin: HTML syntax highlighting for ``.html`` / ``.htm`` files.
2
+
3
+ When Thonny loads this plugin it calls :func:`load_plugin`, which binds
4
+ event handlers onto the ``EditorCodeViewText`` widget class. On every text
5
+ change in any editor, the handler checks whether the file is HTML (by
6
+ inspecting its path) and, if so, keeps an :class:`.HtmlHighlighter` instance
7
+ alive on the widget and schedules a re-highlight.
8
+
9
+ Setting ``file_type = "html"`` on the widget tells Thonny to use plain-text
10
+ editing behaviour (simple Return, standard Backspace) rather than Python-aware
11
+ behaviour. It also gives future extensions (auto-indentation, tag
12
+ auto-closing) a clean hook point.
13
+
14
+ Widget hierarchy assumed by :func:`_get_editor_path`:
15
+
16
+ ``EditorCodeViewText`` → ``CodeView`` → ``BaseEditor``
17
+
18
+ ``BaseEditor.get_target_path()`` returns the file path or ``None`` for
19
+ untitled buffers.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import tkinter as tk
24
+
25
+ from .highlighter import HtmlHighlighter
26
+
27
+ _HTML_EXTENSIONS = (".html", ".htm")
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Helpers
32
+ # ---------------------------------------------------------------------------
33
+
34
+
35
def _get_editor_path(text: tk.Text) -> str | None:
    """Look up the path of the file shown in *text*.

    The editor object is reached through two ``master`` hops (text widget →
    code view → editor) and asked for ``get_target_path()``.  Any
    ``AttributeError`` raised along the way — a missing parent, or an object
    without that method — results in ``None`` instead of propagating.
    """
    try:
        return text.master.master.get_target_path()
    except AttributeError:
        return None
42
+
43
+
44
def _is_html_file(text: tk.Text) -> bool:
    """Tell whether the editor owning *text* has an HTML file path.

    The check is purely name-based: the path must exist and, compared
    case-insensitively, end with one of ``_HTML_EXTENSIONS``.
    """
    target = _get_editor_path(text)
    if target is None:
        # Untitled buffer, or the widget is not inside an editor.
        return False
    return target.lower().endswith(_HTML_EXTENSIONS)
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Event handlers
52
+ # ---------------------------------------------------------------------------
53
+
54
+
55
def _on_text_change(event: tk.Event) -> None:
    """Handle ``<<TextChange>>`` for an editor text widget.

    Events from anything other than a ``tk.Text`` showing an HTML file are
    ignored.  Otherwise the widget gets an :class:`HtmlHighlighter` (created
    on first use), its file type is switched to ``"html"`` when needed, and
    a re-highlight is scheduled.
    """
    widget = event.widget
    if not (isinstance(widget, tk.Text) and _is_html_file(widget)):
        return

    # Lazily attach one highlighter per widget.
    try:
        highlighter = widget._html_highlighter
    except AttributeError:
        highlighter = widget._html_highlighter = HtmlHighlighter(widget)

    # update_file_type() resets file_type on save/rename, so "html" is
    # re-asserted on every change to keep plain-text editing behaviour.
    wrong_type = getattr(widget, "file_type", None) != "html"
    if wrong_type and hasattr(widget, "set_file_type"):
        widget.set_file_type("html")

    highlighter.schedule_update()
76
+
77
+
78
def _on_appearance_change(event: tk.Event) -> None:
    """Refresh HTML highlighting after Thonny's appearance changes.

    Walks every editor in the workbench's editor notebook and, for each text
    widget that already carries an ``_html_highlighter``, reconfigures its
    tag colours and schedules a re-highlight.

    The handler is best-effort and never raises: a failure for one editor
    does not prevent the others from being refreshed.  Failures are logged
    rather than silently discarded so they can be diagnosed.

    Args:
        event: The ``<<UpdateAppearance>>`` event (unused).
    """
    import logging  # local import keeps this block self-contained

    from thonny import get_workbench  # noqa: PLC0415 — deferred to avoid import at test time

    logger = logging.getLogger(__name__)

    try:
        notebook = get_workbench().get_editor_notebook()
        for editor in notebook.get_all_editors():
            try:
                text = editor._code_view.text
                hl: HtmlHighlighter | None = getattr(text, "_html_highlighter", None)
                if hl is not None:
                    hl.configure_tags()
                    hl.schedule_update()
            except Exception:
                # Keep going: one broken editor must not block the rest.
                logger.exception("Could not refresh HTML highlighting for %r", editor)
    except Exception:
        logger.exception("Could not enumerate editors to refresh HTML highlighting")
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Plugin entry point
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
def load_plugin() -> None:
    """Hook the HTML highlighter into Thonny's workbench.

    Adds (without replacing existing bindings) a ``<<TextChange>>`` handler
    on the ``EditorCodeViewText`` widget class and an
    ``<<UpdateAppearance>>`` handler on the workbench itself.
    """
    from thonny import get_workbench  # noqa: PLC0415 — deferred to avoid import at test time

    workbench = get_workbench()
    workbench.bind_class(
        "EditorCodeViewText", "<<TextChange>>", _on_text_change, True
    )
    workbench.bind("<<UpdateAppearance>>", _on_appearance_change, True)
@@ -0,0 +1,149 @@
1
+ """Applies HTML syntax highlighting to a Thonny editor text widget.
2
+
3
+ The :class:`HtmlHighlighter` owns all interaction with the tkinter Text
4
+ widget. It reads tokens from :mod:`.tokenizer` and applies named tags so
5
+ that Thonny's theme system can override colours in the future.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import tkinter as tk
10
+ from typing import TYPE_CHECKING, List
11
+
12
+ from .tokenizer import Token, offsets_to_tkindices, tokenize_html
13
+
14
+ if TYPE_CHECKING:
15
+ pass # kept for future type-only imports (e.g. SyntaxText)
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Tag configuration
20
+ # ---------------------------------------------------------------------------
21
+
22
# Appearance options for each highlighting tag, keyed by tkinter tag name.
# The colours are picked for Thonny's built-in light theme; reading them
# from Thonny's syntax-theme API (so dark themes work) is left for a future
# version.
_TAG_COLORS: dict[str, dict] = {
    "html_comment": {"foreground": "#808080"},
    "html_doctype": {"foreground": "#999999"},
    "html_tag_name": {"foreground": "#000080"},
    "html_attr_name": {"foreground": "#912B6C"},
    "html_attr_value": {"foreground": "#007A00"},
    "html_entity": {"foreground": "#007070"},
    "html_bracket": {"foreground": "#606060"},
}

# Tokenizer token type → tkinter tag name used to display it.
_TOKEN_TO_TAG: dict[str, str] = {
    "comment": "html_comment",
    "doctype": "html_doctype",
    "tag_name": "html_tag_name",
    "attr_name": "html_attr_name",
    "attr_value": "html_attr_value",
    "entity": "html_entity",
    "bracket": "html_bracket",
}

# Every tag name this module may apply, in table order.
ALL_HTML_TAGS: List[str] = [*_TOKEN_TO_TAG.values()]
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Highlighter
51
+ # ---------------------------------------------------------------------------
52
+
53
+
54
class HtmlHighlighter:
    """Manages HTML syntax highlighting for a single editor text widget.

    The instance keeps a reference to the text widget it decorates and is
    the only object that adds or removes the ``html_*`` tags on it.

    Updating is *scheduled* rather than immediate: :meth:`schedule_update`
    posts at most one ``after_idle`` callback at a time, so a burst of calls
    (e.g. one per keystroke) results in a single re-highlight.
    """

    def __init__(self, text: tk.Text) -> None:
        """Attach to *text* and configure the highlighting tags on it."""
        self._text = text
        # True while an after_idle callback is pending.
        self._update_scheduled = False
        self.configure_tags()

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    def configure_tags(self) -> None:
        """Configure tag appearance and tag priority on the widget.

        Applies the options in ``_TAG_COLORS`` and raises every HTML tag so
        it wins over previously existing tags, then puts the selection tag
        back on top.  Safe to call repeatedly, e.g. after an appearance
        change.

        .. todo::
            Read colours from Thonny's syntax-theme API so that dark
            themes are respected automatically.
        """
        for tag, opts in _TAG_COLORS.items():
            self._text.tag_configure(tag, **opts)

        for tag in ALL_HTML_TAGS:
            try:
                self._text.tag_raise(tag)
            except tk.TclError:
                pass  # defensive: tolerate a tag that cannot be raised

        # tag_raise() without an ``aboveThis`` argument gives the tag the
        # highest priority of all, which would also place it above "sel"
        # and let HTML colours override the selection's own colours.
        # Re-raise "sel" so selected text keeps looking normal.
        try:
            self._text.tag_raise("sel")
        except tk.TclError:
            pass  # defensive: widget without a selection tag

    def schedule_update(self) -> None:
        """Schedule a re-highlight to run when the event loop is idle.

        Calls made while a callback is already pending are no-ops, which
        prevents redundant work during rapid typing.
        """
        if not self._update_scheduled:
            self._update_scheduled = True
            self._text.after_idle(self._do_update)

    def update(self) -> None:
        """Re-tokenise and re-highlight the full widget content immediately."""
        # Clear the flag first so a failure below cannot block later updates.
        self._update_scheduled = False
        self._apply_highlighting()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _do_update(self) -> None:
        """Idle callback — delegates to :meth:`update`."""
        self.update()

    def _apply_highlighting(self) -> None:
        """Replace all HTML tags on the widget with freshly computed ones."""
        # Drop every existing HTML tag range before adding new ones.
        for tag in ALL_HTML_TAGS:
            self._text.tag_remove(tag, "1.0", "end")

        # "end-1c" excludes the newline tkinter appends after the content.
        content = self._text.get("1.0", "end-1c")
        if not content:
            return

        tokens: List[Token] = tokenize_html(content)
        if not tokens:
            return

        # Convert every start/end offset in one batch rather than one
        # conversion per token.
        all_offsets = [
            offset for token in tokens for offset in (token.start, token.end)
        ]
        index_map = offsets_to_tkindices(content, all_offsets)

        for token in tokens:
            tag = _TOKEN_TO_TAG.get(token.type)
            if tag is None:
                continue  # token type without a display tag
            start_idx = index_map[token.start]
            end_idx = index_map[token.end]
            if start_idx != end_idx:  # skip zero-width tokens
                self._text.tag_add(tag, start_idx, end_idx)
@@ -0,0 +1,188 @@
1
+ """HTML tokenizer for syntax highlighting.
2
+
3
+ Contains only pure functions — no tkinter or Thonny imports — so every
4
+ function here can be unit-tested in complete isolation.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from typing import Dict, List, NamedTuple
10
+
11
+
12
class Token(NamedTuple):
    """One classified span of an HTML source string.

    The span is half-open: it covers ``source[start:end]``.
    """

    # Category label: 'comment', 'doctype', 'entity', 'bracket',
    # 'tag_name', 'attr_name' or 'attr_value'.
    type: str
    # Offset of the first character of the span (inclusive).
    start: int
    # Offset just past the last character of the span (exclusive).
    end: int
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Compiled regular expressions
30
+ # ---------------------------------------------------------------------------
31
+
32
# Matches the four top-level HTML constructs, one named group per
# alternative: comment, doctype, entity, tag.
#
# The 'tag' alternative handles both opening and closing tags (including
# self-closing).  Quoted attribute values are matched explicitly so that a
# '>' character inside a value does not prematurely end the tag match.
#
# re.IGNORECASE lets the literal "<!DOCTYPE" also match lower-case spellings;
# re.DOTALL only matters for '.', which this pattern does not use.
#
# NOTE(review): inside the tag alternative the quoted branches overlap with
# the catch-all [^>] branch (a quote character can be consumed by either).
# For an unclosed tag followed by many quote characters and no later '>',
# this looks like it could backtrack heavily — worth profiling on such input.
_TOP_LEVEL_RE = re.compile(
    r"(?P<comment><!--[\s\S]*?-->)"
    r"|(?P<doctype><!DOCTYPE[^>]*>)"
    r"|(?P<entity>&(?:[a-zA-Z][a-zA-Z0-9]*|#\d+|#x[0-9a-fA-F]+);)"
    r'|(?P<tag></?[a-zA-Z][a-zA-Z0-9_:-]*(?:"[^"]*"|\'[^\']*\'|[^>])*>)',
    re.DOTALL | re.IGNORECASE,
)

# Parses the internal structure of a single matched tag string into four
# contiguous groups: optional '/', tag name, raw attribute text, and the
# closing '>' or '/>'.  The (?!/>) lookahead keeps a trailing '/>' out of
# the 'attrs' group so it is captured by 'end' instead.
_TAG_RE = re.compile(
    r'^<(?P<slash>/?)(?P<name>[a-zA-Z][a-zA-Z0-9_:-]*)'
    r'(?P<attrs>(?:"[^"]*"|\'[^\']*\'|(?!/>)[^>])*)'
    r'(?P<end>/?>)$',
    re.DOTALL,
)

# Finds individual attribute name + optional value within an attrs string.
# The hyphen is placed at the end of the second character class so it is
# treated as a literal rather than a range operator.
# The value may be double-quoted, single-quoted, or unquoted; the unquoted
# form can match an empty string (e.g. "name=" followed by whitespace).
_ATTR_RE = re.compile(
    r'(?P<attr_name>[a-zA-Z_:][a-zA-Z0-9_.:-]*)'
    r'(?:\s*=\s*(?P<attr_value>"[^"]*"|\'[^\']*\'|[^\s>]*))?'
)

# Names of the top-level alternatives, in the order they appear in _TOP_LEVEL_RE.
_TOP_LEVEL_GROUPS = ("comment", "doctype", "entity", "tag")
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Public API
67
+ # ---------------------------------------------------------------------------
68
+
69
+
70
def tokenize_html(text: str) -> List[Token]:
    """Scan *text* and return its highlight tokens ordered by ``start``.

    Comments, DOCTYPE declarations and entity references each become a
    single token spanning the whole match.  A tag is broken down further
    into bracket, tag-name, attribute-name and attribute-value tokens.

    Text that matches none of the top-level patterns is skipped, so
    malformed or incomplete HTML yields fewer tokens instead of an error.

    .. note::
        Content inside ``<script>`` and ``<style>`` tags is currently
        scanned like any other HTML.
    """
    whole_match_kinds = ("comment", "doctype", "entity")
    found: List[Token] = []

    for hit in _TOP_LEVEL_RE.finditer(text):
        kind = _first_matched_group(hit, _TOP_LEVEL_GROUPS)
        if kind == "tag":
            # Tags contribute several finer-grained tokens.
            _tokenize_tag(text, hit.start(), hit.end(), found)
        elif kind in whole_match_kinds:
            found.append(Token(kind, hit.start(), hit.end()))

    return sorted(found, key=lambda tok: tok.start)
103
+
104
+
105
def offsets_to_tkindices(text: str, offsets: List[int]) -> Dict[int, str]:
    """Convert character offsets into tkinter ``"line.col"`` index strings.

    Tkinter indices are 1-based for lines and 0-based for columns:
    offset 0 → ``"1.0"``, the position *before* the first character.
    The offset of a ``"\\n"`` character maps to the end of its line; the
    offset just after it maps to column 0 of the next line.

    Args:
        text: The string the offsets refer to.
        offsets: Character offsets into *text*; duplicates are allowed.

    Returns:
        A dict mapping each distinct offset to its index string.  Offsets
        outside ``0..len(text)`` are clamped to the nearest end of the
        text rather than raising.
    """
    if not offsets:
        return {}

    length = len(text)
    result: Dict[int, str] = {}
    line, pos = 1, 0

    # Visiting offsets in ascending order lets the newline count advance
    # incrementally: each stretch of text is counted only once.
    for target in sorted(set(offsets)):
        clamped = min(max(target, 0), length)
        line += text.count("\n", pos, clamped)
        pos = clamped
        # Column = distance from the character after the previous newline
        # (rfind returns -1 on the first line, giving a line start of 0).
        line_start = text.rfind("\n", 0, clamped) + 1
        result[target] = f"{line}.{clamped - line_start}"

    return result
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Private helpers
136
+ # ---------------------------------------------------------------------------
137
+
138
+
139
def _first_matched_group(match: re.Match, names: tuple) -> str | None:
    """Pick the earliest entry of *names* whose group took part in *match*.

    Returns ``None`` when none of the named groups matched (or *names* is
    empty).
    """
    return next(
        (candidate for candidate in names if match.group(candidate) is not None),
        None,
    )
145
+
146
+
147
def _tokenize_tag(
    text: str, start: int, end: int, tokens: List[Token]
) -> None:
    """Split the tag at ``text[start:end]`` into tokens appended to *tokens*.

    Emits, in source order: the opening bracket (``<`` or ``</``), the tag
    name, an ``attr_name`` token per attribute (followed by an
    ``attr_value`` token when the attribute has a value), and the closing
    bracket (``>`` or ``/>``).  All offsets are absolute positions in
    *text*.  A slice that ``_TAG_RE`` does not recognise adds nothing.
    """
    parsed = _TAG_RE.match(text[start:end])
    if parsed is None:
        # Unrecognised shape — skip rather than crash.
        return

    # Group spans are relative to the slice, so shift them by `start`.
    name_from = start + parsed.start("name")
    name_to = start + parsed.end("name")
    tokens.append(Token("bracket", start, name_from))
    tokens.append(Token("tag_name", name_from, name_to))

    # The attrs group begins right where the name ends.
    for attr in _ATTR_RE.finditer(parsed.group("attrs")):
        for kind in ("attr_name", "attr_value"):
            if attr.group(kind) is None:
                continue  # attribute written without a value
            lo, hi = attr.span(kind)
            tokens.append(Token(kind, name_to + lo, name_to + hi))

    tokens.append(Token("bracket", start + parsed.start("end"), end))