sphinx-gp-opengraph 0.0.1a10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,228 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # Generated by sphinx_fonts extension (downloaded at build time)
210
+ docs/_static/fonts/
211
+ docs/_static/css/fonts.css
212
+
213
+ # Claude Code
214
+ **/CLAUDE.local.md
215
+ **/CLAUDE.*.md
216
+ **/.claude/settings.local.json
217
+
218
+ # Playwright MCP
219
+ .playwright-mcp/
220
+
221
+ # Repo-local pytest mirror (do not track — validator-only)
222
+ out/
223
+
224
+ # Misc
225
+ .vim/
226
+ *.lprof
227
+ pip-wheel-metadata/
228
+ monkeytype.sqlite3
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: sphinx-gp-opengraph
3
+ Version: 0.0.1a10
4
+ Summary: OpenGraph and Twitter meta-tag emission for Sphinx — matplotlib-free
5
+ Project-URL: Repository, https://github.com/git-pull/gp-sphinx
6
+ Author-email: Tony Narlock <tony@git-pull.com>
7
+ License: MIT
8
+ Keywords: documentation,meta,opengraph,social,sphinx
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Framework :: Sphinx
11
+ Classifier: Framework :: Sphinx :: Extension
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Classifier: Topic :: Documentation
21
+ Classifier: Topic :: Documentation :: Sphinx
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: <4.0,>=3.10
24
+ Requires-Dist: sphinx>=8.1
25
+ Description-Content-Type: text/markdown
26
+
27
+ # sphinx-gp-opengraph
28
+
29
+ OpenGraph meta-tag emission for Sphinx — a drop-in replacement for
30
+ [`sphinxext-opengraph`](https://github.com/sphinx-doc/sphinxext-opengraph)
31
+ that ships every `ogp_*` config key the upstream supports, minus the
32
+ matplotlib-based social-card generator. No image-rendering dependencies,
33
+ no system-fontconfig surprises.
34
+
35
+ Part of the [gp-sphinx](https://github.com/git-pull/gp-sphinx)
36
+ documentation platform.
37
+
38
+ ## Install
39
+
40
+ ```console
41
+ $ pip install sphinx-gp-opengraph
42
+ ```
43
+
44
+ When you depend on gp-sphinx, this extension is already loaded — see
45
+ [Auto-derived values](#auto-derived-values-when-used-with-gp-sphinx)
46
+ below. The full config-key reference is auto-generated on the
47
+ [package docs page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/)
48
+ from the live `app.add_config_value()` registrations.
49
+
50
+ ## Minimum viable conf.py
51
+
52
+ ```python
53
+ extensions = [
54
+ "sphinx_gp_opengraph",
55
+ ]
56
+ ```
57
+
58
+ ```python
59
+ ogp_site_url = "https://example.com/"
60
+ ```
61
+
62
+ ```python
63
+ ogp_image = "_static/og-default.png"
64
+ ```
65
+
66
+ A 1200×630 PNG works on Slack, Facebook, LinkedIn, and X/Twitter
67
+ unfurlers. With these three values set, every page rendered by an
68
+ HTML-family builder gains `og:title`, `og:type`, `og:url`,
69
+ `og:site_name`, `og:description`, `og:image`, and `og:image:alt`. A
70
+ matching `<meta name="description">` is emitted when the page does not
71
+ already define one.
72
+
73
+ ## Auto-derived values when used with gp-sphinx
74
+
75
+ Projects that build through {py:func}`gp_sphinx.config.merge_sphinx_config`
76
+ do not need to set `ogp_site_url`, `ogp_site_name`, or `ogp_image`
77
+ manually. Pass `docs_url=` to `merge_sphinx_config()` and gp-sphinx
78
+ fills all three from that one value. See [the sphinx-gp-opengraph package
79
+ page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/) for the integration story
80
+ and [`configuration.md`](https://github.com/git-pull/gp-sphinx/blob/main/docs/configuration.md#from-docs_url)
81
+ for the canonical mapping table.
82
+
83
+ ## Per-page overrides
84
+
85
+ Set front-matter fields to override the site-wide defaults on a single
86
+ page. MyST syntax shown; reST field-list syntax behaves the same way.
87
+
88
+ ```markdown
89
+ ---
90
+ ogp_description_length: 160
91
+ og:image: _static/og/this-page.png
92
+ og:image:alt: A tailored hero for this page
93
+ ---
94
+
95
+ # Page title
96
+
97
+ Body paragraph that becomes og:description.
98
+ ```
99
+
100
+ | Field | Effect |
101
+ | --- | --- |
102
+ | `og:image` | Replace the site-default image for this page |
103
+ | `og:image:alt` | Replace the alt text for this page |
104
+ | `ogp_description_length` | Override the description-length cap for this page |
105
+ | `ogp_disable: true` | Skip OpenGraph emission entirely on this page |
106
+
107
+ Any other `og:*` field-list entry is forwarded to the page head verbatim,
108
+ so `og:type`, `og:audio`, etc. work without code changes.
109
+
110
+ ## Twitter cards
111
+
112
+ sphinx-gp-opengraph does not register a separate `twitter_*` namespace;
113
+ crawlers fall back to `og:*` for most fields. Append explicit Twitter
114
+ markup through `ogp_custom_meta_tags` when you need it:
115
+
116
+ ```python
117
+ ogp_custom_meta_tags = [
118
+ '<meta name="twitter:card" content="summary_large_image" />',
119
+ '<meta property="og:image:width" content="1200" />',
120
+ '<meta property="og:image:height" content="630" />',
121
+ ]
122
+ ```
123
+
124
+ ## Migration from `sphinxext-opengraph`
125
+
126
+ Configuration is drop-in compatible — every `ogp_*` key is registered
127
+ with the same name, type, and default — with one behavioural change:
128
+
129
+ - **`ogp_social_cards` is accepted but ignored.** sphinx-gp-opengraph does not
130
+ bundle the matplotlib-based card generator the upstream ships. Setting
131
+ the value emits one `WARNING` at `config-inited`:
132
+
133
+ ```text
134
+ sphinx-gp-opengraph: ogp_social_cards ignored — sphinx-gp-opengraph ships no card generator
135
+ ```
136
+
137
+ Grep your build log for `ogp_social_cards ignored` to find this
138
+ warning. The replacement workflow lives in the next section.
139
+
140
+ The recommended replacement is one static PNG per page. Drop them under
141
+ `_static/og/` and point the per-page `og:image` field-list entry at
142
+ each one. The downstream UX is the same as upstream's auto-generated
143
+ cards — just explicit, and with no build-time dependency on matplotlib
144
+ or PIL.
145
+
146
+ ```text
147
+ docs/
148
+ ├── _static/
149
+ │ └── og/
150
+ │ ├── default.png
151
+ │ ├── quickstart.png
152
+ │ └── reference.png
153
+ ├── quickstart.md
154
+ └── reference.md
155
+ ```
156
+
157
+ ```markdown
158
+ ---
159
+ og:image: _static/og/quickstart.png
160
+ ---
161
+
162
+ # Quickstart
163
+ ```
164
+
165
+ ## See also
166
+
167
+ - [sphinx-gp-sitemap](https://github.com/git-pull/gp-sphinx/tree/main/packages/sphinx-gp-sitemap)
168
+ — companion package for `sitemap.xml` emission
169
+ - [gp-sphinx](https://github.com/git-pull/gp-sphinx) — the umbrella
170
+ docs platform; auto-derives `ogp_site_url`, `ogp_site_name`, and
171
+ `ogp_image` from a single `docs_url` argument
172
+ - [sphinx-gp-opengraph package page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/)
173
+ — integration story, event hooks, and how-it-works
@@ -0,0 +1,147 @@
1
+ # sphinx-gp-opengraph
2
+
3
+ OpenGraph meta-tag emission for Sphinx — a drop-in replacement for
4
+ [`sphinxext-opengraph`](https://github.com/sphinx-doc/sphinxext-opengraph)
5
+ that ships every `ogp_*` config key the upstream supports, minus the
6
+ matplotlib-based social-card generator. No image-rendering dependencies,
7
+ no system-fontconfig surprises.
8
+
9
+ Part of the [gp-sphinx](https://github.com/git-pull/gp-sphinx)
10
+ documentation platform.
11
+
12
+ ## Install
13
+
14
+ ```console
15
+ $ pip install sphinx-gp-opengraph
16
+ ```
17
+
18
+ When you depend on gp-sphinx, this extension is already loaded — see
19
+ [Auto-derived values](#auto-derived-values-when-used-with-gp-sphinx)
20
+ below. The full config-key reference is auto-generated on the
21
+ [package docs page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/)
22
+ from the live `app.add_config_value()` registrations.
23
+
24
+ ## Minimum viable conf.py
25
+
26
+ ```python
27
+ extensions = [
28
+ "sphinx_gp_opengraph",
29
+ ]
30
+ ```
31
+
32
+ ```python
33
+ ogp_site_url = "https://example.com/"
34
+ ```
35
+
36
+ ```python
37
+ ogp_image = "_static/og-default.png"
38
+ ```
39
+
40
+ A 1200×630 PNG works on Slack, Facebook, LinkedIn, and X/Twitter
41
+ unfurlers. With these three values set, every page rendered by an
42
+ HTML-family builder gains `og:title`, `og:type`, `og:url`,
43
+ `og:site_name`, `og:description`, `og:image`, and `og:image:alt`. A
44
+ matching `<meta name="description">` is emitted when the page does not
45
+ already define one.
46
+
47
+ ## Auto-derived values when used with gp-sphinx
48
+
49
+ Projects that build through {py:func}`gp_sphinx.config.merge_sphinx_config`
50
+ do not need to set `ogp_site_url`, `ogp_site_name`, or `ogp_image`
51
+ manually. Pass `docs_url=` to `merge_sphinx_config()` and gp-sphinx
52
+ fills all three from that one value. See [the sphinx-gp-opengraph package
53
+ page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/) for the integration story
54
+ and [`configuration.md`](https://github.com/git-pull/gp-sphinx/blob/main/docs/configuration.md#from-docs_url)
55
+ for the canonical mapping table.
56
+
57
+ ## Per-page overrides
58
+
59
+ Set front-matter fields to override the site-wide defaults on a single
60
+ page. MyST syntax shown; reST field-list syntax behaves the same way.
61
+
62
+ ```markdown
63
+ ---
64
+ ogp_description_length: 160
65
+ og:image: _static/og/this-page.png
66
+ og:image:alt: A tailored hero for this page
67
+ ---
68
+
69
+ # Page title
70
+
71
+ Body paragraph that becomes og:description.
72
+ ```
73
+
74
+ | Field | Effect |
75
+ | --- | --- |
76
+ | `og:image` | Replace the site-default image for this page |
77
+ | `og:image:alt` | Replace the alt text for this page |
78
+ | `ogp_description_length` | Override the description-length cap for this page |
79
+ | `ogp_disable: true` | Skip OpenGraph emission entirely on this page |
80
+
81
+ Any other `og:*` field-list entry is forwarded to the page head verbatim,
82
+ so `og:type`, `og:audio`, etc. work without code changes.
83
+
84
+ ## Twitter cards
85
+
86
+ sphinx-gp-opengraph does not register a separate `twitter_*` namespace;
87
+ crawlers fall back to `og:*` for most fields. Append explicit Twitter
88
+ markup through `ogp_custom_meta_tags` when you need it:
89
+
90
+ ```python
91
+ ogp_custom_meta_tags = [
92
+ '<meta name="twitter:card" content="summary_large_image" />',
93
+ '<meta property="og:image:width" content="1200" />',
94
+ '<meta property="og:image:height" content="630" />',
95
+ ]
96
+ ```
97
+
98
+ ## Migration from `sphinxext-opengraph`
99
+
100
+ Configuration is drop-in compatible — every `ogp_*` key is registered
101
+ with the same name, type, and default — with one behavioural change:
102
+
103
+ - **`ogp_social_cards` is accepted but ignored.** sphinx-gp-opengraph does not
104
+ bundle the matplotlib-based card generator the upstream ships. Setting
105
+ the value emits one `WARNING` at `config-inited`:
106
+
107
+ ```text
108
+ sphinx-gp-opengraph: ogp_social_cards ignored — sphinx-gp-opengraph ships no card generator
109
+ ```
110
+
111
+ Grep your build log for `ogp_social_cards ignored` to find this
112
+ warning. The replacement workflow lives in the next section.
113
+
114
+ The recommended replacement is one static PNG per page. Drop them under
115
+ `_static/og/` and point the per-page `og:image` field-list entry at
116
+ each one. The downstream UX is the same as upstream's auto-generated
117
+ cards — just explicit, and with no build-time dependency on matplotlib
118
+ or PIL.
119
+
120
+ ```text
121
+ docs/
122
+ ├── _static/
123
+ │ └── og/
124
+ │ ├── default.png
125
+ │ ├── quickstart.png
126
+ │ └── reference.png
127
+ ├── quickstart.md
128
+ └── reference.md
129
+ ```
130
+
131
+ ```markdown
132
+ ---
133
+ og:image: _static/og/quickstart.png
134
+ ---
135
+
136
+ # Quickstart
137
+ ```
138
+
139
+ ## See also
140
+
141
+ - [sphinx-gp-sitemap](https://github.com/git-pull/gp-sphinx/tree/main/packages/sphinx-gp-sitemap)
142
+ — companion package for `sitemap.xml` emission
143
+ - [gp-sphinx](https://github.com/git-pull/gp-sphinx) — the umbrella
144
+ docs platform; auto-derives `ogp_site_url`, `ogp_site_name`, and
145
+ `ogp_image` from a single `docs_url` argument
146
+ - [sphinx-gp-opengraph package page](https://gp-sphinx.git-pull.com/packages/sphinx-gp-opengraph/)
147
+ — integration story, event hooks, and how-it-works
@@ -0,0 +1,40 @@
1
+ [project]
2
+ name = "sphinx-gp-opengraph"
3
+ version = "0.0.1a10"
4
+ description = "OpenGraph and Twitter meta-tag emission for Sphinx — matplotlib-free"
5
+ requires-python = ">=3.10,<4.0"
6
+ authors = [
7
+ {name = "Tony Narlock", email = "tony@git-pull.com"}
8
+ ]
9
+ license = { text = "MIT" }
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Framework :: Sphinx",
14
+ "Framework :: Sphinx :: Extension",
15
+ "Intended Audience :: Developers",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Programming Language :: Python :: 3.14",
22
+ "Topic :: Documentation",
23
+ "Topic :: Documentation :: Sphinx",
24
+ "Typing :: Typed",
25
+ ]
26
+ readme = "README.md"
27
+ keywords = ["sphinx", "opengraph", "meta", "social", "documentation"]
28
+ dependencies = [
29
+ "sphinx>=8.1",
30
+ ]
31
+
32
+ [project.urls]
33
+ Repository = "https://github.com/git-pull/gp-sphinx"
34
+
35
+ [build-system]
36
+ requires = ["hatchling"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/sphinx_gp_opengraph"]
@@ -0,0 +1,423 @@
1
+ """OpenGraph and Twitter meta-tag emission for Sphinx.
2
+
3
+ Drop-in replacement for ``sphinxext-opengraph`` with the same ``ogp_*``
4
+ configuration surface, minus the matplotlib-based social-card generator.
5
+ The ``ogp_social_cards`` config value is still accepted (so existing
6
+ ``conf.py`` files do not error), but setting it emits a one-line warning
7
+ directing users to the static-image alternative.
8
+
9
+ ``setup()`` registers every ``ogp_*`` config value and connects the
10
+ ``html-page-context`` hook that emits OpenGraph ``<meta>`` tags, raw
11
+ ``ogp_custom_meta_tags`` (e.g. Twitter), and an optional ``<meta name="description">``.
12
+
13
+ Examples
14
+ --------
15
+ >>> from sphinx_gp_opengraph import setup
16
+ >>> callable(setup)
17
+ True
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import html
23
+ import logging
24
+ import os
25
+ import pathlib
26
+ import types
27
+ import typing as t
28
+ import urllib.parse
29
+
30
+ from docutils import nodes
31
+ from sphinx.application import Sphinx
32
+
33
+ from sphinx_gp_opengraph._description import get_description
34
+ from sphinx_gp_opengraph._meta import get_meta_description
35
+ from sphinx_gp_opengraph._title import get_title
36
+
37
+ if t.TYPE_CHECKING:
38
+ from sphinx.builders import Builder
39
+ from sphinx.config import Config
40
+ from sphinx.util.typing import ExtensionMetadata
41
+
42
+ logger = logging.getLogger(__name__)
43
+ logger.addHandler(logging.NullHandler())
44
+
45
+ _EXTENSION_VERSION = "0.0.1a10"
46
+
47
+ DEFAULT_DESCRIPTION_LENGTH = 200
48
+
49
+ # File extensions (matched against image URI suffixes) for a selection from
50
+ # https://www.iana.org/assignments/media-types/media-types.xhtml#image
51
+ IMAGE_MIME_TYPES: frozenset[str] = frozenset(
52
+ {"gif", "apng", "webp", "jpeg", "jpg", "png", "bmp", "heic", "heif", "tiff"},
53
+ )
54
+
55
+ __all__ = [
56
+ "DEFAULT_DESCRIPTION_LENGTH",
57
+ "IMAGE_MIME_TYPES",
58
+ "setup",
59
+ ]
60
+
61
+
62
def html_page_context(
    app: Sphinx,
    pagename: str,
    templatename: str,
    context: dict[str, t.Any],
    doctree: nodes.document,
) -> None:
    """Append the page's OpenGraph ``<meta>`` block to ``context['metatags']``.

    Handler for Sphinx's ``html-page-context`` event. Nothing is appended
    when the active builder is named ``epub`` or when ``doctree`` is falsy
    (pages rendered without a doctree).

    Parameters
    ----------
    app : Sphinx
        Sphinx application; supplies ``app.builder`` and ``app.config``.
    pagename : str
        Unused; part of the event signature.
    templatename : str
        Unused; part of the event signature.
    context : dict[str, Any]
        HTML page context; its ``metatags`` entry is extended in place.
    doctree : docutils.nodes.document
        Doctree for the page, forwarded to ``get_tags``.
    """
    # ``pagename`` / ``templatename`` are intentionally ignored: ``get_tags``
    # reads the page name from ``context`` instead.
    active_builder = app.builder
    if active_builder.name == "epub" or not doctree:
        return
    context["metatags"] += get_tags(
        context,
        doctree,
        config=app.config,
        builder=active_builder,
    )
85
+
86
+
87
def get_tags(
    context: dict[str, t.Any],
    doctree: nodes.document,
    *,
    config: Config,
    builder: Builder,
) -> str:
    """Compose the block of ``<meta>`` tags for one page.

    Parameters
    ----------
    context : dict[str, Any]
        Sphinx HTML page context (provides ``title``, ``pagename``,
        ``meta`` field-list, and existing ``metatags`` string). When
        ``context['meta']`` is a non-empty dict it is used directly, so the
        ``og:image`` / ``og:image:alt`` entries are removed from it in place.
    doctree : docutils.nodes.document
        Resolved doctree for the page, walked to extract the description
        and (optionally) the first image.
    config : sphinx.config.Config
        Project configuration (sources all ``ogp_*`` values).
    builder : sphinx.builders.Builder
        Active builder; ``get_target_uri`` resolves the per-page URL.

    Returns
    -------
    str
        Newline-terminated block of ``<meta>`` tags ready to append to
        ``context['metatags']``. Empty when the page's field list contains
        an ``ogp_disable`` key (presence is what counts, not its value).

    Raises
    ------
    RuntimeError
        Propagated from ``_ambient_site_url`` when ``ogp_site_url`` is unset,
        ``READTHEDOCS`` is set, and no canonical URL is provided.
    """
    fields: dict[str, t.Any] = context.get("meta") or {}
    if "ogp_disable" in fields:
        return ""

    tags: dict[str, str] = {}
    meta_tags: dict[str, str] = {}  # Non-og <meta name="..."> tags

    # A blank or malformed per-page ``ogp_description_length`` must not abort
    # the build: ``int(None)`` raises TypeError, ``int("abc")`` ValueError.
    try:
        desc_len = int(
            fields.get("ogp_description_length", config.ogp_description_length),
        )
    except (TypeError, ValueError):
        desc_len = DEFAULT_DESCRIPTION_LENGTH

    title, title_excluding_html = get_title(context["title"])
    description = get_description(doctree, desc_len, {title, title_excluding_html})

    tags["og:title"] = title
    tags["og:type"] = config.ogp_type

    # Only consult the ReadTheDocs environment when no site URL is configured.
    if not config.ogp_site_url and os.getenv("READTHEDOCS"):
        ogp_site_url = _ambient_site_url()
    else:
        ogp_site_url = config.ogp_site_url

    ogp_canonical_url = config.ogp_canonical_url or ogp_site_url

    page_url = urllib.parse.urljoin(
        ogp_canonical_url,
        builder.get_target_uri(context["pagename"]),
    )
    tags["og:url"] = page_url

    site_name = _resolve_site_name(config)
    if site_name:
        tags["og:site_name"] = site_name

    if description:
        tags["og:description"] = description
        # Mirror into <meta name="description"> only if none exists already.
        if config.ogp_enable_meta_description and not get_meta_description(
            context["metatags"],
        ):
            meta_tags["description"] = description

    image_url, ogp_image_alt, ogp_use_first_image = _resolve_image(fields, config)

    first_image = None
    if ogp_use_first_image:
        found = doctree.next_node(nodes.image)
        if (
            found
            and pathlib.Path(found.get("uri", "")).suffix[1:].lower()
            in IMAGE_MIME_TYPES
        ):
            first_image = found
            image_url = found["uri"]
            ogp_image_alt = found.get("alt")

    if image_url:
        # NOTE: ``_resolve_image`` pops ``og:image`` from ``fields``, so this
        # guard is always true here; it is kept for parity with upstream.
        if "og:image" not in fields:
            image_url_parsed = urllib.parse.urlparse(image_url)
            if not image_url_parsed.scheme:
                # Scheme-less path: resolve against the page URL for an
                # in-page image, otherwise against the site URL.
                root = page_url if first_image else ogp_site_url
                image_url = urllib.parse.urljoin(root, image_url_parsed.path)
            tags["og:image"] = image_url

        # Alt text: explicit string wins; ``None`` falls back to the site
        # name, then the title; any other value (e.g. ``False``) emits none.
        if isinstance(ogp_image_alt, str):
            tags["og:image:alt"] = ogp_image_alt
        elif ogp_image_alt is None and site_name:
            tags["og:image:alt"] = site_name
        elif ogp_image_alt is None and title:
            tags["og:image:alt"] = title

    # Already handled above; drop it so the generic forwarding below does not
    # re-apply the raw field value.
    fields.pop("og:image:alt", None)

    # Arbitrary og:* overrides supplied through MyST / field-list frontmatter
    tags.update({k: v for k, v in fields.items() if k.startswith("og:")})

    return (
        "\n".join(
            [_make_tag(p, c) for p, c in tags.items()]
            + [_make_tag(p, c, "name") for p, c in meta_tags.items()]
            + list(config.ogp_custom_meta_tags),
        )
        + "\n"
    )
201
+
202
+
203
def _ambient_site_url() -> str:
    """Build a site URL from the ``READTHEDOCS_CANONICAL_URL`` variable.

    Returns
    -------
    str
        The canonical URL's scheme, host, and path, with any query string
        and fragment removed.

    Raises
    ------
    RuntimeError
        When ``READTHEDOCS_CANONICAL_URL`` is unset or empty.
    """
    canonical = os.getenv("READTHEDOCS_CANONICAL_URL")
    if canonical:
        pieces = urllib.parse.urlsplit(canonical)
        # Keep scheme / netloc / path; blank out query and fragment.
        return urllib.parse.urlunsplit(tuple(pieces[:3]) + ("", ""))
    msg = "ReadTheDocs did not provide a valid canonical URL"
    raise RuntimeError(msg)
213
+
214
+
215
def _resolve_site_name(config: Config) -> str | None:
    """Pick the value to emit as ``og:site_name``.

    ``config.ogp_site_name`` set to ``False`` disables the tag (``None`` is
    returned); ``None`` selects ``config.project``; anything else is
    returned unchanged.
    """
    configured = config.ogp_site_name
    if configured is False:
        return None
    chosen = config.project if configured is None else configured
    return t.cast("str", chosen)
222
+
223
+
224
def _resolve_image(
    fields: dict[str, t.Any],
    config: Config,
) -> tuple[str | None, str | bool | None, bool]:
    """Return ``(image_url, alt_text, use_first_image)`` for this page.

    A per-page ``og:image`` field takes precedence: it is removed from
    ``fields`` (the dict is mutated), first-image lookup is switched off,
    and only a per-page ``og:image:alt`` is considered for the alt text.
    Without it, the values come from ``config.ogp_image`` and
    ``config.ogp_use_first_image``, and the alt text is the per-page
    ``og:image:alt`` if present, else ``config.ogp_image_alt``.
    """
    if "og:image" not in fields:
        site_image: str | None = config.ogp_image
        want_first_image = bool(config.ogp_use_first_image)
        alt: str | bool | None = fields.get("og:image:alt", config.ogp_image_alt)
        return site_image, alt, want_first_image

    page_alt: str | bool | None = fields.get("og:image:alt")
    # Popping also keeps ``og:image`` out of any later pass over ``fields``.
    page_image: str | None = fields.pop("og:image")
    return page_image, page_alt, False
243
+
244
+
245
def _make_tag(
    property_: str,
    content: str,
    attr: t.Literal["property", "name"] = "property",
) -> str:
    """Render one ``<meta>`` tag with both attribute values HTML-escaped.

    Centralising the escape here is the boundary that keeps every meta tag
    safe — titles, site names, descriptions, image alts, and custom
    field-list values all flow through this function. Per-source escaping
    (e.g. pre-escaping the description) would either leave other paths
    unsafe or double-escape (``&`` → ``&amp;`` → ``&amp;amp;``).

    Parameters
    ----------
    property_ : str
        Value of the ``property`` / ``name`` attribute (e.g. ``og:title``).
        Escaped as well, because it can come from a page's field-list key.
    content : str
        Value of the ``content`` attribute. Non-string values are converted
        with ``str()`` before escaping instead of raising.
    attr : {"property", "name"}
        Which attribute carries ``property_``.

    Returns
    -------
    str
        A self-closing ``<meta ... />`` element.
    """
    # ``quote=True`` also escapes ``"`` and ``'`` so neither value can break
    # out of its double-quoted attribute.
    safe_property = html.escape(str(property_), quote=True)
    safe_content = html.escape(str(content), quote=True)
    return f'<meta {attr}="{safe_property}" content="{safe_content}" />'
260
+
261
+
262
def _warn_if_social_cards_used(app: Sphinx, config: Config) -> None:
    """Emit a one-line warning when ``ogp_social_cards`` is set.

    sphinx-gp-opengraph deliberately omits the matplotlib-based card generator
    upstream ships. The ``ogp_social_cards`` config value remains
    registered so existing ``conf.py`` files do not error — but its value
    is ignored. Users who want per-page social preview images should
    provide static PNGs and point ``ogp_image`` (plus per-page
    ``og:image`` frontmatter) at them.

    Parameters
    ----------
    app : Sphinx
        Unused; required by Sphinx's ``config-inited`` signature.
    config : sphinx.config.Config
        Project configuration; only ``ogp_social_cards`` is read.
    """
    del app  # unused; required by Sphinx's config-inited signature
    if not config.ogp_social_cards:
        return

    # Route the message through Sphinx's logging facade (``sphinx.*``
    # namespace). A plain stdlib logger that only carries a NullHandler is
    # not wired to Sphinx's handlers, so the warning would neither reach the
    # build output nor be counted by ``sphinx-build -W``.
    from sphinx.util import logging as sphinx_logging

    sphinx_logging.getLogger(__name__).warning(
        "sphinx-gp-opengraph: ogp_social_cards ignored — "
        "sphinx-gp-opengraph ships no card generator",
    )
278
+
279
+
280
def setup(app: Sphinx) -> ExtensionMetadata:
    """Register every ``ogp_*`` config value and connect the event handlers.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.

    Returns
    -------
    ExtensionMetadata
        Extension metadata — version and parallel-build flags.

    Examples
    --------
    >>> from sphinx_gp_opengraph import setup
    >>> callable(setup)
    True
    """
    none_type = types.NoneType

    # One row per config value: (name, default, accepted types, description).
    # Rows are registered top to bottom, all with the "html" rebuild scope.
    registrations: tuple[tuple[str, t.Any, tuple[type, ...], str], ...] = (
        # ogp_site_url="" allows relative URLs by default. Not officially
        # supported by OGP but matches upstream sphinxext-opengraph.
        (
            "ogp_site_url",
            "",
            (str,),
            "Site base URL joined with each page's relative path to form "
            "``og:url``. Required for absolute URLs; auto-derived from "
            "``docs_url`` under gp-sphinx.",
        ),
        (
            "ogp_canonical_url",
            "",
            (str,),
            "Separate canonical URL used to build ``og:url``; falls back "
            "to ``ogp_site_url`` when empty.",
        ),
        (
            "ogp_description_length",
            DEFAULT_DESCRIPTION_LENGTH,
            (int,),
            "Truncation cap (characters) applied to ``og:description`` "
            "after extracting the first body paragraph.",
        ),
        (
            "ogp_image",
            None,
            (str, none_type),
            "Site-default OpenGraph image path or absolute URL. "
            "Auto-derived from ``docs_url`` under gp-sphinx; per-page "
            "``og:image`` front-matter overrides.",
        ),
        (
            "ogp_image_alt",
            None,
            (str, bool, none_type),
            "Alt text for ``ogp_image``. Falls back to ``og:site_name`` "
            "then ``og:title``; ``False`` suppresses the alt tag entirely.",
        ),
        (
            "ogp_use_first_image",
            False,
            (bool,),
            "When ``True`` and no per-page override is set, use the "
            "first in-page image as ``og:image``.",
        ),
        (
            "ogp_type",
            "website",
            (str,),
            "Value emitted as the ``og:type`` tag.",
        ),
        (
            "ogp_site_name",
            None,
            (str, bool, none_type),
            "Value emitted as ``og:site_name``. Defaults to the Sphinx "
            "``project`` name; ``False`` suppresses the tag.",
        ),
        # Accepted-but-ignored: warned about in _warn_if_social_cards_used.
        (
            "ogp_social_cards",
            None,
            (dict, none_type),
            "Accepted-but-ignored compatibility shim for upstream "
            "``sphinxext-opengraph``. Setting any value emits a one-line "
            "WARNING at ``config-inited``; provide a static PNG via "
            "``ogp_image`` or per-page ``og:image`` instead.",
        ),
        (
            "ogp_custom_meta_tags",
            (),
            (list, tuple),
            "Raw ``<meta>`` tag strings appended verbatim after the "
            "structured ``og:*`` block — the supported escape hatch for "
            "Twitter card declarations and image-dimension hints.",
        ),
        (
            "ogp_enable_meta_description",
            True,
            (bool,),
            'When ``True``, emit a ``<meta name="description">`` '
            "mirroring ``og:description`` unless the page already "
            "defines one.",
        ),
    )

    for option, default, accepted, summary in registrations:
        app.add_config_value(
            option,
            default,
            "html",
            types=frozenset(accepted),
            description=summary,
        )

    app.connect("html-page-context", html_page_context)
    app.connect("config-inited", _warn_if_social_cards_used)

    metadata: ExtensionMetadata = {
        "version": _EXTENSION_VERSION,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata
@@ -0,0 +1,169 @@
1
+ """Extract a plain-text description from a Sphinx doctree.
2
+
3
+ ``get_description`` walks a resolved doctree and returns the first chunk of
4
+ prose that Sphinx would render as the page's visible body, suitable for
5
+ inclusion in an ``og:description`` meta tag. Admonitions, code blocks, and
6
+ invisible nodes are skipped; nested lists are flattened into comma-joined
7
+ text; the result is truncated to ``description_length`` characters with a
8
+ trailing ellipsis.
9
+
10
+ Ported verbatim from ``sphinxext.opengraph._description_parser`` (v0.13.0).
11
+
12
+ Examples
13
+ --------
14
+ >>> from sphinx_gp_opengraph._description import get_description, DescriptionParser
15
+ >>> callable(get_description)
16
+ True
17
+ >>> issubclass(DescriptionParser, object)
18
+ True
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import string
24
+ import typing as t
25
+
26
+ from docutils import nodes
27
+
28
+ if t.TYPE_CHECKING:
29
+ from collections.abc import Set
30
+
31
+
32
def get_description(
    doctree: nodes.document,
    description_length: int,
    known_titles: Set[str] = frozenset(),
) -> str:
    """Build a plain-text page description by walking ``doctree``.

    A ``DescriptionParser`` is run over the whole tree with
    ``doctree.walkabout`` and whatever text it accumulated is returned.

    Parameters
    ----------
    doctree : docutils.nodes.document
        Resolved Sphinx doctree for one page.
    description_length : int
        Upper bound on the number of characters in the result.
    known_titles : collections.abc.Set[str]
        Candidate page titles; a first title matching one of them is left
        out of the description.

    Returns
    -------
    str
        Flattened description text, cut to ``description_length``
        characters and ending in ``...`` when it had to be truncated.
        The parser does not HTML-escape the text.
    """
    visitor = DescriptionParser(
        doctree,
        desc_len=description_length,
        known_titles=known_titles,
    )
    doctree.walkabout(visitor)
    return visitor.description
61
+
62
+
63
class DescriptionParser(nodes.NodeVisitor):
    """Walk a doctree and accumulate a text description.

    Skips admonitions, invisible nodes, raw blocks, and literal blocks.
    Titles are separated by colons; list elements by commas; sequential
    lists by periods.

    Parameters
    ----------
    document : docutils.nodes.document
        The document being walked.
    desc_len : int
        Maximum character count for the resulting description.
    known_titles : collections.abc.Set[str]
        Titles treated as the page title; the first title encountered is
        skipped when its text is in this set.

    Attributes
    ----------
    description : str
        Text accumulated so far (the final result once the walk ends).
    list_level : int
        Current nesting depth of ``nodes.Sequential`` containers.
    first_title_found : bool
        Set once the first ``nodes.title`` has been seen.
    stop : bool
        Set when the length cap was hit; the next ``dispatch_visit``
        turns it into ``nodes.StopTraversal``.
    """

    def __init__(
        self,
        document: nodes.document,
        *,
        desc_len: int,
        known_titles: Set[str] = frozenset(),
    ) -> None:
        super().__init__(document)
        self.description = ""
        self.desc_len = desc_len
        self.list_level = 0
        self.known_titles = known_titles
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Node) -> None:
        """Accumulate text from ``node`` unless it should be skipped.

        Raises
        ------
        docutils.nodes.StopTraversal
            When a previous departure already hit the length cap.
        docutils.nodes.SkipNode
            For admonitions, invisible nodes, a first title listed in
            ``known_titles``, raw nodes, and children of literal blocks.
        """
        if self.stop:
            raise nodes.StopTraversal

        # Skip comments & all admonitions
        if isinstance(node, (nodes.Admonition, nodes.Invisible)):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += "-"

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
            raise nodes.SkipNode

        # Only include leaf nodes in the description
        if not node.children:
            text = node.astext().replace("\r", "").replace("\n", " ").strip()

            # HTML escaping happens once at the boundary in _make_tag; doing
            # it here too would double-escape (``&`` → ``&amp;`` →
            # ``&amp;amp;``).

            # Collapse runs of spaces down to one. The needle must be TWO
            # spaces: searching for a single space and replacing it with a
            # single space never makes progress and would loop forever.
            while "  " in text:
                text = text.replace("  ", " ")

            # Put a space between elements if one does not already exist.
            if (
                self.description
                and text
                and self.description[-1] not in string.whitespace
                and text[0] not in string.whitespace + string.punctuation
            ):
                self.description += " "

            self.description += text

    def dispatch_departure(self, node: nodes.Node) -> None:
        """Emit separators and enforce the length cap when leaving nodes."""
        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ":"

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ","

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            if self.description and self.description[-1] == ",":
                self.description = self.description[:-1]
            self.description += "."
            self.list_level -= 1

        # Check for length
        if len(self.description) > self.desc_len:
            self.description = self.description[: self.desc_len]
            # Only swap the tail for an ellipsis when there is room for it.
            if self.desc_len >= 3:
                self.description = self.description[:-3] + "..."

            self.stop = True
@@ -0,0 +1,59 @@
1
+ """Detect a pre-existing ``<meta name="description">`` in collected meta tags.
2
+
3
+ Ported verbatim from ``sphinxext.opengraph._meta_parser`` (v0.13.0), with
4
+ a corrected return type annotation (upstream declared ``bool`` but actually
5
+ returns ``str | bool | None``).
6
+
7
+ Examples
8
+ --------
9
+ >>> from sphinx_gp_opengraph._meta import get_meta_description
10
+ >>> get_meta_description('<meta name="description" content="hello">')
11
+ 'hello'
12
+ >>> get_meta_description('<meta name="other" content="hi">') is None
13
+ True
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import html.parser
19
+
20
+
21
def get_meta_description(meta_tags: str) -> str | bool | None:
    """Look for an existing description meta tag in ``meta_tags``.

    The markup is fed through an ``HTMLTextParser`` and the parser's
    ``meta_description`` attribute is returned unchanged.

    Parameters
    ----------
    meta_tags : str
        Concatenated ``<meta ...>`` tags (as produced by Sphinx).

    Returns
    -------
    str | bool | None
        The value of the ``content`` attribute when a description meta
        tag has one; ``True`` when a description tag exists without a
        ``content`` attribute; ``None`` when no description tag was seen.
    """
    parser = HTMLTextParser()
    parser.feed(meta_tags)
    parser.close()
    return parser.meta_description
41
+
42
+
43
class HTMLTextParser(html.parser.HTMLParser):
    """Record whether a ``<meta name="description">`` tag was seen.

    After feeding markup, ``meta_description`` is ``None`` when no such
    tag was encountered, the tag's ``content`` value when it has one, and
    ``True`` when the tag carries no ``content`` attribute.
    """

    def __init__(self) -> None:
        super().__init__()
        # Stays None until a description meta tag opens.
        self.meta_description: str | bool | None = None

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Store the description content when a matching tag opens."""
        # Example of a matching attribute list:
        # attrs = [("content", "My manual description"), ("name", "description")]
        if ("name", "description") not in attrs:
            return

        # The first ``content`` attribute wins; ``True`` marks a description
        # tag that has no ``content`` attribute at all.
        self.meta_description = next(
            (value for key, value in attrs if key == "content"),
            True,
        )
@@ -0,0 +1,94 @@
1
+ """Extract plain text from an HTML-formatted Sphinx title.
2
+
3
+ Ported verbatim from ``sphinxext.opengraph._title_parser`` (v0.13.0).
4
+
5
+ Examples
6
+ --------
7
+ >>> from sphinx_gp_opengraph._title import get_title
8
+ >>> get_title("<em>libtmux</em>-mcp")
9
+ ('libtmux-mcp', '-mcp')
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import html.parser
15
+
16
+
17
def get_title(title: str) -> tuple[str, str]:
    """Split a title into its full text and its outside-of-tags text.

    The title is fed through an ``HTMLTextParser``; both of the parser's
    accumulated strings are returned.

    Parameters
    ----------
    title : str
        Title text that may contain HTML markup (e.g. an ``<em>`` span
        added by a Sphinx transform).

    Returns
    -------
    tuple[str, str]
        ``(all_text, text_outside_tags)``: the title with tags stripped,
        and the subset of that text that was not enclosed in any HTML
        element.
    """
    parser = HTMLTextParser()
    parser.feed(title)
    parser.close()

    all_text = parser.text
    outside_text = parser.text_outside_tags
    return all_text, outside_text
39
+
40
+
41
# HTML elements that never have a closing tag; they must not affect the
# nesting level tracked by ``HTMLTextParser``.
_VOID_ELEMENTS = frozenset(
    {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
    },
)


class HTMLTextParser(html.parser.HTMLParser):
    """Track text-inside-tags vs text-outside-tags while parsing HTML.

    Attributes
    ----------
    text : str
        All character data seen, in document order.
    text_outside_tags : str
        Only the character data seen while no element was open.
    level : int
        Number of currently open non-void elements; never negative.
    """

    def __init__(self) -> None:
        super().__init__()
        # All text found
        self.text = ""
        # Only text outside of html tags
        self.text_outside_tags = ""
        self.level = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Increase the tag-nesting level (ignoring void elements)."""
        if tag not in _VOID_ELEMENTS:
            self.level += 1

    def handle_endtag(self, tag: str) -> None:
        """Decrease the tag-nesting level (ignoring void elements).

        ``html.parser.HTMLParser`` routes XHTML self-closing forms like
        ``<br/>`` through ``handle_startendtag``, whose default impl
        calls both ``handle_starttag`` and ``handle_endtag``. Filtering
        only the start path would leave the unbalanced end decrement,
        sending ``self.level`` negative and dropping every subsequent
        chunk from ``text_outside_tags``.

        For the same reason the level is clamped at zero: a stray closing
        tag with no matching opener must not push the level negative.
        """
        if tag not in _VOID_ELEMENTS and self.level > 0:
            self.level -= 1

    def handle_data(self, data: str) -> None:
        """Accumulate text, tracking whether it fell outside any tag."""
        self.text += data
        if self.level == 0:
            self.text_outside_tags += data