sphinx-gp-llms 0.0.1a24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sphinx_gp_llms-0.0.1a24/.gitignore +233 -0
- sphinx_gp_llms-0.0.1a24/PKG-INFO +32 -0
- sphinx_gp_llms-0.0.1a24/README.md +6 -0
- sphinx_gp_llms-0.0.1a24/pyproject.toml +43 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/__init__.py +218 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_description.py +87 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_docs_json.py +188 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_llms_full_txt.py +84 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_llms_txt.py +96 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_md_twins.py +72 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/_toctree.py +79 -0
- sphinx_gp_llms-0.0.1a24/src/sphinx_gp_llms/py.typed +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Node
|
|
2
|
+
node_modules/
|
|
3
|
+
*.tsbuildinfo
|
|
4
|
+
.vitest-cache/
|
|
5
|
+
|
|
6
|
+
# Byte-compiled / optimized / DLL files
|
|
7
|
+
__pycache__/
|
|
8
|
+
*.py[codz]
|
|
9
|
+
*$py.class
|
|
10
|
+
|
|
11
|
+
# C extensions
|
|
12
|
+
*.so
|
|
13
|
+
|
|
14
|
+
# Distribution / packaging
|
|
15
|
+
.Python
|
|
16
|
+
build/
|
|
17
|
+
develop-eggs/
|
|
18
|
+
dist/
|
|
19
|
+
downloads/
|
|
20
|
+
eggs/
|
|
21
|
+
.eggs/
|
|
22
|
+
lib/
|
|
23
|
+
lib64/
|
|
24
|
+
parts/
|
|
25
|
+
sdist/
|
|
26
|
+
var/
|
|
27
|
+
wheels/
|
|
28
|
+
share/python-wheels/
|
|
29
|
+
*.egg-info/
|
|
30
|
+
.installed.cfg
|
|
31
|
+
*.egg
|
|
32
|
+
MANIFEST
|
|
33
|
+
|
|
34
|
+
# PyInstaller
|
|
35
|
+
# Usually these files are written by a python script from a template
|
|
36
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
37
|
+
*.manifest
|
|
38
|
+
*.spec
|
|
39
|
+
|
|
40
|
+
# Installer logs
|
|
41
|
+
pip-log.txt
|
|
42
|
+
pip-delete-this-directory.txt
|
|
43
|
+
|
|
44
|
+
# Unit test / coverage reports
|
|
45
|
+
htmlcov/
|
|
46
|
+
.tox/
|
|
47
|
+
.nox/
|
|
48
|
+
.coverage
|
|
49
|
+
.coverage.*
|
|
50
|
+
.cache
|
|
51
|
+
nosetests.xml
|
|
52
|
+
coverage.xml
|
|
53
|
+
*.cover
|
|
54
|
+
*.py.cover
|
|
55
|
+
.hypothesis/
|
|
56
|
+
.pytest_cache/
|
|
57
|
+
cover/
|
|
58
|
+
|
|
59
|
+
# Translations
|
|
60
|
+
*.mo
|
|
61
|
+
*.pot
|
|
62
|
+
|
|
63
|
+
# Django stuff:
|
|
64
|
+
*.log
|
|
65
|
+
local_settings.py
|
|
66
|
+
db.sqlite3
|
|
67
|
+
db.sqlite3-journal
|
|
68
|
+
|
|
69
|
+
# Flask stuff:
|
|
70
|
+
instance/
|
|
71
|
+
.webassets-cache
|
|
72
|
+
|
|
73
|
+
# Scrapy stuff:
|
|
74
|
+
.scrapy
|
|
75
|
+
|
|
76
|
+
# Sphinx documentation
|
|
77
|
+
docs/_build/
|
|
78
|
+
|
|
79
|
+
# PyBuilder
|
|
80
|
+
.pybuilder/
|
|
81
|
+
target/
|
|
82
|
+
|
|
83
|
+
# Jupyter Notebook
|
|
84
|
+
.ipynb_checkpoints
|
|
85
|
+
|
|
86
|
+
# IPython
|
|
87
|
+
profile_default/
|
|
88
|
+
ipython_config.py
|
|
89
|
+
|
|
90
|
+
# pyenv
|
|
91
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
92
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
93
|
+
# .python-version
|
|
94
|
+
|
|
95
|
+
# pipenv
|
|
96
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
97
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
98
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
99
|
+
# install all needed dependencies.
|
|
100
|
+
#Pipfile.lock
|
|
101
|
+
|
|
102
|
+
# UV
|
|
103
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
104
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
105
|
+
# commonly ignored for libraries.
|
|
106
|
+
#uv.lock
|
|
107
|
+
|
|
108
|
+
# poetry
|
|
109
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
110
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
111
|
+
# commonly ignored for libraries.
|
|
112
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
113
|
+
#poetry.lock
|
|
114
|
+
#poetry.toml
|
|
115
|
+
|
|
116
|
+
# pdm
|
|
117
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
118
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
119
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
120
|
+
#pdm.lock
|
|
121
|
+
#pdm.toml
|
|
122
|
+
.pdm-python
|
|
123
|
+
.pdm-build/
|
|
124
|
+
|
|
125
|
+
# pixi
|
|
126
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
127
|
+
#pixi.lock
|
|
128
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
129
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
130
|
+
.pixi
|
|
131
|
+
|
|
132
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
133
|
+
__pypackages__/
|
|
134
|
+
|
|
135
|
+
# Celery stuff
|
|
136
|
+
celerybeat-schedule
|
|
137
|
+
celerybeat.pid
|
|
138
|
+
|
|
139
|
+
# SageMath parsed files
|
|
140
|
+
*.sage.py
|
|
141
|
+
|
|
142
|
+
# Environments
|
|
143
|
+
.env
|
|
144
|
+
.envrc
|
|
145
|
+
.venv
|
|
146
|
+
env/
|
|
147
|
+
venv/
|
|
148
|
+
ENV/
|
|
149
|
+
env.bak/
|
|
150
|
+
venv.bak/
|
|
151
|
+
|
|
152
|
+
# Spyder project settings
|
|
153
|
+
.spyderproject
|
|
154
|
+
.spyproject
|
|
155
|
+
|
|
156
|
+
# Rope project settings
|
|
157
|
+
.ropeproject
|
|
158
|
+
|
|
159
|
+
# mkdocs documentation
|
|
160
|
+
/site
|
|
161
|
+
|
|
162
|
+
# mypy
|
|
163
|
+
.mypy_cache/
|
|
164
|
+
.dmypy.json
|
|
165
|
+
dmypy.json
|
|
166
|
+
|
|
167
|
+
# Pyre type checker
|
|
168
|
+
.pyre/
|
|
169
|
+
|
|
170
|
+
# pytype static type analyzer
|
|
171
|
+
.pytype/
|
|
172
|
+
|
|
173
|
+
# Cython debug symbols
|
|
174
|
+
cython_debug/
|
|
175
|
+
|
|
176
|
+
# PyCharm
|
|
177
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
178
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
179
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
180
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
181
|
+
#.idea/
|
|
182
|
+
|
|
183
|
+
# Abstra
|
|
184
|
+
# Abstra is an AI-powered process automation framework.
|
|
185
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
186
|
+
# Learn more at https://abstra.io/docs
|
|
187
|
+
.abstra/
|
|
188
|
+
|
|
189
|
+
# Visual Studio Code
|
|
190
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
191
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
192
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
193
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
194
|
+
# .vscode/
|
|
195
|
+
|
|
196
|
+
# Ruff stuff:
|
|
197
|
+
.ruff_cache/
|
|
198
|
+
|
|
199
|
+
# PyPI configuration file
|
|
200
|
+
.pypirc
|
|
201
|
+
|
|
202
|
+
# Cursor
|
|
203
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
204
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
205
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
206
|
+
.cursorignore
|
|
207
|
+
.cursorindexingignore
|
|
208
|
+
|
|
209
|
+
# Marimo
|
|
210
|
+
marimo/_static/
|
|
211
|
+
marimo/_lsp/
|
|
212
|
+
__marimo__/
|
|
213
|
+
|
|
214
|
+
# Generated by sphinx_fonts extension (downloaded at build time)
|
|
215
|
+
docs/_static/fonts/
|
|
216
|
+
docs/_static/css/fonts.css
|
|
217
|
+
|
|
218
|
+
# Claude Code
|
|
219
|
+
**/CLAUDE.local.md
|
|
220
|
+
**/CLAUDE.*.md
|
|
221
|
+
**/.claude/settings.local.json
|
|
222
|
+
|
|
223
|
+
# Playwright MCP
|
|
224
|
+
.playwright-mcp/
|
|
225
|
+
|
|
226
|
+
# Repo-local pytest mirror (do not track — validator-only)
|
|
227
|
+
out/
|
|
228
|
+
|
|
229
|
+
# Misc
|
|
230
|
+
.vim/
|
|
231
|
+
*.lprof
|
|
232
|
+
pip-wheel-metadata/
|
|
233
|
+
monkeytype.sqlite3
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sphinx-gp-llms
|
|
3
|
+
Version: 0.0.1a24
|
|
4
|
+
Summary: LLM-friendly documentation outputs for Sphinx — llms.txt, llms-full.txt, docs.json, per-page Markdown
|
|
5
|
+
Project-URL: Repository, https://github.com/git-pull/gp-sphinx
|
|
6
|
+
Author-email: Tony Narlock <tony@git-pull.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: ai,documentation,llm,llms-txt,sphinx
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Framework :: Sphinx
|
|
11
|
+
Classifier: Framework :: Sphinx :: Extension
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Classifier: Topic :: Documentation
|
|
21
|
+
Classifier: Topic :: Documentation :: Sphinx
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: <4.0,>=3.10
|
|
24
|
+
Requires-Dist: sphinx>=8.1
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# sphinx-gp-llms
|
|
28
|
+
|
|
29
|
+
LLM-friendly documentation outputs for Sphinx.
|
|
30
|
+
|
|
31
|
+
Generates `llms.txt`, `llms-full.txt`, `docs.json`, and per-page `.md`
|
|
32
|
+
twin files during the standard HTML build.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sphinx-gp-llms"
|
|
3
|
+
version = "0.0.1a24"
|
|
4
|
+
description = "LLM-friendly documentation outputs for Sphinx — llms.txt, llms-full.txt, docs.json, per-page Markdown"
|
|
5
|
+
requires-python = ">=3.10,<4.0"
|
|
6
|
+
authors = [
|
|
7
|
+
{name = "Tony Narlock", email = "tony@git-pull.com"}
|
|
8
|
+
]
|
|
9
|
+
license = { text = "MIT" }
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Framework :: Sphinx",
|
|
14
|
+
"Framework :: Sphinx :: Extension",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Programming Language :: Python :: 3.14",
|
|
22
|
+
"Topic :: Documentation",
|
|
23
|
+
"Topic :: Documentation :: Sphinx",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
readme = "README.md"
|
|
27
|
+
keywords = ["sphinx", "llm", "documentation", "ai", "llms-txt"]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"sphinx>=8.1",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Repository = "https://github.com/git-pull/gp-sphinx"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/sphinx_gp_llms"]
|
|
41
|
+
|
|
42
|
+
[tool.gp-sphinx.docs]
|
|
43
|
+
showcase = ["dependents"]
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""LLM-friendly documentation outputs for Sphinx.
|
|
2
|
+
|
|
3
|
+
Generates ``llms.txt``, ``llms-full.txt``, ``docs.json``, and per-page
|
|
4
|
+
``.md`` twin files during the standard HTML build, following conventions
|
|
5
|
+
established by llmstxt.org (Jeremy Howard / Answer.AI), Cloudflare
|
|
6
|
+
("Markdown for Agents"), Mintlify, and Lakebed (Ping).
|
|
7
|
+
|
|
8
|
+
The extension hooks into ``build-finished`` to write output files and
|
|
9
|
+
``html-page-context`` to inject footer link variables into the template
|
|
10
|
+
context.
|
|
11
|
+
|
|
12
|
+
Examples
|
|
13
|
+
--------
|
|
14
|
+
>>> from sphinx_gp_llms import setup
|
|
15
|
+
>>> callable(setup)
|
|
16
|
+
True
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import contextlib
|
|
22
|
+
import logging
|
|
23
|
+
import typing as t
|
|
24
|
+
|
|
25
|
+
from sphinx.errors import ExtensionError
|
|
26
|
+
from sphinx.util.logging import getLogger
|
|
27
|
+
|
|
28
|
+
if t.TYPE_CHECKING:
|
|
29
|
+
from docutils import nodes
|
|
30
|
+
from sphinx.application import Sphinx
|
|
31
|
+
from sphinx.util.typing import ExtensionMetadata
|
|
32
|
+
|
|
33
|
+
# Version string reported to Sphinx through the metadata returned by setup().
_EXTENSION_VERSION = "0.0.1a24"

# Sphinx logger used for this module's build-time messages.
logger = getLogger(__name__)
# Attach a no-op handler to the standard-library logger of the same name.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Only the Sphinx entry point is part of the public API.
__all__ = ["setup"]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def setup(app: Sphinx) -> ExtensionMetadata:
    """Wire the extension into a Sphinx application.

    Declares every ``llms_*`` configuration value, makes sure ``site_url``
    exists as a configuration value, and subscribes the output writer and
    the template-context hook to their Sphinx events.

    Parameters
    ----------
    app : Sphinx
        Application the extension is being loaded into.

    Returns
    -------
    ExtensionMetadata
        The extension version plus flags declaring parallel reading and
        parallel writing safe.

    Examples
    --------
    >>> from sphinx_gp_llms import setup
    >>> callable(setup)
    True
    """
    flag = frozenset({bool})
    text = frozenset({str})

    # (name, default, accepted types, description), in registration order.
    option_table: tuple[tuple[str, t.Any, frozenset[type], str], ...] = (
        ("llms_generate_txt", True, flag, "Enable llms.txt generation."),
        ("llms_generate_full", True, flag, "Enable llms-full.txt generation."),
        (
            "llms_generate_json",
            True,
            flag,
            "Enable docs.json agent manifest generation.",
        ),
        (
            "llms_generate_md_twins",
            True,
            flag,
            "Enable per-page .md twin file generation.",
        ),
        (
            "llms_txt_filename",
            "llms.txt",
            text,
            "Output filename for the llms.txt index.",
        ),
        (
            "llms_full_filename",
            "llms-full.txt",
            text,
            "Output filename for the concatenated full-content file.",
        ),
        (
            "llms_json_filename",
            "docs.json",
            text,
            "Output filename for the docs.json agent manifest.",
        ),
        (
            "llms_excludes",
            [],
            frozenset({list}),
            "fnmatch patterns matched against each page's relative URL. "
            "Matched pages are excluded from all LLM outputs.",
        ),
        (
            "llms_description_length",
            200,
            frozenset({int}),
            "Maximum character length for page descriptions.",
        ),
    )
    for option, default, accepted, summary in option_table:
        app.add_config_value(
            option,
            default=default,
            rebuild="",
            types=accepted,
            description=summary,
        )

    # Another extension may already own ``site_url``; a duplicate
    # registration raises ExtensionError, which is deliberately ignored.
    try:
        app.add_config_value(
            "site_url",
            default=None,
            rebuild="",
            types=frozenset({str, type(None)}),
            description=(
                "Site base URL — registered defensively; "
                "sphinx-gp-sitemap usually registers this first."
            ),
        )
    except ExtensionError:
        pass

    app.connect("build-finished", _write_llm_outputs)
    app.connect("html-page-context", _inject_llms_context)

    metadata: ExtensionMetadata = {
        "version": _EXTENSION_VERSION,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _resolve_site_url(app: Sphinx) -> str | None:
    """Return the configured base URL, always ending in ``/``.

    ``site_url`` wins when it is set to a truthy value; ``html_baseurl`` is
    the fallback. ``None`` is returned when neither yields a non-empty value.
    """
    config = app.config
    base: str | None = getattr(config, "site_url", None)
    if not base:
        base = getattr(config, "html_baseurl", None)
    if not base:
        return None
    if base.endswith("/"):
        return base
    return base + "/"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _write_llm_outputs(app: Sphinx, exception: BaseException | None) -> None:
    """Emit every enabled LLM artifact once the build has finished.

    Does nothing when the build raised, when the active builder has no
    ``get_target_uri`` attribute, or when no site URL can be resolved (the
    last case is reported at info level). Writer modules are imported only
    for the outputs that are switched on.
    """
    build_failed = exception is not None
    if build_failed or not hasattr(app.builder, "get_target_uri"):
        return

    base_url = _resolve_site_url(app)
    if not base_url:
        logger.info(
            "sphinx-gp-llms: skipped — site_url and html_baseurl both unset",
            type="llms",
            subtype="configuration",
        )
        return

    if app.config.llms_generate_txt:
        from sphinx_gp_llms._llms_txt import write_llms_txt

        write_llms_txt(app, base_url)

    if app.config.llms_generate_full:
        from sphinx_gp_llms._llms_full_txt import write_llms_full_txt

        write_llms_full_txt(app, base_url)

    if app.config.llms_generate_json:
        from sphinx_gp_llms._docs_json import write_docs_json

        write_docs_json(app, base_url)

    if app.config.llms_generate_md_twins:
        from sphinx_gp_llms._md_twins import write_md_twins

        # The Markdown twins writer takes no base URL.
        write_md_twins(app)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _inject_llms_context(
    app: Sphinx,
    pagename: str,
    templatename: str,
    context: dict[str, t.Any],
    doctree: nodes.document | None,
) -> None:
    """Expose the LLM output locations to the page template.

    Nothing is added when no site URL is configured. Otherwise one context
    variable is set per enabled output: ``llms_md_url`` (the page name plus
    ``.md``), ``llms_txt_url``, ``llms_full_url`` and ``llms_json_url`` (the
    configured filenames).
    """
    # Part of the event signature, but not needed here.
    del templatename, doctree

    if not _resolve_site_url(app):
        return

    config = app.config
    if config.llms_generate_md_twins:
        context["llms_md_url"] = f"{pagename}.md"

    # (enabling flag, template variable, config value holding the filename)
    for enabled_attr, variable, filename_attr in (
        ("llms_generate_txt", "llms_txt_url", "llms_txt_filename"),
        ("llms_generate_full", "llms_full_url", "llms_full_filename"),
        ("llms_generate_json", "llms_json_url", "llms_json_filename"),
    ):
        if getattr(config, enabled_attr):
            context[variable] = getattr(config, filename_attr)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""First-paragraph extraction from Sphinx doctrees.
|
|
2
|
+
|
|
3
|
+
Provides a lightweight description extractor that walks a doctree and
|
|
4
|
+
returns the text of the first body paragraph, suitable for use in
|
|
5
|
+
``llms.txt`` link descriptions and ``docs.json`` page summaries.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
>>> from sphinx_gp_llms._description import get_first_paragraph
|
|
10
|
+
>>> callable(get_first_paragraph)
|
|
11
|
+
True
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import typing as t
|
|
17
|
+
|
|
18
|
+
from docutils import nodes
|
|
19
|
+
|
|
20
|
+
if t.TYPE_CHECKING:
|
|
21
|
+
from sphinx.application import Sphinx
|
|
22
|
+
|
|
23
|
+
# Container node types whose paragraphs are not treated as body text:
# _is_body_paragraph() rejects a paragraph when it meets one of these
# among its ancestors before reaching a section.
_SKIP_PARENTS = (
    nodes.Admonition,
    nodes.field_list,
    nodes.sidebar,
    nodes.topic,
    nodes.comment,
    nodes.footnote,
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _is_body_paragraph(node: nodes.paragraph) -> bool:
    """Decide whether *node* counts as body text.

    Ancestors are inspected from the nearest outward. Meeting one of the
    ``_SKIP_PARENTS`` types first rejects the paragraph; meeting a
    ``section`` first, or running out of ancestors, accepts it.
    """
    decisive = (*_SKIP_PARENTS, nodes.section)
    ancestor = node.parent
    # Climb until an ancestor settles the question or the tree root is passed.
    while ancestor is not None and not isinstance(ancestor, decisive):
        ancestor = ancestor.parent
    return ancestor is None or not isinstance(ancestor, _SKIP_PARENTS)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_first_paragraph(
    app: Sphinx,
    docname: str,
    max_length: int = 200,
) -> str:
    """Extract the first body paragraph from a page's doctree.

    Paragraphs rejected by ``_is_body_paragraph``, empty paragraphs, and
    paragraphs whose text equals the page title are skipped.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    docname : str
        Document name (without extension).
    max_length : int
        Maximum characters to return.

    Returns
    -------
    str
        Flattened paragraph text (newlines replaced by spaces). Text longer
        than *max_length* is cut and ends in ``...``; when *max_length* is
        too small to hold the ellipsis the text is cut without it, so the
        result never exceeds ``max(max_length, 0)`` characters. An empty
        string is returned when the page has no usable paragraph.

    Examples
    --------
    >>> from sphinx_gp_llms._description import get_first_paragraph
    >>> callable(get_first_paragraph)
    True
    """
    ellipsis = "..."
    doctree = app.env.get_doctree(docname)
    title_text = ""
    if docname in app.env.titles:
        title_text = app.env.titles[docname].astext()

    for node in doctree.findall(nodes.paragraph):
        if not _is_body_paragraph(node):
            continue
        text = node.astext().replace("\n", " ").strip()
        if not text or text == title_text:
            continue
        if len(text) <= max_length:
            return text
        if max_length < len(ellipsis):
            # No room for the marker: a negative slice bound would count
            # from the end of the text and return far more than max_length.
            return text[: max(max_length, 0)]
        return text[: max_length - len(ellipsis)] + ellipsis
    return ""
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""Generate ``docs.json`` — an agent-oriented documentation manifest.
|
|
2
|
+
|
|
3
|
+
Follows the agent-manifest convention established by Lakebed (Ping,
|
|
4
|
+
``github.com/pingdotgg/span``). The manifest provides structured
|
|
5
|
+
metadata including ``agentEntrypoints``, a flat ``pages[]`` array with
|
|
6
|
+
per-page ``markdownUrl`` and ``headings[]`` outlines.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
>>> from sphinx_gp_llms._docs_json import write_docs_json
|
|
11
|
+
>>> callable(write_docs_json)
|
|
12
|
+
True
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import fnmatch
|
|
18
|
+
import json
|
|
19
|
+
import pathlib
|
|
20
|
+
import typing as t
|
|
21
|
+
|
|
22
|
+
from docutils import nodes
|
|
23
|
+
from sphinx import addnodes
|
|
24
|
+
from sphinx.util.logging import getLogger
|
|
25
|
+
|
|
26
|
+
from sphinx_gp_llms._description import get_first_paragraph
|
|
27
|
+
from sphinx_gp_llms._toctree import extract_toctree_sections
|
|
28
|
+
|
|
29
|
+
if t.TYPE_CHECKING:
|
|
30
|
+
from sphinx.application import Sphinx
|
|
31
|
+
|
|
32
|
+
logger = getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class _Heading(t.TypedDict):
    """One entry of a page's heading outline, as collected by ``_walk_toc``."""

    # Anchor name of the heading with leading "#" removed; may be empty.
    id: str
    # Nesting depth in the page's table of contents, starting at 1.
    level: int
    # Text of the table-of-contents reference for the heading.
    text: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class _Page(t.TypedDict):
    """Manifest record for a single documentation page."""

    # Page title text.
    title: str
    # First body paragraph of the page, length-limited; may be empty.
    description: str
    # Toctree caption the page is grouped under, or "" when it has none.
    section: str
    # Builder target URI of the page, prefixed with "/".
    url: str
    # "/" + docname + ".md".
    markdownUrl: str
    # Heading outline of the page.
    headings: list[_Heading]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class _AgentEntrypoints(t.TypedDict):
    """Locations of the generated machine-readable entry points."""

    # "/" + the configured docs.json filename.
    manifest: str
    # "/" + the configured llms.txt filename.
    llms: str
    # "/" + the configured llms-full.txt filename.
    llmsFull: str
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class _DocsManifest(t.TypedDict):
    """Top-level structure serialized to ``docs.json``."""

    # Project name taken from the Sphinx configuration.
    name: str
    # Site base URL without its trailing slash.
    url: str
    # First body paragraph of the root document; may be empty.
    description: str
    # ``source_repository`` theme option, or "" when unavailable.
    sourceRepository: str
    # Paths of the manifest, llms.txt and llms-full.txt files.
    agentEntrypoints: _AgentEntrypoints
    # One record per included page, ordered by docname.
    pages: list[_Page]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def write_docs_json(app: Sphinx, site_url: str) -> None:
    """Serialize the agent manifest into the build output directory.

    Every found document that is not excluded by ``llms_excludes`` and has a
    title contributes one page record. The manifest is written as UTF-8
    JSON with two-space indentation and a trailing newline, and the write
    is reported at info level.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._docs_json import write_docs_json
    >>> callable(write_docs_json)
    True
    """
    config = app.config
    patterns: list[str] = list(config.llms_excludes)

    # docname -> toctree caption; a later section wins on duplicates.
    caption_by_doc: dict[str, str] = {
        docname: section.caption or "Documentation"
        for section in extract_toctree_sections(app)
        for docname in section.docnames
    }

    page_records: list[_Page] = []
    for docname in sorted(app.env.found_docs):
        uri = app.builder.get_target_uri(docname)
        if _is_excluded(uri, patterns):
            continue

        title_node = app.env.titles.get(docname)
        if title_node is None:
            # Untitled documents are left out of the manifest.
            continue

        record: _Page = {
            "title": title_node.astext(),
            "description": get_first_paragraph(
                app, docname, config.llms_description_length
            ),
            "section": caption_by_doc.get(docname, ""),
            "url": "/" + uri,
            "markdownUrl": "/" + docname + ".md",
            "headings": _extract_headings(app, docname),
        }
        page_records.append(record)

    repository = _get_source_repository(app)
    summary = get_first_paragraph(
        app, config.root_doc, config.llms_description_length
    )

    document: _DocsManifest = {
        "name": config.project,
        "url": site_url.rstrip("/"),
        "description": summary,
        "sourceRepository": repository,
        "agentEntrypoints": {
            "manifest": "/" + config.llms_json_filename,
            "llms": "/" + config.llms_txt_filename,
            "llmsFull": "/" + config.llms_full_filename,
        },
        "pages": page_records,
    }

    target = pathlib.Path(app.outdir) / config.llms_json_filename
    serialized = json.dumps(document, indent=2, ensure_ascii=False)
    target.write_text(serialized + "\n", encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        config.llms_json_filename,
        target,
        type="llms",
        subtype="information",
    )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _extract_headings(app: Sphinx, docname: str) -> list[_Heading]:
    """Return the heading outline recorded for *docname*.

    The outline is read from the environment's per-document table of
    contents; a document without one yields an empty list.
    """
    outline: list[_Heading] = []
    toc = app.env.tocs.get(docname)
    if toc is not None:
        _walk_toc(toc, level=1, headings=outline)
    return outline
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _walk_toc(
    node: nodes.Node,
    level: int,
    headings: list[_Heading],
) -> None:
    """Collect headings from a table-of-contents subtree into *headings*.

    A bullet list passes its items through at the current *level*. A list
    item records every non-empty reference found in its compact paragraphs
    and descends into nested bullet lists one level deeper. Any other node
    type is ignored.
    """
    if isinstance(node, nodes.bullet_list):
        for entry in node.children:
            _walk_toc(entry, level, headings)
        return

    if not isinstance(node, nodes.list_item):
        return

    for part in node.children:
        if isinstance(part, addnodes.compact_paragraph):
            for link in part.findall(nodes.reference):
                label = link.astext()
                if not label:
                    continue
                # A missing or empty anchor becomes an empty id.
                anchor = link.get("anchorname", "")
                heading: _Heading = {
                    "id": (anchor or "").lstrip("#"),
                    "level": level,
                    "text": label,
                }
                headings.append(heading)
        elif isinstance(part, nodes.bullet_list):
            _walk_toc(part, level + 1, headings)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _get_source_repository(app: Sphinx) -> str:
    """Read ``source_repository`` from the HTML theme options.

    Returns an empty string when ``html_theme_options`` is missing or not a
    dict, when the option is absent, or when it is explicitly ``None`` (so
    the manifest never contains the literal text ``"None"``). Any other
    value is converted with ``str``.
    """
    theme_opts = getattr(app.config, "html_theme_options", None)
    if not isinstance(theme_opts, dict):
        return ""
    repository = theme_opts.get("source_repository")
    if repository is None:
        return ""
    return str(repository)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _is_excluded(uri: str, patterns: list[str]) -> bool:
    """Return True when *uri* matches any fnmatch pattern.

    Matching is case-sensitive on every platform: ``fnmatchcase`` is used
    because ``fnmatch.fnmatch`` normalizes case and path separators
    according to the host operating system, which would make the set of
    excluded URLs depend on where the docs are built.
    """
    return any(fnmatch.fnmatchcase(uri, pattern) for pattern in patterns)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Generate ``llms-full.txt`` — concatenated full-content Markdown.
|
|
2
|
+
|
|
3
|
+
Community convention adopted by Anthropic, Cloudflare, Mintlify, and
|
|
4
|
+
GitBook. Each page's source content is included under a title header
|
|
5
|
+
with a source URL reference, separated by ``---`` dividers.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
>>> from sphinx_gp_llms._llms_full_txt import write_llms_full_txt
|
|
10
|
+
>>> callable(write_llms_full_txt)
|
|
11
|
+
True
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import fnmatch
|
|
17
|
+
import pathlib
|
|
18
|
+
import typing as t
|
|
19
|
+
|
|
20
|
+
from sphinx.util.logging import getLogger
|
|
21
|
+
|
|
22
|
+
if t.TYPE_CHECKING:
|
|
23
|
+
from sphinx.application import Sphinx
|
|
24
|
+
|
|
25
|
+
logger = getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def write_llms_full_txt(app: Sphinx, site_url: str) -> None:
    """Concatenate every page's source into one file in the output directory.

    For each found document not excluded by ``llms_excludes`` the file
    gets a ``# <title>`` line (the docname stands in for a missing title),
    a ``Source: <url>`` line, the page's source text with trailing
    whitespace removed, and a ``---`` divider. A source file that cannot be
    read or decoded as UTF-8 is replaced by a placeholder line. The write
    is reported at info level.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._llms_full_txt import write_llms_full_txt
    >>> callable(write_llms_full_txt)
    True
    """
    patterns: list[str] = list(app.config.llms_excludes)
    chunks: list[str] = []

    for docname in sorted(app.env.found_docs):
        uri = app.builder.get_target_uri(docname)
        if _is_excluded(uri, patterns):
            continue

        title_node = app.env.titles.get(docname)
        heading = docname if title_node is None else title_node.astext()
        page_url = site_url + uri
        source_file = pathlib.Path(app.env.doc2path(docname))

        try:
            body = source_file.read_text(encoding="utf-8").rstrip()
        except (OSError, UnicodeDecodeError):
            # Best effort: keep the page entry even without its source.
            body = f"(source not available for {docname})"

        chunks.extend(
            (f"# {heading}", f"Source: {page_url}", "", body, "", "---", "")
        )

    target = pathlib.Path(app.outdir) / app.config.llms_full_filename
    target.write_text("\n".join(chunks), encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        app.config.llms_full_filename,
        target,
        type="llms",
        subtype="information",
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _is_excluded(uri: str, patterns: list[str]) -> bool:
|
|
83
|
+
"""Return True when *uri* matches any fnmatch pattern."""
|
|
84
|
+
return any(fnmatch.fnmatch(uri, p) for p in patterns)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Generate ``llms.txt`` — a structured Markdown index for LLM agents.
|
|
2
|
+
|
|
3
|
+
Follows the specification at https://llmstxt.org/ (Jeremy Howard,
|
|
4
|
+
Answer.AI, September 2024). The file uses H1 for the project name,
|
|
5
|
+
a blockquote summary, and H2 sections of bulleted ``[title](url)``
|
|
6
|
+
links grouped by toctree caption.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
>>> from sphinx_gp_llms._llms_txt import write_llms_txt
|
|
11
|
+
>>> callable(write_llms_txt)
|
|
12
|
+
True
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import fnmatch
|
|
18
|
+
import pathlib
|
|
19
|
+
import typing as t
|
|
20
|
+
|
|
21
|
+
from sphinx.util.logging import getLogger
|
|
22
|
+
|
|
23
|
+
from sphinx_gp_llms._description import get_first_paragraph
|
|
24
|
+
from sphinx_gp_llms._toctree import extract_toctree_sections
|
|
25
|
+
|
|
26
|
+
if t.TYPE_CHECKING:
|
|
27
|
+
from sphinx.application import Sphinx
|
|
28
|
+
|
|
29
|
+
logger = getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def write_llms_txt(app: Sphinx, site_url: str) -> None:
    """Write ``llms.txt`` to the build output directory.

    The file starts with an H1 holding ``app.config.project``, followed
    by an optional blockquote taken from the root document's first
    paragraph.  Each toctree section then becomes an H2 heading (the
    caption, or ``Documentation`` when the section has none) with one
    ``- [title](url)`` bullet per page, optionally suffixed with
    ``: <description>``.

    Pages whose target URI matches ``llms_excludes`` or that have no
    title are skipped.  A section left without any bullet after that
    filtering is omitted entirely, so the output never contains an
    empty H2 heading.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._llms_txt import write_llms_txt
    >>> callable(write_llms_txt)
    True
    """
    excludes: list[str] = list(app.config.llms_excludes)
    sections = extract_toctree_sections(app)
    # Read the description limit once and reuse it for every page.
    max_len: int = app.config.llms_description_length

    lines: list[str] = [f"# {app.config.project}", ""]

    desc = get_first_paragraph(app, app.config.root_doc, max_len)
    if desc:
        lines.append(f"> {desc}")
        lines.append("")

    for section in sections:
        entries: list[str] = []
        for docname in section.docnames:
            uri = app.builder.get_target_uri(docname)
            if _is_excluded(uri, excludes):
                continue
            title_node = app.env.titles.get(docname)
            if title_node is None:
                continue
            entry = f"- [{title_node.astext()}]({site_url + uri})"
            page_desc = get_first_paragraph(app, docname, max_len)
            if page_desc:
                entry += f": {page_desc}"
            entries.append(entry)

        # Emit the heading only when at least one page survived filtering.
        if not entries:
            continue

        section_name = section.caption or "Documentation"
        lines.append(f"## {section_name}")
        lines.append("")
        lines.extend(entries)
        lines.append("")

    output = pathlib.Path(app.outdir) / app.config.llms_txt_filename
    output.write_text("\n".join(lines), encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        app.config.llms_txt_filename,
        output,
        type="llms",
        subtype="information",
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _is_excluded(uri: str, patterns: list[str]) -> bool:
|
|
95
|
+
"""Return True when *uri* matches any fnmatch pattern."""
|
|
96
|
+
return any(fnmatch.fnmatch(uri, p) for p in patterns)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Generate per-page ``.md`` twin files alongside HTML output.
|
|
2
|
+
|
|
3
|
+
Implements the per-page Markdown endpoint convention popularized by
|
|
4
|
+
Mintlify, Cloudflare ("Markdown for Agents"), Stripe, and Vercel.
|
|
5
|
+
Each HTML page at ``/path/page.html`` gets a Markdown sibling at
|
|
6
|
+
``/path/page.md`` containing the original source content.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
>>> from sphinx_gp_llms._md_twins import write_md_twins
|
|
11
|
+
>>> callable(write_md_twins)
|
|
12
|
+
True
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import fnmatch
|
|
18
|
+
import pathlib
|
|
19
|
+
import shutil
|
|
20
|
+
import typing as t
|
|
21
|
+
|
|
22
|
+
from sphinx.util.logging import getLogger
|
|
23
|
+
|
|
24
|
+
if t.TYPE_CHECKING:
|
|
25
|
+
from sphinx.application import Sphinx
|
|
26
|
+
|
|
27
|
+
logger = getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def write_md_twins(app: Sphinx) -> None:
    """Place a ``<docname>.md`` copy of each source file in the output tree.

    Documents are visited in sorted docname order.  A document is
    skipped when its target URI matches one of ``llms_excludes`` or
    when its source file does not exist.  Parent directories of the
    twin are created as needed, and the number of copied files is
    logged at the end.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.

    Examples
    --------
    >>> from sphinx_gp_llms._md_twins import write_md_twins
    >>> callable(write_md_twins)
    True
    """
    skip_patterns: list[str] = list(app.config.llms_excludes)
    build_root = pathlib.Path(app.outdir)
    written = 0

    for docname in sorted(app.env.found_docs):
        if _is_excluded(app.builder.get_target_uri(docname), skip_patterns):
            continue

        origin = pathlib.Path(app.env.doc2path(docname))
        if origin.exists():
            # The twin is named after the docname, whatever the source suffix.
            twin = build_root / f"{docname}.md"
            twin.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(origin, twin)
            written += 1

    logger.info(
        "sphinx-gp-llms: %d .md twin files written",
        written,
        type="llms",
        subtype="information",
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _is_excluded(uri: str, patterns: list[str]) -> bool:
|
|
71
|
+
"""Return True when *uri* matches any fnmatch pattern."""
|
|
72
|
+
return any(fnmatch.fnmatch(uri, p) for p in patterns)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Toctree section extraction for llms.txt grouping.
|
|
2
|
+
|
|
3
|
+
Walks the root document's doctree to find ``toctree`` directives and
|
|
4
|
+
their ``:caption:`` options, producing a flat list of sections suitable
|
|
5
|
+
for the H2-delimited structure of ``llms.txt``.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
>>> from sphinx_gp_llms._toctree import ToctreeSection
|
|
10
|
+
>>> s = ToctreeSection(caption="Guide", docnames=["quickstart"])
|
|
11
|
+
>>> s.caption
|
|
12
|
+
'Guide'
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import typing as t
|
|
18
|
+
|
|
19
|
+
from sphinx import addnodes
|
|
20
|
+
|
|
21
|
+
if t.TYPE_CHECKING:
|
|
22
|
+
from sphinx.application import Sphinx
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ToctreeSection(t.NamedTuple):
    """A toctree caption paired with the docnames listed under it.

    Examples
    --------
    >>> section = ToctreeSection(caption="API", docnames=["api/index"])
    >>> section
    ToctreeSection(caption='API', docnames=['api/index'])
    >>> section.docnames
    ['api/index']
    """

    # Caption of the toctree; ``None`` when the section has no caption.
    caption: str | None
    # Docnames belonging to this section, in the order they were collected.
    docnames: list[str]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def extract_toctree_sections(app: Sphinx) -> list[ToctreeSection]:
    """Group documents by the toctrees found in the root document.

    Every ``toctree`` node in the root doctree yields one section that
    carries the node's ``caption`` and those of its entries which are
    known documents and have not already been placed in an earlier
    section.  Toctrees contributing no such entry are dropped.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance (must have a built environment).

    Returns
    -------
    list[ToctreeSection]
        Sections in document order.  Documents that no root toctree
        claimed (the root document itself excepted) are appended, in
        sorted order, as a final section with ``caption=None``.

    Examples
    --------
    >>> from sphinx_gp_llms._toctree import extract_toctree_sections
    >>> callable(extract_toctree_sections)
    True
    """
    root = app.config.root_doc
    env = app.env

    grouped: list[ToctreeSection] = []
    seen: set[str] = set()

    for node in env.get_doctree(root).findall(addnodes.toctree):
        pages: list[str] = []
        for _label, target in node["entries"]:
            # Skip empty targets, unknown documents, and pages already placed.
            if not target or target not in env.found_docs or target in seen:
                continue
            pages.append(target)
            seen.add(target)
        if pages:
            grouped.append(
                ToctreeSection(caption=node.get("caption"), docnames=pages)
            )

    leftovers = sorted(env.found_docs - seen - {root})
    if leftovers:
        grouped.append(ToctreeSection(caption=None, docnames=leftovers))

    return grouped
|
|
File without changes
|