sphinx-gp-llms 0.0.1a24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,233 @@
1
+ # Node
2
+ node_modules/
3
+ *.tsbuildinfo
4
+ .vitest-cache/
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[codz]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py.cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # UV
103
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ #uv.lock
107
+
108
+ # poetry
109
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
111
+ # commonly ignored for libraries.
112
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113
+ #poetry.lock
114
+ #poetry.toml
115
+
116
+ # pdm
117
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
118
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
119
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
120
+ #pdm.lock
121
+ #pdm.toml
122
+ .pdm-python
123
+ .pdm-build/
124
+
125
+ # pixi
126
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
127
+ #pixi.lock
128
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
129
+ # in the .venv directory. It is recommended not to include this directory in version control.
130
+ .pixi
131
+
132
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
133
+ __pypackages__/
134
+
135
+ # Celery stuff
136
+ celerybeat-schedule
137
+ celerybeat.pid
138
+
139
+ # SageMath parsed files
140
+ *.sage.py
141
+
142
+ # Environments
143
+ .env
144
+ .envrc
145
+ .venv
146
+ env/
147
+ venv/
148
+ ENV/
149
+ env.bak/
150
+ venv.bak/
151
+
152
+ # Spyder project settings
153
+ .spyderproject
154
+ .spyproject
155
+
156
+ # Rope project settings
157
+ .ropeproject
158
+
159
+ # mkdocs documentation
160
+ /site
161
+
162
+ # mypy
163
+ .mypy_cache/
164
+ .dmypy.json
165
+ dmypy.json
166
+
167
+ # Pyre type checker
168
+ .pyre/
169
+
170
+ # pytype static type analyzer
171
+ .pytype/
172
+
173
+ # Cython debug symbols
174
+ cython_debug/
175
+
176
+ # PyCharm
177
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
178
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
179
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
180
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
181
+ #.idea/
182
+
183
+ # Abstra
184
+ # Abstra is an AI-powered process automation framework.
185
+ # Ignore directories containing user credentials, local state, and settings.
186
+ # Learn more at https://abstra.io/docs
187
+ .abstra/
188
+
189
+ # Visual Studio Code
190
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
191
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
192
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
193
+ # you could uncomment the following to ignore the entire vscode folder
194
+ # .vscode/
195
+
196
+ # Ruff stuff:
197
+ .ruff_cache/
198
+
199
+ # PyPI configuration file
200
+ .pypirc
201
+
202
+ # Cursor
203
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
204
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
205
+ # refer to https://docs.cursor.com/context/ignore-files
206
+ .cursorignore
207
+ .cursorindexingignore
208
+
209
+ # Marimo
210
+ marimo/_static/
211
+ marimo/_lsp/
212
+ __marimo__/
213
+
214
+ # Generated by sphinx_fonts extension (downloaded at build time)
215
+ docs/_static/fonts/
216
+ docs/_static/css/fonts.css
217
+
218
+ # Claude Code
219
+ **/CLAUDE.local.md
220
+ **/CLAUDE.*.md
221
+ **/.claude/settings.local.json
222
+
223
+ # Playwright MCP
224
+ .playwright-mcp/
225
+
226
+ # Repo-local pytest mirror (do not track — validator-only)
227
+ out/
228
+
229
+ # Misc
230
+ .vim/
231
+ *.lprof
232
+ pip-wheel-metadata/
233
+ monkeytype.sqlite3
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: sphinx-gp-llms
3
+ Version: 0.0.1a24
4
+ Summary: LLM-friendly documentation outputs for Sphinx — llms.txt, llms-full.txt, docs.json, per-page Markdown
5
+ Project-URL: Repository, https://github.com/git-pull/gp-sphinx
6
+ Author-email: Tony Narlock <tony@git-pull.com>
7
+ License: MIT
8
+ Keywords: ai,documentation,llm,llms-txt,sphinx
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Framework :: Sphinx
11
+ Classifier: Framework :: Sphinx :: Extension
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Classifier: Topic :: Documentation
21
+ Classifier: Topic :: Documentation :: Sphinx
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: <4.0,>=3.10
24
+ Requires-Dist: sphinx>=8.1
25
+ Description-Content-Type: text/markdown
26
+
27
+ # sphinx-gp-llms
28
+
29
+ LLM-friendly documentation outputs for Sphinx.
30
+
31
+ Generates `llms.txt`, `llms-full.txt`, `docs.json`, and per-page `.md`
32
+ twin files during the standard HTML build.
@@ -0,0 +1,6 @@
1
+ # sphinx-gp-llms
2
+
3
+ LLM-friendly documentation outputs for Sphinx.
4
+
5
+ Generates `llms.txt`, `llms-full.txt`, `docs.json`, and per-page `.md`
6
+ twin files during the standard HTML build.
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "sphinx-gp-llms"
3
+ version = "0.0.1a24"
4
+ description = "LLM-friendly documentation outputs for Sphinx — llms.txt, llms-full.txt, docs.json, per-page Markdown"
5
+ requires-python = ">=3.10,<4.0"
6
+ authors = [
7
+ {name = "Tony Narlock", email = "tony@git-pull.com"}
8
+ ]
9
+ license = { text = "MIT" }
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Framework :: Sphinx",
14
+ "Framework :: Sphinx :: Extension",
15
+ "Intended Audience :: Developers",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Programming Language :: Python :: 3.14",
22
+ "Topic :: Documentation",
23
+ "Topic :: Documentation :: Sphinx",
24
+ "Typing :: Typed",
25
+ ]
26
+ readme = "README.md"
27
+ keywords = ["sphinx", "llm", "documentation", "ai", "llms-txt"]
28
+ dependencies = [
29
+ "sphinx>=8.1",
30
+ ]
31
+
32
+ [project.urls]
33
+ Repository = "https://github.com/git-pull/gp-sphinx"
34
+
35
+ [build-system]
36
+ requires = ["hatchling"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/sphinx_gp_llms"]
41
+
42
+ [tool.gp-sphinx.docs]
43
+ showcase = ["dependents"]
@@ -0,0 +1,218 @@
1
+ """LLM-friendly documentation outputs for Sphinx.
2
+
3
+ Generates ``llms.txt``, ``llms-full.txt``, ``docs.json``, and per-page
4
+ ``.md`` twin files during the standard HTML build, following conventions
5
+ established by llmstxt.org (Jeremy Howard / Answer.AI), Cloudflare
6
+ ("Markdown for Agents"), Mintlify, and Lakebed (Ping).
7
+
8
+ The extension hooks into ``build-finished`` to write output files and
9
+ ``html-page-context`` to inject footer link variables into the template
10
+ context.
11
+
12
+ Examples
13
+ --------
14
+ >>> from sphinx_gp_llms import setup
15
+ >>> callable(setup)
16
+ True
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import contextlib
22
+ import logging
23
+ import typing as t
24
+
25
+ from sphinx.errors import ExtensionError
26
+ from sphinx.util.logging import getLogger
27
+
28
+ if t.TYPE_CHECKING:
29
+ from docutils import nodes
30
+ from sphinx.application import Sphinx
31
+ from sphinx.util.typing import ExtensionMetadata
32
+
33
+ _EXTENSION_VERSION = "0.0.1a24"
34
+
35
+ logger = getLogger(__name__)
36
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
37
+
38
+ __all__ = ["setup"]
39
+
40
+
41
def setup(app: Sphinx) -> ExtensionMetadata:
    """Wire the extension into a Sphinx application.

    Registers every ``llms_*`` config value, defensively registers
    ``site_url``, and connects the ``build-finished`` and
    ``html-page-context`` handlers.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.

    Returns
    -------
    ExtensionMetadata
        Extension version plus the parallel read/write safety flags.

    Examples
    --------
    >>> from sphinx_gp_llms import setup
    >>> callable(setup)
    True
    """
    # One row per option: (name, default, accepted type, description).
    # The table lives inside the function so the list default is a fresh
    # object on every call, and rows are registered in declaration order.
    option_table: tuple[tuple[str, t.Any, type, str], ...] = (
        ("llms_generate_txt", True, bool, "Enable llms.txt generation."),
        ("llms_generate_full", True, bool, "Enable llms-full.txt generation."),
        (
            "llms_generate_json",
            True,
            bool,
            "Enable docs.json agent manifest generation.",
        ),
        (
            "llms_generate_md_twins",
            True,
            bool,
            "Enable per-page .md twin file generation.",
        ),
        (
            "llms_txt_filename",
            "llms.txt",
            str,
            "Output filename for the llms.txt index.",
        ),
        (
            "llms_full_filename",
            "llms-full.txt",
            str,
            "Output filename for the concatenated full-content file.",
        ),
        (
            "llms_json_filename",
            "docs.json",
            str,
            "Output filename for the docs.json agent manifest.",
        ),
        (
            "llms_excludes",
            [],
            list,
            (
                "fnmatch patterns matched against each page's relative URL. "
                "Matched pages are excluded from all LLM outputs."
            ),
        ),
        (
            "llms_description_length",
            200,
            int,
            "Maximum character length for page descriptions.",
        ),
    )
    for option, fallback, accepted, blurb in option_table:
        app.add_config_value(
            option,
            default=fallback,
            rebuild="",
            types=frozenset({accepted}),
            description=blurb,
        )

    # Another extension may already own ``site_url``; a duplicate
    # registration raises ExtensionError, which is deliberately ignored.
    with contextlib.suppress(ExtensionError):
        app.add_config_value(
            "site_url",
            default=None,
            rebuild="",
            types=frozenset({str, type(None)}),
            description=(
                "Site base URL — registered defensively; "
                "sphinx-gp-sitemap usually registers this first."
            ),
        )

    app.connect("build-finished", _write_llm_outputs)
    app.connect("html-page-context", _inject_llms_context)

    metadata: ExtensionMetadata = {
        "version": _EXTENSION_VERSION,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata
147
+
148
+
149
+ def _resolve_site_url(app: Sphinx) -> str | None:
150
+ """Resolve site URL from config, normalizing trailing slash."""
151
+ url: str | None = getattr(app.config, "site_url", None) or getattr(
152
+ app.config, "html_baseurl", None
153
+ )
154
+ if not url:
155
+ return None
156
+ return url if url.endswith("/") else url + "/"
157
+
158
+
159
def _write_llm_outputs(app: Sphinx, exception: BaseException | None) -> None:
    """Handle ``build-finished``: emit each LLM output that is switched on.

    Nothing is written when the build failed, when the builder lacks a
    ``get_target_uri`` attribute, or when no site base URL is configured.
    """
    if exception is not None or not hasattr(app.builder, "get_target_uri"):
        return

    base_url = _resolve_site_url(app)
    if not base_url:
        logger.info(
            "sphinx-gp-llms: skipped — site_url and html_baseurl both unset",
            type="llms",
            subtype="configuration",
        )
        return

    cfg = app.config

    # Writer modules are imported lazily, only for the outputs enabled.
    if cfg.llms_generate_txt:
        from sphinx_gp_llms._llms_txt import write_llms_txt

        write_llms_txt(app, base_url)

    if cfg.llms_generate_full:
        from sphinx_gp_llms._llms_full_txt import write_llms_full_txt

        write_llms_full_txt(app, base_url)

    if cfg.llms_generate_json:
        from sphinx_gp_llms._docs_json import write_docs_json

        write_docs_json(app, base_url)

    if cfg.llms_generate_md_twins:
        from sphinx_gp_llms._md_twins import write_md_twins

        write_md_twins(app)
195
+
196
+
197
def _inject_llms_context(
    app: Sphinx,
    pagename: str,
    templatename: str,
    context: dict[str, t.Any],
    doctree: nodes.document | None,
) -> None:
    """Handle ``html-page-context``: expose LLM output links to templates.

    For each enabled output one key is added to *context*:
    ``llms_md_url``, ``llms_txt_url``, ``llms_full_url``, ``llms_json_url``.
    Nothing is added when no site base URL is configured.
    """
    del templatename, doctree

    if not _resolve_site_url(app):
        return

    cfg = app.config
    # (enabled flag, context key, value) — kept in insertion order.
    candidates = (
        (cfg.llms_generate_md_twins, "llms_md_url", pagename + ".md"),
        (cfg.llms_generate_txt, "llms_txt_url", cfg.llms_txt_filename),
        (cfg.llms_generate_full, "llms_full_url", cfg.llms_full_filename),
        (cfg.llms_generate_json, "llms_json_url", cfg.llms_json_filename),
    )
    context.update({key: value for enabled, key, value in candidates if enabled})
@@ -0,0 +1,87 @@
1
+ """First-paragraph extraction from Sphinx doctrees.
2
+
3
+ Provides a lightweight description extractor that walks a doctree and
4
+ returns the text of the first body paragraph, suitable for use in
5
+ ``llms.txt`` link descriptions and ``docs.json`` page summaries.
6
+
7
+ Examples
8
+ --------
9
+ >>> from sphinx_gp_llms._description import get_first_paragraph
10
+ >>> callable(get_first_paragraph)
11
+ True
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import typing as t
17
+
18
+ from docutils import nodes
19
+
20
+ if t.TYPE_CHECKING:
21
+ from sphinx.application import Sphinx
22
+
23
+ _SKIP_PARENTS = (
24
+ nodes.Admonition,
25
+ nodes.field_list,
26
+ nodes.sidebar,
27
+ nodes.topic,
28
+ nodes.comment,
29
+ nodes.footnote,
30
+ )
31
+
32
+
33
def _is_body_paragraph(node: nodes.paragraph) -> bool:
    """Decide whether *node* counts as body text.

    Ancestors are walked upward from the paragraph. Meeting one of
    ``_SKIP_PARENTS`` first rejects the paragraph; meeting a ``section``
    first accepts it. A paragraph whose ancestor chain contains neither
    is accepted as well.
    """
    current: nodes.Node = node
    while (current := current.parent) is not None:
        if isinstance(current, (*_SKIP_PARENTS, nodes.section)):
            # The nearest decisive ancestor wins; skip containers take
            # priority should a node ever be both.
            return not isinstance(current, _SKIP_PARENTS)
    return True
43
+
44
+
45
def _truncate_description(text: str, max_length: int) -> str:
    """Clamp *text* to *max_length* characters, marking cuts with ``...``."""
    if len(text) <= max_length:
        return text
    if max_length < 3:
        # No room for the three-character ellipsis. A plain hard cut keeps
        # the result within the limit; the previous ``max_length - 3``
        # slice went negative here and returned an over-long string.
        return text[: max(max_length, 0)]
    return text[: max_length - 3] + "..."


def get_first_paragraph(
    app: Sphinx,
    docname: str,
    max_length: int = 200,
) -> str:
    """Extract the first body paragraph from a page's doctree.

    Paragraphs are visited in document order. Those rejected by
    ``_is_body_paragraph``, empty ones, and any whose text equals the
    page title are skipped.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    docname : str
        Document name (without extension).
    max_length : int
        Maximum characters to return.

    Returns
    -------
    str
        Paragraph text with newlines replaced by spaces, truncated with
        ``...`` when exceeding *max_length*. When *max_length* is below
        three the text is cut without an ellipsis so the limit still
        holds. An empty string is returned when no paragraph qualifies.

    Examples
    --------
    >>> from sphinx_gp_llms._description import get_first_paragraph
    >>> callable(get_first_paragraph)
    True
    """
    doctree = app.env.get_doctree(docname)
    title_node = app.env.titles.get(docname)
    title_text = title_node.astext() if title_node is not None else ""

    for node in doctree.findall(nodes.paragraph):
        if not _is_body_paragraph(node):
            continue
        text = node.astext().replace("\n", " ").strip()
        if not text or text == title_text:
            continue
        return _truncate_description(text, max_length)
    return ""
@@ -0,0 +1,188 @@
1
+ """Generate ``docs.json`` — an agent-oriented documentation manifest.
2
+
3
+ Follows the agent-manifest convention established by Lakebed (Ping,
4
+ ``github.com/pingdotgg/span``). The manifest provides structured
5
+ metadata including ``agentEntrypoints``, a flat ``pages[]`` array with
6
+ per-page ``markdownUrl`` and ``headings[]`` outlines.
7
+
8
+ Examples
9
+ --------
10
+ >>> from sphinx_gp_llms._docs_json import write_docs_json
11
+ >>> callable(write_docs_json)
12
+ True
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import fnmatch
18
+ import json
19
+ import pathlib
20
+ import typing as t
21
+
22
+ from docutils import nodes
23
+ from sphinx import addnodes
24
+ from sphinx.util.logging import getLogger
25
+
26
+ from sphinx_gp_llms._description import get_first_paragraph
27
+ from sphinx_gp_llms._toctree import extract_toctree_sections
28
+
29
+ if t.TYPE_CHECKING:
30
+ from sphinx.application import Sphinx
31
+
32
+ logger = getLogger(__name__)
33
+
34
+
35
+ class _Heading(t.TypedDict):
36
+ id: str
37
+ level: int
38
+ text: str
39
+
40
+
41
+ class _Page(t.TypedDict):
42
+ title: str
43
+ description: str
44
+ section: str
45
+ url: str
46
+ markdownUrl: str
47
+ headings: list[_Heading]
48
+
49
+
50
+ class _AgentEntrypoints(t.TypedDict):
51
+ manifest: str
52
+ llms: str
53
+ llmsFull: str
54
+
55
+
56
+ class _DocsManifest(t.TypedDict):
57
+ name: str
58
+ url: str
59
+ description: str
60
+ sourceRepository: str
61
+ agentEntrypoints: _AgentEntrypoints
62
+ pages: list[_Page]
63
+
64
+
65
def write_docs_json(app: Sphinx, site_url: str) -> None:
    """Serialize the agent manifest into the build output directory.

    Every found document that is not excluded and has a title becomes one
    ``pages`` entry carrying its title, description, toctree section,
    URLs and heading outline.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._docs_json import write_docs_json
    >>> callable(write_docs_json)
    True
    """
    config = app.config
    env = app.env
    skip_patterns: list[str] = list(config.llms_excludes)

    # docname -> caption of the toctree section that lists it.
    caption_by_doc: dict[str, str] = {
        member: (group.caption or "Documentation")
        for group in extract_toctree_sections(app)
        for member in group.docnames
    }

    page_entries: list[_Page] = []
    for docname in sorted(env.found_docs):
        target_uri = app.builder.get_target_uri(docname)
        if _is_excluded(target_uri, skip_patterns):
            continue

        heading_node = env.titles.get(docname)
        if heading_node is None:
            continue

        entry: _Page = {
            "title": heading_node.astext(),
            "description": get_first_paragraph(
                app, docname, config.llms_description_length
            ),
            "section": caption_by_doc.get(docname, ""),
            "url": "/" + target_uri,
            "markdownUrl": "/" + docname + ".md",
            "headings": _extract_headings(app, docname),
        }
        page_entries.append(entry)

    repository = _get_source_repository(app)
    summary = get_first_paragraph(
        app, config.root_doc, config.llms_description_length
    )

    manifest: _DocsManifest = {
        "name": config.project,
        "url": site_url.rstrip("/"),
        "description": summary,
        "sourceRepository": repository,
        "agentEntrypoints": {
            "manifest": "/" + config.llms_json_filename,
            "llms": "/" + config.llms_txt_filename,
            "llmsFull": "/" + config.llms_full_filename,
        },
        "pages": page_entries,
    }

    destination = pathlib.Path(app.outdir) / config.llms_json_filename
    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"
    destination.write_text(payload, encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        config.llms_json_filename,
        destination,
        type="llms",
        subtype="information",
    )
144
+
145
+
146
def _extract_headings(app: Sphinx, docname: str) -> list[_Heading]:
    """Return the heading outline recorded in ``app.env.tocs`` for *docname*.

    An empty list is returned when the environment has no toc for the page.
    """
    collected: list[_Heading] = []
    toc_root = app.env.tocs.get(docname)
    if toc_root is not None:
        # Top-level toc entries are level 1.
        _walk_toc(toc_root, level=1, headings=collected)
    return collected
154
+
155
+
156
def _walk_toc(
    node: nodes.Node,
    level: int,
    headings: list[_Heading],
) -> None:
    """Append the headings found under *node* to *headings*, depth first.

    A ``bullet_list`` is traversed at the current *level*. A ``list_item``
    contributes the references inside its ``compact_paragraph`` children
    and recurses into nested bullet lists one level deeper. Any other
    node type is ignored.
    """
    if isinstance(node, nodes.bullet_list):
        for entry in node.children:
            _walk_toc(entry, level, headings)
        return

    if not isinstance(node, nodes.list_item):
        return

    for part in node.children:
        if isinstance(part, addnodes.compact_paragraph):
            for link in part.findall(nodes.reference):
                label = link.astext()
                if not label:
                    continue
                # A missing or empty anchorname yields an empty id.
                raw_anchor = link.get("anchorname", "")
                headings.append(
                    _Heading(
                        id=(raw_anchor or "").lstrip("#"),
                        level=level,
                        text=label,
                    )
                )
        elif isinstance(part, nodes.bullet_list):
            _walk_toc(part, level + 1, headings)
176
+
177
+
178
+ def _get_source_repository(app: Sphinx) -> str:
179
+ """Read source_repository from theme options."""
180
+ theme_opts = getattr(app.config, "html_theme_options", None)
181
+ if isinstance(theme_opts, dict):
182
+ return str(theme_opts.get("source_repository", ""))
183
+ return ""
184
+
185
+
186
+ def _is_excluded(uri: str, patterns: list[str]) -> bool:
187
+ """Return True when *uri* matches any fnmatch pattern."""
188
+ return any(fnmatch.fnmatch(uri, p) for p in patterns)
@@ -0,0 +1,84 @@
1
+ """Generate ``llms-full.txt`` — concatenated full-content Markdown.
2
+
3
+ Community convention adopted by Anthropic, Cloudflare, Mintlify, and
4
+ GitBook. Each page's source content is included under a title header
5
+ with a source URL reference, separated by ``---`` dividers.
6
+
7
+ Examples
8
+ --------
9
+ >>> from sphinx_gp_llms._llms_full_txt import write_llms_full_txt
10
+ >>> callable(write_llms_full_txt)
11
+ True
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import fnmatch
17
+ import pathlib
18
+ import typing as t
19
+
20
+ from sphinx.util.logging import getLogger
21
+
22
+ if t.TYPE_CHECKING:
23
+ from sphinx.application import Sphinx
24
+
25
+ logger = getLogger(__name__)
26
+
27
+
28
def write_llms_full_txt(app: Sphinx, site_url: str) -> None:
    """Concatenate every included page's source into one output file.

    Each page contributes a ``# title`` line, a ``Source:`` URL line, the
    raw source text, and a ``---`` divider. A page without a title node
    falls back to its docname; an unreadable source is replaced by a
    placeholder line.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._llms_full_txt import write_llms_full_txt
    >>> callable(write_llms_full_txt)
    True
    """
    skip_patterns: list[str] = list(app.config.llms_excludes)
    chunks: list[str] = []

    for docname in sorted(app.env.found_docs):
        page_uri = app.builder.get_target_uri(docname)
        if _is_excluded(page_uri, skip_patterns):
            continue

        heading_node = app.env.titles.get(docname)
        page_title = docname if heading_node is None else heading_node.astext()
        origin = pathlib.Path(app.env.doc2path(docname))

        try:
            body = origin.read_text(encoding="utf-8").rstrip()
        except (OSError, UnicodeDecodeError):
            body = f"(source not available for {docname})"

        chunks.extend(
            (
                f"# {page_title}",
                f"Source: {site_url + page_uri}",
                "",
                body,
                "",
                "---",
                "",
            )
        )

    destination = pathlib.Path(app.outdir) / app.config.llms_full_filename
    destination.write_text("\n".join(chunks), encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        app.config.llms_full_filename,
        destination,
        type="llms",
        subtype="information",
    )
80
+
81
+
82
+ def _is_excluded(uri: str, patterns: list[str]) -> bool:
83
+ """Return True when *uri* matches any fnmatch pattern."""
84
+ return any(fnmatch.fnmatch(uri, p) for p in patterns)
@@ -0,0 +1,96 @@
1
+ """Generate ``llms.txt`` — a structured Markdown index for LLM agents.
2
+
3
+ Follows the specification at https://llmstxt.org/ (Jeremy Howard,
4
+ Answer.AI, September 2024). The file uses H1 for the project name,
5
+ a blockquote summary, and H2 sections of bulleted ``[title](url)``
6
+ links grouped by toctree caption.
7
+
8
+ Examples
9
+ --------
10
+ >>> from sphinx_gp_llms._llms_txt import write_llms_txt
11
+ >>> callable(write_llms_txt)
12
+ True
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import fnmatch
18
+ import pathlib
19
+ import typing as t
20
+
21
+ from sphinx.util.logging import getLogger
22
+
23
+ from sphinx_gp_llms._description import get_first_paragraph
24
+ from sphinx_gp_llms._toctree import extract_toctree_sections
25
+
26
+ if t.TYPE_CHECKING:
27
+ from sphinx.application import Sphinx
28
+
29
+ logger = getLogger(__name__)
30
+
31
+
32
def write_llms_txt(app: Sphinx, site_url: str) -> None:
    """Write ``llms.txt`` to the build output directory.

    The file starts with an H1 holding the project name, followed by a
    blockquote with the root document's first paragraph (when there is
    one), then one H2 section per toctree group listing its pages as
    ``- [title](url): description`` bullets.

    Pages matching ``llms_excludes`` or lacking a title are left out. A
    section whose pages were all left out is omitted entirely, so the
    file never contains an H2 heading with no links under it.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.
    site_url : str
        Normalized site base URL with trailing slash.

    Examples
    --------
    >>> from sphinx_gp_llms._llms_txt import write_llms_txt
    >>> callable(write_llms_txt)
    True
    """
    excludes: list[str] = list(app.config.llms_excludes)
    sections = extract_toctree_sections(app)
    max_len: int = app.config.llms_description_length

    lines: list[str] = [f"# {app.config.project}", ""]

    desc = get_first_paragraph(app, app.config.root_doc, max_len)
    if desc:
        lines.append(f"> {desc}")
        lines.append("")

    for section in sections:
        # Collect the bullets first so the heading is only emitted when
        # at least one page survives filtering.
        entries: list[str] = []
        for docname in section.docnames:
            uri = app.builder.get_target_uri(docname)
            if _is_excluded(uri, excludes):
                continue
            title_node = app.env.titles.get(docname)
            if title_node is None:
                continue
            entry = f"- [{title_node.astext()}]({site_url + uri})"
            page_desc = get_first_paragraph(app, docname, max_len)
            if page_desc:
                entry += f": {page_desc}"
            entries.append(entry)

        if not entries:
            continue

        section_name = section.caption or "Documentation"
        lines.append(f"## {section_name}")
        lines.append("")
        lines.extend(entries)
        lines.append("")

    output = pathlib.Path(app.outdir) / app.config.llms_txt_filename
    output.write_text("\n".join(lines), encoding="utf-8")
    logger.info(
        "sphinx-gp-llms: %s generated at %s",
        app.config.llms_txt_filename,
        output,
        type="llms",
        subtype="information",
    )
92
+
93
+
94
+ def _is_excluded(uri: str, patterns: list[str]) -> bool:
95
+ """Return True when *uri* matches any fnmatch pattern."""
96
+ return any(fnmatch.fnmatch(uri, p) for p in patterns)
@@ -0,0 +1,72 @@
1
+ """Generate per-page ``.md`` twin files alongside HTML output.
2
+
3
+ Implements the per-page Markdown endpoint convention popularized by
4
+ Mintlify, Cloudflare ("Markdown for Agents"), Stripe, and Vercel.
5
+ Each HTML page at ``/path/page.html`` gets a Markdown sibling at
6
+ ``/path/page.md`` containing the original source content.
7
+
8
+ Examples
9
+ --------
10
+ >>> from sphinx_gp_llms._md_twins import write_md_twins
11
+ >>> callable(write_md_twins)
12
+ True
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import fnmatch
18
+ import pathlib
19
+ import shutil
20
+ import typing as t
21
+
22
+ from sphinx.util.logging import getLogger
23
+
24
+ if t.TYPE_CHECKING:
25
+ from sphinx.application import Sphinx
26
+
27
+ logger = getLogger(__name__)
28
+
29
+
30
def write_md_twins(app: Sphinx) -> None:
    """Place a ``<docname>.md`` copy of each page's source in the output tree.

    Pages matching ``llms_excludes`` and pages whose source file does not
    exist are skipped. Parent directories are created as needed, and the
    number of files copied is logged.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance.

    Examples
    --------
    >>> from sphinx_gp_llms._md_twins import write_md_twins
    >>> callable(write_md_twins)
    True
    """
    skip_patterns: list[str] = list(app.config.llms_excludes)
    out_root = pathlib.Path(app.outdir)
    written = 0

    for docname in sorted(app.env.found_docs):
        if _is_excluded(app.builder.get_target_uri(docname), skip_patterns):
            continue

        origin = pathlib.Path(app.env.doc2path(docname))
        if origin.exists():
            destination = out_root / (docname + ".md")
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(origin, destination)
            written += 1

    logger.info(
        "sphinx-gp-llms: %d .md twin files written",
        written,
        type="llms",
        subtype="information",
    )
68
+
69
+
70
+ def _is_excluded(uri: str, patterns: list[str]) -> bool:
71
+ """Return True when *uri* matches any fnmatch pattern."""
72
+ return any(fnmatch.fnmatch(uri, p) for p in patterns)
@@ -0,0 +1,79 @@
1
+ """Toctree section extraction for llms.txt grouping.
2
+
3
+ Walks the root document's doctree to find ``toctree`` directives and
4
+ their ``:caption:`` options, producing a flat list of sections suitable
5
+ for the H2-delimited structure of ``llms.txt``.
6
+
7
+ Examples
8
+ --------
9
+ >>> from sphinx_gp_llms._toctree import ToctreeSection
10
+ >>> s = ToctreeSection(caption="Guide", docnames=["quickstart"])
11
+ >>> s.caption
12
+ 'Guide'
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import typing as t
18
+
19
+ from sphinx import addnodes
20
+
21
+ if t.TYPE_CHECKING:
22
+ from sphinx.application import Sphinx
23
+
24
+
25
class ToctreeSection(t.NamedTuple):
    """A caption together with the docnames listed under it.

    Examples
    --------
    >>> ToctreeSection(caption="API", docnames=["api/index"])
    ToctreeSection(caption='API', docnames=['api/index'])
    """

    # Toctree ``caption`` value; ``None`` when the group has no caption.
    caption: str | None
    # Document names belonging to this group, in listing order.
    docnames: list[str]
36
+
37
+
38
def extract_toctree_sections(app: Sphinx) -> list[ToctreeSection]:
    """Group the project's pages by the root document's toctrees.

    Each toctree in the root document yields one section holding the
    entries that are known documents and have not already been claimed by
    an earlier toctree. Toctrees left with no entries are dropped.

    Parameters
    ----------
    app : Sphinx
        Sphinx application instance (must have a built environment).

    Returns
    -------
    list[ToctreeSection]
        Sections in document order. Found documents that no root toctree
        claimed (the root document itself excepted) are appended, sorted,
        as a final ``caption=None`` section.

    Examples
    --------
    >>> from sphinx_gp_llms._toctree import extract_toctree_sections
    >>> callable(extract_toctree_sections)
    True
    """
    root = app.config.root_doc
    env = app.env

    grouped: list[ToctreeSection] = []
    claimed: set[str] = set()

    for tree in env.get_doctree(root).findall(addnodes.toctree):
        members: list[str] = []
        for _label, target in tree["entries"]:
            # Skip empty targets, entries that are not found documents,
            # and pages an earlier entry already claimed.
            if not target or target not in env.found_docs or target in claimed:
                continue
            claimed.add(target)
            members.append(target)
        if members:
            grouped.append(
                ToctreeSection(caption=tree.get("caption"), docnames=members)
            )

    leftovers = sorted(env.found_docs - claimed - {root})
    if leftovers:
        grouped.append(ToctreeSection(caption=None, docnames=leftovers))

    return grouped
File without changes