studyctl 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- studyctl/__init__.py +3 -0
- studyctl/calendar.py +140 -0
- studyctl/cli/__init__.py +56 -0
- studyctl/cli/_config.py +128 -0
- studyctl/cli/_content.py +462 -0
- studyctl/cli/_lazy.py +35 -0
- studyctl/cli/_review.py +491 -0
- studyctl/cli/_schedule.py +125 -0
- studyctl/cli/_setup.py +164 -0
- studyctl/cli/_shared.py +83 -0
- studyctl/cli/_state.py +69 -0
- studyctl/cli/_sync.py +156 -0
- studyctl/cli/_web.py +228 -0
- studyctl/content/__init__.py +5 -0
- studyctl/content/markdown_converter.py +271 -0
- studyctl/content/models.py +31 -0
- studyctl/content/notebooklm_client.py +434 -0
- studyctl/content/splitter.py +159 -0
- studyctl/content/storage.py +105 -0
- studyctl/content/syllabus.py +416 -0
- studyctl/history.py +982 -0
- studyctl/maintenance.py +69 -0
- studyctl/mcp/__init__.py +1 -0
- studyctl/mcp/server.py +58 -0
- studyctl/mcp/tools.py +234 -0
- studyctl/pdf.py +89 -0
- studyctl/review_db.py +277 -0
- studyctl/review_loader.py +375 -0
- studyctl/scheduler.py +242 -0
- studyctl/services/__init__.py +6 -0
- studyctl/services/content.py +39 -0
- studyctl/services/review.py +127 -0
- studyctl/settings.py +367 -0
- studyctl/shared.py +425 -0
- studyctl/state.py +120 -0
- studyctl/sync.py +229 -0
- studyctl/tui/__main__.py +33 -0
- studyctl/tui/app.py +395 -0
- studyctl/tui/study_cards.py +396 -0
- studyctl/web/__init__.py +1 -0
- studyctl/web/app.py +68 -0
- studyctl/web/routes/__init__.py +1 -0
- studyctl/web/routes/artefacts.py +57 -0
- studyctl/web/routes/cards.py +86 -0
- studyctl/web/routes/courses.py +91 -0
- studyctl/web/routes/history.py +69 -0
- studyctl/web/server.py +260 -0
- studyctl/web/static/app.js +853 -0
- studyctl/web/static/icon-192.svg +4 -0
- studyctl/web/static/icon-512.svg +4 -0
- studyctl/web/static/index.html +50 -0
- studyctl/web/static/manifest.json +21 -0
- studyctl/web/static/style.css +657 -0
- studyctl/web/static/sw.js +14 -0
- studyctl-2.0.0.dist-info/METADATA +49 -0
- studyctl-2.0.0.dist-info/RECORD +58 -0
- studyctl-2.0.0.dist-info/WHEEL +4 -0
- studyctl-2.0.0.dist-info/entry_points.txt +3 -0
studyctl/cli/_web.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""Web, TUI, and docs commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
|
|
10
|
+
from studyctl.cli._shared import console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _find_docs_dir() -> Path:
|
|
14
|
+
"""Find the docs directory relative to the package."""
|
|
15
|
+
candidate = Path(__file__).resolve().parent.parent
|
|
16
|
+
for _ in range(6):
|
|
17
|
+
if (candidate / "mkdocs.yml").exists():
|
|
18
|
+
return candidate / "docs"
|
|
19
|
+
candidate = candidate.parent
|
|
20
|
+
for p in [
|
|
21
|
+
Path.home() / "code" / "personal" / "tools" / "socratic-study-mentor" / "docs",
|
|
22
|
+
Path.home() / ".agents" / "shared",
|
|
23
|
+
]:
|
|
24
|
+
if p.exists():
|
|
25
|
+
return p
|
|
26
|
+
msg = "Could not find docs directory. Run from the repo or set STUDYCTL_DOCS_DIR."
|
|
27
|
+
raise click.ClickException(msg)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _strip_markdown(text: str) -> str:
|
|
31
|
+
"""Strip markdown formatting for TTS-friendly plain text."""
|
|
32
|
+
import re
|
|
33
|
+
|
|
34
|
+
text = re.sub(r"```[\s\S]*?```", "", text)
|
|
35
|
+
text = re.sub(r"`[^`]+`", "", text)
|
|
36
|
+
text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
|
|
37
|
+
text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text)
|
|
38
|
+
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
|
|
39
|
+
text = re.sub(r"<[^>]+>", "", text)
|
|
40
|
+
text = re.sub(r"^!!! \w+.*$", "", text, flags=re.MULTILINE)
|
|
41
|
+
text = re.sub(r"^\|.*\|$", "", text, flags=re.MULTILINE)
|
|
42
|
+
text = re.sub(r"^[-|: ]+$", "", text, flags=re.MULTILINE)
|
|
43
|
+
text = re.sub(r"\n{3,}", "\n\n", text)
|
|
44
|
+
return text.strip()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@click.command()
@click.option("--port", "-p", default=8567, help="Port for web server")
@click.option("--lan", is_flag=True, help="Expose to LAN (default: localhost only)")
def web(port: int, lan: bool) -> None:
    """Launch the study PWA in your browser.

    Serves flashcard and quiz review as a web app accessible from any
    device on the network. Installable as a PWA (add to home screen).
    Includes OpenDyslexic font toggle for accessibility.

    Requires: uv pip install 'studyctl[web]'
    """
    # uvicorn ships with the optional "web" extra; bail out with an install
    # hint instead of a traceback when it is absent.
    try:
        import uvicorn
    except ImportError:
        console.print(
            "[red]The web server requires FastAPI.[/red]\nInstall: uv pip install 'studyctl[web]'"
        )
        return

    import yaml

    # Review directories come from the user's config file; a missing or
    # unreadable config leaves the list empty (best effort).
    config_path = Path.home() / ".config" / "studyctl" / "config.yaml"
    study_dirs: list[str] = []
    if config_path.exists():
        try:
            data = yaml.safe_load(config_path.read_text()) or {}
            # "or" fallbacks: a key present with a null value yields None,
            # which .get()'s default argument would not replace.
            study_dirs = (data.get("review") or {}).get("directories") or []
        except (OSError, ValueError, AttributeError, yaml.YAMLError):
            # Stay best-effort, but tell the user why no directories loaded.
            console.print(
                "[yellow]Could not read config.yaml; starting with no study directories[/yellow]"
            )

    from studyctl.web.app import create_app

    # Bind to all interfaces only when --lan is given.
    host = "0.0.0.0" if lan else "127.0.0.1"
    app = create_app(study_dirs=study_dirs)
    console.print(f"[bold]Study PWA at http://{host}:{port}[/bold]")
    if not lan:
        console.print("[dim]Use --lan to expose to network[/dim]")
    uvicorn.run(app, host=host, port=port, workers=1, log_level="warning")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@click.command()
def tui() -> None:
    """Launch the interactive terminal dashboard (requires textual).

    Install: uv pip install 'studyctl[tui]'

    Key bindings: f=flashcards, z=quiz, d=dashboard, q=quit, v=voice, o=OpenDyslexic

    For a web-based UI accessible from any device, use: studyctl web
    """
    # The TUI module is only importable with the optional "tui" extra.
    try:
        from studyctl.tui.app import StudyApp
    except ImportError:
        console.print(
            "[red]The TUI requires 'textual'.[/red]\nInstall: uv pip install 'studyctl[tui]'"
        )
        return

    import yaml

    # Defaults used when the config file is missing or unreadable.
    config_path = Path.home() / ".config" / "studyctl" / "config.yaml"
    study_dirs: list[str] = []
    theme: str = ""
    dyslexic: bool = False
    if config_path.exists():
        try:
            data = yaml.safe_load(config_path.read_text()) or {}
            # "or" fallbacks: a key present with a null value yields None,
            # which .get()'s default argument would not replace.
            study_dirs = (data.get("review") or {}).get("directories") or []
            tui_cfg = data.get("tui") or {}
            theme = tui_cfg.get("theme", "")
            dyslexic = tui_cfg.get("dyslexic_friendly", False)
        except (OSError, ValueError, AttributeError, yaml.YAMLError):
            # Stay best-effort, but tell the user why defaults are in use.
            console.print("[yellow]Could not read config.yaml; using default TUI settings[/yellow]")

    app = StudyApp(
        study_dirs=study_dirs,
        theme_name=theme,
        dyslexic_friendly=dyslexic,
    )
    app.run()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# --- Docs commands ---
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Parent click group named "docs"; the serve/open/list/read commands in this
# module register themselves on it via @docs_group.command(...).
# The body is intentionally empty: the function only anchors the subcommands.
@click.group(name="docs")
def docs_group() -> None:
    """Browse and read documentation."""
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@docs_group.command(name="serve")
@click.option("--port", "-p", default=8000, help="Port for local server")
def docs_serve(port: int) -> None:
    """Serve documentation site locally and open in browser."""
    import shutil
    import subprocess

    # mkdocs must run from the directory that holds the docs folder.
    repo_root = _find_docs_dir().parent

    # Report a missing mkdocs as a normal CLI error rather than letting
    # subprocess raise FileNotFoundError with a traceback.
    if shutil.which("mkdocs") is None:
        raise click.ClickException("mkdocs not found on PATH. Install mkdocs to serve the docs.")

    console.print(f"[bold]Serving docs at http://localhost:{port}[/bold]")
    # check=False: the server normally ends via Ctrl+C, so a non-zero exit
    # status is not treated as an error.
    subprocess.run(["mkdocs", "serve", "-a", f"localhost:{port}"], cwd=str(repo_root), check=False)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@docs_group.command(name="open")
def docs_open() -> None:
    """Build and open documentation in browser."""
    import subprocess
    import webbrowser

    repo_root = _find_docs_dir().parent
    site_dir = repo_root / "site"
    console.print("Building docs...")
    try:
        subprocess.run(
            ["mkdocs", "build"],
            cwd=str(repo_root),
            check=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except FileNotFoundError as exc:
        # mkdocs binary is missing: show a CLI error instead of a traceback.
        raise click.ClickException(
            "mkdocs not found on PATH. Install mkdocs to build the docs."
        ) from exc
    except subprocess.CalledProcessError as exc:
        # Output is captured, so surface stderr; otherwise the reason for the
        # failed build would be invisible to the user.
        detail = (exc.stderr or "").strip()
        raise click.ClickException(
            f"mkdocs build failed (exit {exc.returncode}).\n{detail}"
        ) from exc

    index = site_dir / "index.html"
    if index.exists():
        # as_uri() percent-encodes spaces and produces a valid file URL on
        # Windows, which string concatenation with "file://" does not.
        webbrowser.open(index.resolve().as_uri())
        console.print("[green]Opened docs in browser[/green]")
    else:
        console.print("[red]Build failed \u2014 site/index.html not found[/red]")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@docs_group.command(name="list")
def docs_list() -> None:
    """List available documentation pages."""
    docs_dir = _find_docs_dir()
    table = Table(title="Documentation Pages")
    table.add_column("Page", style="bold")
    table.add_column("Title")
    for md in sorted(docs_dir.glob("*.md")):
        # Fallback title derived from the file name, e.g. "voice-output" -> "Voice Output".
        fallback = md.stem.replace("-", " ").title()
        # Decode explicitly as UTF-8 rather than the locale encoding;
        # errors="replace" keeps one badly encoded page from aborting the list.
        lines = md.read_text(encoding="utf-8", errors="replace").splitlines()
        # The first level-1 heading wins; otherwise use the fallback.
        headings = (line[2:].strip() for line in lines if line.startswith("# "))
        table.add_row(md.stem, next(headings, fallback))
    console.print(table)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@docs_group.command(name="read")
@click.argument("page")
def docs_read(page: str) -> None:
    """Read a documentation page aloud using study-speak.

    PAGE is the doc name without .md extension (e.g. 'voice-output', 'audhd-learning-philosophy').
    Use 'studyctl docs list' to see available pages.
    """
    import subprocess

    docs_dir = _find_docs_dir()
    md_file = docs_dir / f"{page}.md"
    if not md_file.exists():
        # No exact file: accept a case-insensitive substring match, but only
        # when it identifies exactly one page.
        matches = [f for f in docs_dir.glob("*.md") if page.lower() in f.stem.lower()]
        if len(matches) == 1:
            md_file = matches[0]
        else:
            console.print(
                f"[red]Page '{page}' not found.[/red] Run [bold]studyctl docs list[/bold]"
            )
            return

    # Decode explicitly as UTF-8 rather than the locale encoding.
    text = _strip_markdown(md_file.read_text(encoding="utf-8", errors="replace"))
    if not text:
        console.print("[yellow]Page is empty after stripping markdown.[/yellow]")
        return

    speak_bin = Path.home() / ".local" / "bin" / "study-speak"
    if not speak_bin.exists():
        console.print(
            "[red]study-speak not installed.[/red]"
            " Run: uv tool install './packages/agent-session-tools[tts]'"
        )
        return

    console.print(f"[bold]\U0001f4d6 Reading: {md_file.stem}[/bold]")
    console.print(f"[dim]({len(text.split())} words \u2014 press Ctrl+C to stop)[/dim]\n")

    try:
        subprocess.run([str(speak_bin), text], check=True, timeout=300)
        console.print("\n[green]\u2713 Done reading[/green]")
    except KeyboardInterrupt:
        console.print("\n[yellow]Stopped reading[/yellow]")
    except subprocess.TimeoutExpired:
        console.print("\n[yellow]Reading timed out[/yellow]")
    except subprocess.CalledProcessError as exc:
        # check=True raises on a non-zero exit; report it like the other
        # outcomes instead of letting a traceback escape.
        console.print(f"\n[red]study-speak exited with status {exc.returncode}[/red]")
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
"""Markdown to PDF conversion with mermaid diagram rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tempfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Wikilinks of the form [[target]] or [[target|alias]]:
# group 1 is the target, group 2 the optional alias.
_WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]")
# A block delimited by "---" lines. Compiled without MULTILINE, so "^" only
# matches at the very start of the text (i.e. leading frontmatter).
_FRONTMATTER_RE = re.compile(r"^---\s*\n.*?\n---\s*\n", re.DOTALL)
# Fenced ```mermaid blocks; group 1 captures the diagram source up to the
# closing fence (DOTALL lets it span lines).
_MERMAID_BLOCK_RE = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ConversionError(Exception):
    """Error signalling that a markdown document could not be converted to PDF.

    Raised by this module when required tools are missing or when pandoc
    fails or produces no output.
    """
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def check_prerequisites() -> list[str]:
    """Report which external command-line tools are unavailable.

    Looks up ``pandoc``, ``mmdc`` and ``typst`` on ``PATH``.

    Returns:
        Human-readable labels for every tool that was not found, in that
        order. Empty when all three are present.
    """
    # (executable looked up on PATH, label reported when it is absent)
    required = (
        ("pandoc", "pandoc"),
        ("mmdc", "mmdc (@mermaid-js/mermaid-cli)"),
        ("typst", "typst (brew install typst)"),
    )
    return [label for executable, label in required if not shutil.which(executable)]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def preprocess_markdown(content: str) -> str:
    """Prepare raw markdown for pandoc.

    Removes a leading YAML frontmatter block and flattens Obsidian
    wikilinks to plain text: ``[[target|alias]]`` becomes ``alias`` and
    ``[[target]]`` becomes ``target``.

    Args:
        content: Raw markdown file content.

    Returns:
        The cleaned markdown.
    """

    def _link_text(hit: re.Match) -> str:
        # Prefer the alias (group 2); fall back to the link target (group 1).
        return hit.group(2) or hit.group(1)

    without_frontmatter = _FRONTMATTER_RE.sub("", content)
    return _WIKILINK_RE.sub(_link_text, without_frontmatter)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Matches unquoted node labels containing / (file paths), e.g. C[/home/user]
|
|
56
|
+
# but NOT already-quoted labels like C["some text"]
|
|
57
|
+
_UNQUOTED_PATH_NODE_RE = re.compile(r'\[(/[^\]"]+)\]')
|
|
58
|
+
|
|
59
|
+
# Matches <br/> or <br> in text (not valid in all mermaid contexts)
|
|
60
|
+
_HTML_BR_RE = re.compile(r"<br\s*/?>")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _sanitize_mermaid(code: str) -> str:
|
|
64
|
+
"""Fix common mermaid syntax issues that cause parser failures.
|
|
65
|
+
|
|
66
|
+
Fixes:
|
|
67
|
+
- Unquoted node labels containing / (file paths) -> wraps in quotes
|
|
68
|
+
- <br/> tags in state diagram notes -> replaced with newline character
|
|
69
|
+
"""
|
|
70
|
+
# Wrap unquoted path-like node labels in quotes: [/home/user] -> ["/home/user"]
|
|
71
|
+
code = _UNQUOTED_PATH_NODE_RE.sub(lambda m: f'["{m.group(1)}"]', code)
|
|
72
|
+
|
|
73
|
+
# Replace <br/> with space -- special separators (|, /, \n) break state diagram notes
|
|
74
|
+
code = _HTML_BR_RE.sub(" ", code)
|
|
75
|
+
|
|
76
|
+
# In state diagram notes, colons after the initial "note ... :" break the parser.
|
|
77
|
+
# Strip extra colons from note body text.
|
|
78
|
+
def _fix_note_colons(m: re.Match) -> str:
|
|
79
|
+
prefix = m.group(1) # "note right of X: "
|
|
80
|
+
body = m.group(2)
|
|
81
|
+
return prefix + body.replace(":", " -")
|
|
82
|
+
|
|
83
|
+
code = re.sub(
|
|
84
|
+
r"(note\s+(?:right|left)\s+of\s+\w+\s*:\s*)(.*)",
|
|
85
|
+
_fix_note_colons,
|
|
86
|
+
code,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
return code
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _render_mermaid_to_png(mermaid_code: str, output_dir: Path, index: int) -> Path | None:
    """Render a mermaid diagram to PNG using mmdc.

    Uses PNG (not SVG) because SVG foreignObject elements lose text
    when converted to PDF by pandoc/typst.

    Args:
        mermaid_code: Diagram source; it is sanitized before rendering.
        output_dir: Existing directory that receives the PNG and the
            temporary ``.mmd`` input file.
        index: Diagram number, used in the PNG file name and log messages.

    Returns:
        Path to the rendered PNG, or ``None`` when mmdc fails, times out,
        or cannot be started.
    """
    mermaid_code = _sanitize_mermaid(mermaid_code)
    png_path = output_dir / f"mermaid_{index:03d}.png"

    # Write UTF-8 explicitly: the diagram text came from a UTF-8 markdown
    # file, and the locale default would garble or reject non-ASCII labels.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".mmd", dir=str(output_dir), delete=False, encoding="utf-8"
    ) as f:
        f.write(mermaid_code)
        mmd_path = f.name

    try:
        result = subprocess.run(
            ["mmdc", "-i", mmd_path, "-o", str(png_path), "-b", "white", "-s", "2"],
            capture_output=True,
            text=True,
            errors="replace",
            timeout=30,
        )
        if result.returncode != 0 or not png_path.exists():
            logger.warning("mmdc failed for diagram %d: %s", index, result.stderr[:200])
            return None
        return png_path
    except subprocess.TimeoutExpired:
        logger.warning("mmdc timed out for diagram %d", index)
        return None
    except OSError as exc:
        # mmdc missing or not executable: keep the "None on failure" contract
        # so the caller can fall back to the plain code block.
        logger.warning("mmdc could not be started for diagram %d: %s", index, exc)
        return None
    finally:
        # delete=False above, so the temporary input is removed here.
        Path(mmd_path).unlink(missing_ok=True)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def prerender_mermaid_diagrams(content: str, work_dir: Path) -> str:
    """Replace mermaid code blocks with references to rendered PNG images.

    Args:
        content: Markdown content with mermaid blocks.
        work_dir: Directory for the rendered PNG files (created if missing).

    Returns:
        Markdown in which each mermaid block that rendered successfully is
        replaced by an image reference. Blocks that fail to render are kept
        as plain fenced code blocks.
    """
    work_dir.mkdir(parents=True, exist_ok=True)
    counter = 0

    def _replace(match: re.Match) -> str:
        nonlocal counter
        counter += 1
        mermaid_code = match.group(1)
        png_path = _render_mermaid_to_png(mermaid_code, work_dir, counter)
        if png_path:
            # Emit a real image reference; an empty string here would drop
            # the diagram from the document. as_posix() avoids backslashes,
            # which markdown treats as escape characters.
            return f"![Diagram {counter}]({png_path.as_posix()})"
        # Rendering failed: keep the source visible as an untyped code block.
        return f"```\n{mermaid_code}```"

    result = _MERMAID_BLOCK_RE.sub(_replace, content)
    # counter counts every mermaid block encountered, rendered or not.
    logger.debug("Rendered %d mermaid diagrams", counter)
    return result
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def convert_markdown_to_pdf(
    md_path: Path,
    output_path: Path,
) -> Path:
    """Convert a markdown file to PDF with pre-rendered mermaid diagrams.

    The markdown is cleaned, its mermaid blocks are rendered to images in a
    hidden work directory beside the output, and pandoc is run with the
    typst engine. If that run fails, pandoc is retried once with its
    default PDF engine. The work directory is removed afterwards.

    Args:
        md_path: Path to the source markdown file.
        output_path: Path for the output PDF file.

    Returns:
        Path to the generated PDF.

    Raises:
        ConversionError: If prerequisites are missing, or pandoc fails,
            times out, or produces no output.
    """
    missing = check_prerequisites()
    if missing:
        raise ConversionError(
            f"Missing prerequisites: {', '.join(missing)}. "
            "Install with: brew install pandoc && npm install -g @mermaid-js/mermaid-cli"
        )

    raw_content = md_path.read_text(encoding="utf-8")
    cleaned = preprocess_markdown(raw_content)

    work_dir = output_path.parent / f".mermaid_{md_path.stem}"
    try:
        # Pre-rendering creates work_dir, so it runs inside the try block:
        # the finally clause then cleans up even if rendering raises.
        cleaned = prerender_mermaid_diagrams(cleaned, work_dir)

        temp_md = work_dir / f"{md_path.stem}_preprocessed.md"
        temp_md.write_text(cleaned, encoding="utf-8")
        output_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            "pandoc",
            str(temp_md),
            "-o",
            str(output_path),
            "--pdf-engine=typst",
        ]

        logger.debug("Running: %s", " ".join(cmd))
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=180)

            if result.returncode != 0:
                # Fallback to default engine (pdflatex) if typst fails
                cmd_fallback = [
                    "pandoc",
                    str(temp_md),
                    "-o",
                    str(output_path),
                    "-V",
                    "geometry:margin=1in",
                ]
                result = subprocess.run(cmd_fallback, capture_output=True, text=True, timeout=180)
                if result.returncode != 0:
                    raise ConversionError(
                        f"pandoc failed for {md_path.name}: {result.stderr[:500]}"
                    )
        except subprocess.TimeoutExpired as exc:
            # Report a hung pandoc through the documented exception type so
            # callers that catch ConversionError keep working.
            raise ConversionError(f"pandoc timed out for {md_path.name}") from exc

        if not output_path.exists():
            raise ConversionError(f"pandoc produced no output for {md_path.name}")

        logger.info("Converted %s -> %s", md_path.name, output_path.name)
        return output_path

    finally:
        if work_dir.exists():
            shutil.rmtree(work_dir, ignore_errors=True)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def convert_directory(
    source_dir: Path,
    output_dir: Path,
) -> list[Path]:
    """Convert all markdown files in a directory to PDFs.

    Files are sorted alphabetically and numbered sequentially. PDFs are
    written to a ``pdfs`` subdirectory of ``output_dir``. A file that fails
    to convert is logged and skipped; the remaining files are still
    processed.

    Args:
        source_dir: Directory containing .md files.
        output_dir: Directory to write PDFs into.

    Returns:
        List of paths to generated PDF files, in order.

    Raises:
        ConversionError: If prerequisites are missing.
        ValueError: If source_dir doesn't exist or has no .md files.
    """
    if not source_dir.is_dir():
        raise ValueError(f"Source directory does not exist: {source_dir}")

    md_files = sorted(source_dir.glob("*.md"))
    if not md_files:
        raise ValueError(f"No .md files found in {source_dir}")

    missing = check_prerequisites()
    if missing:
        raise ConversionError(
            f"Missing prerequisites: {', '.join(missing)}. "
            "Install with: brew install pandoc && npm install -g @mermaid-js/mermaid-cli"
        )

    pdf_dir = output_dir / "pdfs"
    pdf_dir.mkdir(parents=True, exist_ok=True)

    pdfs: list[Path] = []
    for i, md_path in enumerate(md_files, 1):
        # Lower-case the stem, turn spaces into underscores and collapse
        # runs of hyphens; the numeric prefix keeps the sorted order.
        stem = re.sub(r"-{2,}", "-", md_path.stem.lower().replace(" ", "_"))
        pdf_name = f"{i:02d}-{stem}.pdf"
        pdf_path = pdf_dir / pdf_name

        try:
            convert_markdown_to_pdf(md_path, pdf_path)
        except (ConversionError, subprocess.TimeoutExpired, OSError, UnicodeDecodeError) as exc:
            # Log and continue: a timeout, I/O error or undecodable file
            # must not abort the rest of the batch.
            logger.error("Failed to convert %s: %s", md_path.name, exc)
        else:
            pdfs.append(pdf_path)

    logger.info("Converted %d/%d files to %s", len(pdfs), len(md_files), pdf_dir)
    return pdfs
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Shared data models for the content pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class UploadResult:
    """Outcome of uploading chapters to a notebook."""

    # Identifier string; presumably the target notebook's id (confirm against the producer).
    id: str
    # Title string associated with the upload.
    title: str
    # Chapter count; presumably the number of chapters uploaded (confirm against the producer).
    chapters: int
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class NotebookInfo:
    """Brief description of a NotebookLM notebook."""

    # Notebook identifier.
    id: str
    # Notebook title.
    title: str
    # Number of sources; presumably those attached to the notebook (confirm against the producer).
    sources_count: int
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class SourceInfo:
    """Brief description of one source inside a notebook."""

    # Source identifier.
    id: str
    # Source title.
    title: str
|