studyctl 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- studyctl/__init__.py +3 -0
- studyctl/calendar.py +140 -0
- studyctl/cli/__init__.py +56 -0
- studyctl/cli/_config.py +128 -0
- studyctl/cli/_content.py +462 -0
- studyctl/cli/_lazy.py +35 -0
- studyctl/cli/_review.py +491 -0
- studyctl/cli/_schedule.py +125 -0
- studyctl/cli/_setup.py +164 -0
- studyctl/cli/_shared.py +83 -0
- studyctl/cli/_state.py +69 -0
- studyctl/cli/_sync.py +156 -0
- studyctl/cli/_web.py +228 -0
- studyctl/content/__init__.py +5 -0
- studyctl/content/markdown_converter.py +271 -0
- studyctl/content/models.py +31 -0
- studyctl/content/notebooklm_client.py +434 -0
- studyctl/content/splitter.py +159 -0
- studyctl/content/storage.py +105 -0
- studyctl/content/syllabus.py +416 -0
- studyctl/history.py +982 -0
- studyctl/maintenance.py +69 -0
- studyctl/mcp/__init__.py +1 -0
- studyctl/mcp/server.py +58 -0
- studyctl/mcp/tools.py +234 -0
- studyctl/pdf.py +89 -0
- studyctl/review_db.py +277 -0
- studyctl/review_loader.py +375 -0
- studyctl/scheduler.py +242 -0
- studyctl/services/__init__.py +6 -0
- studyctl/services/content.py +39 -0
- studyctl/services/review.py +127 -0
- studyctl/settings.py +367 -0
- studyctl/shared.py +425 -0
- studyctl/state.py +120 -0
- studyctl/sync.py +229 -0
- studyctl/tui/__main__.py +33 -0
- studyctl/tui/app.py +395 -0
- studyctl/tui/study_cards.py +396 -0
- studyctl/web/__init__.py +1 -0
- studyctl/web/app.py +68 -0
- studyctl/web/routes/__init__.py +1 -0
- studyctl/web/routes/artefacts.py +57 -0
- studyctl/web/routes/cards.py +86 -0
- studyctl/web/routes/courses.py +91 -0
- studyctl/web/routes/history.py +69 -0
- studyctl/web/server.py +260 -0
- studyctl/web/static/app.js +853 -0
- studyctl/web/static/icon-192.svg +4 -0
- studyctl/web/static/icon-512.svg +4 -0
- studyctl/web/static/index.html +50 -0
- studyctl/web/static/manifest.json +21 -0
- studyctl/web/static/style.css +657 -0
- studyctl/web/static/sw.js +14 -0
- studyctl-2.0.0.dist-info/METADATA +49 -0
- studyctl-2.0.0.dist-info/RECORD +58 -0
- studyctl-2.0.0.dist-info/WHEEL +4 -0
- studyctl-2.0.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
"""NotebookLM integration module for uploading chapters and generating overviews."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from studyctl.content.models import NotebookInfo, SourceInfo, UploadResult
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from notebooklm import NotebookLMClient
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _import_notebooklm():
|
|
20
|
+
"""Lazy-import notebooklm-py, raising a clear error if not installed."""
|
|
21
|
+
try:
|
|
22
|
+
import notebooklm
|
|
23
|
+
except ImportError as exc:
|
|
24
|
+
raise ImportError(
|
|
25
|
+
"notebooklm-py is required for NotebookLM integration. "
|
|
26
|
+
"Install with: uv pip install notebooklm-py"
|
|
27
|
+
) from exc
|
|
28
|
+
return notebooklm
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def upload_chapters(
    chapter_pdfs: list[Path],
    book_name: str,
    notebook_id: str | None = None,
) -> UploadResult:
    """Upload chapter PDFs to a NotebookLM notebook.

    If no notebook_id is given, checks for an existing notebook with a
    matching title before creating a new one.
    """
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        if not notebook_id:
            # Prefer reusing a notebook whose title already matches.
            match = None
            for nb in await client.notebooks.list():
                if nb.title == book_name:
                    match = nb
                    break
            if match is None:
                created = await client.notebooks.create(title=book_name)
                nb_id, nb_title = created.id, created.title
                logger.info("Created notebook: %s (%s)", nb_title, nb_id)
            else:
                nb_id, nb_title = match.id, match.title
                logger.info("Found existing notebook: %s (%s)", nb_title, nb_id)
        else:
            nb_id, nb_title = notebook_id, book_name
            logger.info("Using existing notebook: %s", nb_id)

        for pdf_path in chapter_pdfs:
            await client.sources.add_file(nb_id, pdf_path)
            logger.info("Uploaded %s", pdf_path.name)
            # Brief pause between uploads to avoid hammering the service.
            await asyncio.sleep(2)

        return UploadResult(id=nb_id, title=nb_title, chapters=len(chapter_pdfs))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
async def list_notebooks() -> list[NotebookInfo]:
    """List all NotebookLM notebooks with source counts."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        infos: list[NotebookInfo] = []
        for notebook in await client.notebooks.list():
            # One extra round-trip per notebook to count its sources.
            notebook_sources = await client.sources.list(notebook.id)
            infos.append(
                NotebookInfo(
                    id=notebook.id,
                    title=notebook.title,
                    sources_count=len(notebook_sources),
                )
            )
        return infos
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def list_sources(notebook_id: str) -> list[SourceInfo]:
    """List all sources in a notebook."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        raw_sources = await client.sources.list(notebook_id)
        return [SourceInfo(id=src.id, title=src.title) for src in raw_sources]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Maximum number of times a failed artifact generation is re-submitted
# before giving up (used by generate_for_chapters).
MAX_RETRIES = 3
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def _request_chapter_artifact(
    client: NotebookLMClient,
    notebook_id: str,
    label: str,
    source_ids: list[str],
    instructions: str,
) -> str:
    """Fire off a single chapter generation request. Returns task_id.

    Raises:
        RuntimeError: If the API returns a failed status (rate limit, quota, etc.)
        ValueError: If *label* is neither "audio" nor "video".
    """
    nlm = _import_notebooklm()

    if label == "audio":
        status = await client.artifacts.generate_audio(
            notebook_id,
            source_ids=source_ids,
            instructions=instructions,
            audio_format=nlm.AudioFormat.DEEP_DIVE,
        )
    elif label == "video":
        status = await client.artifacts.generate_video(
            notebook_id,
            source_ids=source_ids,
            instructions=instructions,
            video_style=nlm.VideoStyle.WHITEBOARD,
        )
    else:
        raise ValueError(f"Unknown artifact type: {label}")

    # Success path: a task id was issued and the status is not failed.
    if status.task_id and not status.is_failed:
        return status.task_id

    detail = status.error or "unknown error"
    code = status.error_code or ""
    suffix = f" (code: {code})" if code else ""
    raise RuntimeError(f"{label} generation rejected by API: {detail}" + suffix)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
async def generate_for_chapters(
    notebook_id: str,
    chapter_range: tuple[int, int],
    generate_audio: bool = True,
    generate_video: bool = True,
    timeout: int = 900,
) -> None:
    """Generate audio/video overviews for a chapter range.

    Fires off requests concurrently, polls every 30s. Retries failed
    artifacts up to MAX_RETRIES times.

    Args:
        notebook_id: The notebook whose sources are used.
        chapter_range: 1-indexed inclusive (start, end) chapter numbers,
            applied to the title-sorted source list.
        generate_audio: Whether to request an audio overview.
        generate_video: Whether to request a video overview.
        timeout: Overall polling budget in seconds (default 900 = 15 min).
    """
    nlm = _import_notebooklm()
    start, end = chapter_range
    range_label = f"ch{start}-{end}"

    async with await nlm.NotebookLMClient.from_storage() as client:
        # Sort sources by title so chapter numbers line up with the upload
        # naming scheme (chapter_01_, chapter_02_, ...); assumes titles sort
        # in chapter order -- TODO confirm for non-standard source titles.
        sources = await client.sources.list(notebook_id)
        sources.sort(key=lambda s: s.title)
        selected = sources[start - 1 : end]  # 1-indexed inclusive slice

        if not selected:
            logger.warning("No sources found in the specified range")
            return

        selected_ids = [s.id for s in selected]
        logger.info(
            "Generating for chapters %d-%d (%d sources): %s",
            start,
            end,
            len(selected),
            ", ".join(s.title for s in selected),
        )

        tasks: dict[str, str] = {}  # label -> task_id of the in-flight request
        retries: dict[str, int] = {}  # label -> number of re-submissions so far
        instructions = {
            "audio": f"Create an engaging audio overview covering chapters {start} to {end}",
            "video": f"Create a visual explainer covering chapters {start} to {end}",
        }

        for label, should_gen in [("audio", generate_audio), ("video", generate_video)]:
            if not should_gen:
                continue
            retries[label] = 0
            try:
                logger.info("Requesting %s (%s)...", label, range_label)
                tasks[label] = await _request_chapter_artifact(
                    client, notebook_id, label, selected_ids, instructions[label]
                )
            except Exception as e:
                # Initial request rejected (quota, rate limit, ...): the
                # artifact is skipped and never enters the polling loop.
                logger.error("Failed to request %s: %s", label, e)

        pending = dict(tasks)
        elapsed = 0
        poll_interval = 30

        logger.info(
            "Timeout: %ds (%dmin), max retries: %d",
            timeout,
            timeout // 60,
            MAX_RETRIES,
        )

        while pending and elapsed < timeout:
            await asyncio.sleep(poll_interval)
            elapsed += poll_interval

            # Iterate a snapshot: entries are deleted/replaced in-place below.
            for label, task_id in list(pending.items()):
                try:
                    result = await client.artifacts.poll_status(notebook_id, task_id)
                except Exception as e:
                    # Transient poll error: keep the task pending and retry
                    # on the next cycle.
                    logger.warning("Poll error for %s: %s", label, e)
                    continue

                if result.is_complete:
                    logger.info("%s ready (%s)", label.capitalize(), range_label)
                    del pending[label]
                elif result.is_failed:
                    retries[label] += 1
                    if retries[label] <= MAX_RETRIES:
                        logger.warning(
                            "%s failed (%s) -- retrying (%d/%d)...",
                            label.capitalize(),
                            result.error or "unknown error",
                            retries[label],
                            MAX_RETRIES,
                        )
                        try:
                            # Re-submit and poll the replacement task_id.
                            pending[label] = await _request_chapter_artifact(
                                client,
                                notebook_id,
                                label,
                                selected_ids,
                                instructions[label],
                            )
                        except Exception as e:
                            logger.error("Retry failed: %s", e)
                            del pending[label]
                    else:
                        logger.error(
                            "%s failed after %d retries: %s",
                            label.capitalize(),
                            MAX_RETRIES,
                            result.error,
                        )
                        del pending[label]
                else:
                    logger.debug("%s still generating (%ds elapsed)", label, elapsed)

        # Anything still pending here ran out of the timeout budget.
        for label in pending:
            logger.error("%s timed out (%s)", label.capitalize(), range_label)

        logger.info("Generation complete for %s", range_label)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
async def download_artifacts(
    notebook_id: str,
    output_dir: Path,
    chapter_range: tuple[int, int] | None = None,
) -> None:
    """Download audio and video artifacts from a notebook.

    If chapter_range is given, files are named by range (e.g. audio_ch1-3.mp3).
    Otherwise, files are numbered sequentially.
    """
    nlm = _import_notebooklm()
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    if chapter_range:
        range_tag = f"_ch{chapter_range[0]}-{chapter_range[1]}"
    else:
        range_tag = ""

    async with await nlm.NotebookLMClient.from_storage() as client:
        for index, item in enumerate(await client.artifacts.list_audio(notebook_id), 1):
            dest = str(output_dir / f"audio{range_tag}_{index:02d}.mp3")
            await client.artifacts.download_audio(notebook_id, dest, artifact_id=item.id)
            logger.info("Downloaded %s", dest)

        for index, item in enumerate(await client.artifacts.list_video(notebook_id), 1):
            dest = str(output_dir / f"video{range_tag}_{index:02d}.mp4")
            await client.artifacts.download_video(notebook_id, dest, artifact_id=item.id)
            logger.info("Downloaded %s", dest)

        logger.info("Files saved to %s", output_dir)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
async def delete_notebook(notebook_id: str) -> None:
    """Delete a notebook and all its contents."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        await client.notebooks.delete(notebook_id)
        logger.info("Deleted notebook %s", notebook_id)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
async def delete_artifact(
    client: NotebookLMClient,
    notebook_id: str,
    artifact_id: str,
) -> None:
    """Delete an artifact by ID. Best-effort, logs warning on failure."""
    try:
        await client.artifacts.delete(notebook_id, artifact_id)
    except Exception as exc:
        # Best-effort: never propagate deletion failures to the caller.
        logger.warning("Failed to delete artifact %s: %s", artifact_id, exc)
    else:
        logger.info("Deleted artifact %s", artifact_id)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
async def download_episode_audio(
    client: NotebookLMClient,
    notebook_id: str,
    artifact_id: str,
    output_path: Path,
) -> None:
    """Download a single audio artifact to the specified path.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        artifact_id: The audio artifact ID (same as task_id).
        output_path: Full path to save the file (e.g. downloads/01-title.mp3).
    """
    # Ensure the destination directory exists before the client writes to it.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    destination = str(output_path)
    await client.artifacts.download_audio(notebook_id, destination, artifact_id=artifact_id)
    logger.info("Downloaded %s", output_path)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
async def create_syllabus(
    client: NotebookLMClient,
    notebook_id: str,
    prompt: str,
) -> str:
    """Send syllabus prompt to NotebookLM chat.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        prompt: The syllabus generation prompt.

    Returns:
        Raw AI response text.
    """
    return (await client.chat.ask(notebook_id, prompt)).answer
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _build_instructions(episode_title: str, chapter_titles: list[str] | None) -> dict[str, str]:
|
|
345
|
+
"""Build scoped instructions referencing specific chapter titles."""
|
|
346
|
+
if chapter_titles:
|
|
347
|
+
ch_list = ", ".join(chapter_titles)
|
|
348
|
+
return {
|
|
349
|
+
"audio": (
|
|
350
|
+
f"Focus ONLY on these specific chapters: {ch_list}. "
|
|
351
|
+
f"Create an engaging audio deep-dive covering: {episode_title}. "
|
|
352
|
+
"Do not discuss content from other chapters."
|
|
353
|
+
),
|
|
354
|
+
"video": (
|
|
355
|
+
f"Focus ONLY on these specific chapters: {ch_list}. "
|
|
356
|
+
f"Create a visual explainer covering: {episode_title}. "
|
|
357
|
+
"Do not discuss content from other chapters."
|
|
358
|
+
),
|
|
359
|
+
}
|
|
360
|
+
return {
|
|
361
|
+
"audio": f"Create an engaging audio overview: {episode_title}",
|
|
362
|
+
"video": f"Create a visual explainer: {episode_title}",
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
async def start_chunk_generation(
    client: NotebookLMClient,
    notebook_id: str,
    source_ids: list[str],
    episode_title: str,
    generate_audio: bool = True,
    generate_video: bool = True,
    chapter_titles: list[str] | None = None,
) -> dict[str, str]:
    """Fire off generation requests without polling. Returns {label: task_id}.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        source_ids: Source IDs for this chunk's chapters.
        episode_title: Title for the episode.
        generate_audio: Whether to generate audio.
        generate_video: Whether to generate video.
        chapter_titles: Actual chapter titles for scoped instructions.

    Returns:
        Mapping of label ("audio"/"video") -> task_id for started tasks.
    """
    instructions = _build_instructions(episode_title, chapter_titles)
    wanted = {"audio": generate_audio, "video": generate_video}
    started: dict[str, str] = {}
    for label in ("audio", "video"):
        if not wanted[label]:
            continue
        try:
            logger.info("Requesting %s for '%s'...", label, episode_title)
            started[label] = await _request_chapter_artifact(
                client, notebook_id, label, source_ids, instructions[label]
            )
        except Exception as exc:
            # A rejected request is logged and omitted from the result map.
            logger.error("Failed to request %s: %s", label, exc)
    return started
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
async def poll_chunk_status(
    client: NotebookLMClient,
    notebook_id: str,
    tasks: dict[str, str],
) -> dict[str, str]:
    """Single poll of artifact generation status. Returns {label: status_str}.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        tasks: Mapping of label -> task_id.

    Returns:
        Mapping of label -> status string ("completed", "failed", "in_progress", "pending").
    """
    statuses: dict[str, str] = {}
    for label, task_id in tasks.items():
        try:
            polled = await client.artifacts.poll_status(notebook_id, task_id)
            if polled.is_complete:
                state = "completed"
            elif polled.is_failed:
                state = "failed"
            elif polled.is_in_progress:
                state = "in_progress"
            else:
                state = "pending"
        except Exception as exc:
            # Poll errors are non-fatal; report the task as "unknown".
            logger.warning("Poll error for %s: %s", label, exc)
            state = "unknown"
        statuses[label] = state
    return statuses
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""PDF splitting module using PyMuPDF.
|
|
2
|
+
|
|
3
|
+
Splits a PDF into per-chapter files based on its Table of Contents
|
|
4
|
+
bookmarks. Requires pymupdf (install via studyctl[content] extra).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import re
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import pymupdf
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def sanitize_filename(name: str) -> str:
    """Clean a chapter title for use as a filename.

    Removes special characters, replaces whitespace with underscores,
    truncates to 80 characters, and lowercases the result.
    """
    # Keep word characters, whitespace, and hyphens; drop everything else.
    kept = re.sub(r"[^\w\s-]", "", name)
    # Collapse whitespace runs into single underscores.
    underscored = re.sub(r"[\s]+", "_", kept.strip())
    return underscored[:80].strip("_").lower()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def split_pdf_by_chapters(
    input_path: Path,
    output_dir: Path,
    book_name: str,
    level: int = 1,
) -> list[Path]:
    """Split a PDF into per-chapter files based on its TOC bookmarks.

    Args:
        input_path: Path to the source PDF.
        output_dir: Directory to write chapter PDFs into.
        book_name: Base name used in output filenames.
        level: TOC depth level to split on (1 = top-level chapters).

    Returns:
        List of paths to the generated chapter PDF files.

    Raises:
        ValueError: If the PDF contains no TOC / bookmarks.
    """
    with pymupdf.open(input_path) as doc:
        toc = doc.get_toc()

        if not toc:
            raise ValueError(
                f"'{input_path.name}' has no bookmarks/TOC. Cannot split without chapter markers."
            )

        # Filter to requested level entries: each entry is [level, title, page]
        chapters = [(title, page) for lvl, title, page in toc if lvl == level]

        if not chapters:
            raise ValueError(
                f"No TOC entries at level {level}. Available levels: {sorted({e[0] for e in toc})}"
            )

        output_dir = output_dir.resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        total_pages = doc.page_count
        output_paths: list[Path] = []

        logger.info(
            "Splitting '%s' into %d chapters (level %d)",
            input_path.name,
            len(chapters),
            level,
        )

        for i, (title, start_page) in enumerate(chapters):
            start = start_page - 1  # TOC pages are 1-indexed
            # Last 0-indexed page of this chapter: the page before the next
            # chapter's start, or the document's last page for the final one.
            # NOTE(review): if two chapters start on the same page this yields
            # end < start -- confirm pymupdf's insert_pdf tolerates that.
            end = chapters[i + 1][1] - 2 if i + 1 < len(chapters) else total_pages - 1

            safe_title = sanitize_filename(title)
            filename = f"{book_name}_chapter_{i + 1:02d}_{safe_title}.pdf"
            out_path = output_dir / filename

            with pymupdf.open() as chapter_doc:
                chapter_doc.insert_pdf(doc, from_page=start, to_page=end)

                # Rebuild TOC for this chunk: keep entries whose (1-indexed)
                # page falls inside this chapter and shift them so the chunk's
                # first page becomes page 1.
                chunk_toc = [
                    [lvl, t, p - start_page + 1] for lvl, t, p in toc if start_page <= p <= end + 1
                ]
                if chunk_toc:
                    # Promote nesting so the shallowest surviving level is 1 --
                    # presumably because set_toc expects the hierarchy to start
                    # at level 1; confirm against pymupdf docs.
                    min_lvl = min(entry[0] for entry in chunk_toc)
                    if min_lvl > 1:
                        chunk_toc = [[lvl - min_lvl + 1, t, p] for lvl, t, p in chunk_toc]
                    chapter_doc.set_toc(chunk_toc)

                chapter_doc.ez_save(str(out_path))
                output_paths.append(out_path)

                logger.info(
                    "Chapter %02d: %s (pages %d-%d)",
                    i + 1,
                    title,
                    start + 1,
                    end + 1,
                )

    logger.info("%d files written to %s", len(output_paths), output_dir)
    return output_paths
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def split_pdf_by_ranges(
    input_path: Path,
    output_dir: Path,
    book_name: str,
    ranges: str,
) -> list[Path]:
    """Split a PDF by explicit page ranges (for PDFs without TOC).

    Args:
        input_path: Path to the source PDF.
        output_dir: Directory to write chapter PDFs into.
        book_name: Base name used in output filenames.
        ranges: Comma-separated page ranges, e.g. "1-30,31-60,61-90".

    Returns:
        List of paths to the generated chapter PDF files.
    """
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []

    with pymupdf.open(input_path) as doc:
        last_page = doc.page_count - 1
        for i, spec in enumerate(ranges.split(",")):
            spec = spec.strip()
            if "-" in spec:
                lo, hi = spec.split("-", 1)
                start = int(lo) - 1
                # Clamp the end of the range to the document's last page.
                end = min(int(hi) - 1, last_page)
            else:
                # A bare page number yields a single-page part.
                start = end = int(spec) - 1

            out_path = output_dir / f"{book_name}_part_{i + 1:02d}.pdf"

            with pymupdf.open() as part_doc:
                part_doc.insert_pdf(doc, from_page=start, to_page=end)
                part_doc.ez_save(str(out_path))

            written.append(out_path)
            logger.info("Part %02d: pages %d-%d", i + 1, start + 1, end + 1)

    logger.info("%d files written to %s", len(written), output_dir)
    return written
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Course-centric storage management.
|
|
2
|
+
|
|
3
|
+
Each course has a directory under ``content.base_path`` with a standard
|
|
4
|
+
subdirectory layout for chapters, audio, flashcards, quizzes, video,
|
|
5
|
+
and slides. A ``metadata.json`` file tracks notebook IDs, syllabus
|
|
6
|
+
state, and generation history.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
import tempfile
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Standard per-course subdirectory layout.
COURSE_SUBDIRS = ("chapters", "audio", "flashcards", "quizzes", "video", "slides")


def get_course_dir(base_path: Path, slug: str) -> Path:
    """Return course directory, creating subdirs if needed."""
    course_dir = base_path / slug
    for name in COURSE_SUBDIRS:
        subdir = course_dir / name
        subdir.mkdir(parents=True, exist_ok=True)
    return course_dir
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def slugify(title: str) -> str:
    """Convert a book/course title to a filesystem-safe slug."""
    lowered = title.lower().strip()
    # Keep word chars, whitespace, and hyphens; drop punctuation.
    kept = re.sub(r"[^\w\s-]", "", lowered)
    # Collapse whitespace/underscore runs into single hyphens.
    dashed = re.sub(r"[\s_]+", "-", kept)
    return dashed[:60].strip("-")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def list_courses(base_path: Path) -> list[dict]:
    """List all courses under the base path.

    Returns a list of dicts with keys: slug, path, metadata.
    """
    if not base_path.is_dir():
        return []

    # Hidden directories (dot-prefixed) are not courses.
    return [
        {
            "slug": entry.name,
            "path": entry,
            "metadata": load_course_metadata(entry),
        }
        for entry in sorted(base_path.iterdir())
        if entry.is_dir() and not entry.name.startswith(".")
    ]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def load_course_metadata(course_dir: Path) -> dict:
    """Load metadata.json (notebook IDs, syllabus state, generation history).

    Returns an empty dict when the file is missing or unreadable.
    """
    meta_path = course_dir / "metadata.json"
    if not meta_path.exists():
        return {}
    try:
        content = meta_path.read_text()
        return json.loads(content)
    except (OSError, json.JSONDecodeError) as exc:
        # Corrupt or unreadable metadata degrades to "no metadata".
        logger.warning("Failed to read %s: %s", meta_path, exc)
        return {}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def save_course_metadata(course_dir: Path, metadata: dict) -> None:
    """Save metadata.json atomically (write to temp file, rename over target).

    Args:
        course_dir: Course directory; created if it does not exist.
        metadata: JSON-serializable mapping. Non-serializable values are
            stringified via ``default=str``.
    """
    import os

    meta_path = course_dir / "metadata.json"
    course_dir.mkdir(parents=True, exist_ok=True)

    # Atomic write: create the temp file in the same directory (same
    # filesystem, so the final rename is atomic), write, then rename.
    fd, tmp_name = tempfile.mkstemp(dir=course_dir, prefix=".metadata-", suffix=".tmp")
    # Close the mkstemp descriptor immediately: the content is written via a
    # separate open below, and a still-open handle would make the rename fail
    # on Windows (the original closed it only in a finally, after replace()).
    os.close(fd)

    tmp = Path(tmp_name)
    try:
        tmp.write_text(json.dumps(metadata, indent=2, default=str))
        tmp.replace(meta_path)
    except Exception:
        # Don't leave a stray temp file behind on failure.
        tmp.unlink(missing_ok=True)
        raise
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def check_content_dependencies() -> list[str]:
    """Check that external content tools (pandoc, mmdc) are on PATH.

    Note: the previous docstring also claimed a ``typst`` check, but no such
    check exists in the code; the documentation now matches the behavior.

    Returns:
        One human-readable entry (with install instructions) per missing
        tool. Empty list when everything is available.
    """
    import shutil

    # Tool executable -> message shown to the user when it is missing.
    required = {
        "pandoc": "pandoc (install: brew install pandoc)",
        "mmdc": "mmdc / mermaid-cli (install: npm install -g @mermaid-js/mermaid-cli)",
    }
    return [message for tool, message in required.items() if not shutil.which(tool)]
|