studyctl 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. studyctl/__init__.py +3 -0
  2. studyctl/calendar.py +140 -0
  3. studyctl/cli/__init__.py +56 -0
  4. studyctl/cli/_config.py +128 -0
  5. studyctl/cli/_content.py +462 -0
  6. studyctl/cli/_lazy.py +35 -0
  7. studyctl/cli/_review.py +491 -0
  8. studyctl/cli/_schedule.py +125 -0
  9. studyctl/cli/_setup.py +164 -0
  10. studyctl/cli/_shared.py +83 -0
  11. studyctl/cli/_state.py +69 -0
  12. studyctl/cli/_sync.py +156 -0
  13. studyctl/cli/_web.py +228 -0
  14. studyctl/content/__init__.py +5 -0
  15. studyctl/content/markdown_converter.py +271 -0
  16. studyctl/content/models.py +31 -0
  17. studyctl/content/notebooklm_client.py +434 -0
  18. studyctl/content/splitter.py +159 -0
  19. studyctl/content/storage.py +105 -0
  20. studyctl/content/syllabus.py +416 -0
  21. studyctl/history.py +982 -0
  22. studyctl/maintenance.py +69 -0
  23. studyctl/mcp/__init__.py +1 -0
  24. studyctl/mcp/server.py +58 -0
  25. studyctl/mcp/tools.py +234 -0
  26. studyctl/pdf.py +89 -0
  27. studyctl/review_db.py +277 -0
  28. studyctl/review_loader.py +375 -0
  29. studyctl/scheduler.py +242 -0
  30. studyctl/services/__init__.py +6 -0
  31. studyctl/services/content.py +39 -0
  32. studyctl/services/review.py +127 -0
  33. studyctl/settings.py +367 -0
  34. studyctl/shared.py +425 -0
  35. studyctl/state.py +120 -0
  36. studyctl/sync.py +229 -0
  37. studyctl/tui/__main__.py +33 -0
  38. studyctl/tui/app.py +395 -0
  39. studyctl/tui/study_cards.py +396 -0
  40. studyctl/web/__init__.py +1 -0
  41. studyctl/web/app.py +68 -0
  42. studyctl/web/routes/__init__.py +1 -0
  43. studyctl/web/routes/artefacts.py +57 -0
  44. studyctl/web/routes/cards.py +86 -0
  45. studyctl/web/routes/courses.py +91 -0
  46. studyctl/web/routes/history.py +69 -0
  47. studyctl/web/server.py +260 -0
  48. studyctl/web/static/app.js +853 -0
  49. studyctl/web/static/icon-192.svg +4 -0
  50. studyctl/web/static/icon-512.svg +4 -0
  51. studyctl/web/static/index.html +50 -0
  52. studyctl/web/static/manifest.json +21 -0
  53. studyctl/web/static/style.css +657 -0
  54. studyctl/web/static/sw.js +14 -0
  55. studyctl-2.0.0.dist-info/METADATA +49 -0
  56. studyctl-2.0.0.dist-info/RECORD +58 -0
  57. studyctl-2.0.0.dist-info/WHEEL +4 -0
  58. studyctl-2.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,434 @@
1
+ """NotebookLM integration module for uploading chapters and generating overviews."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import TYPE_CHECKING
8
+
9
+ from studyctl.content.models import NotebookInfo, SourceInfo, UploadResult
10
+
11
+ if TYPE_CHECKING:
12
+ from pathlib import Path
13
+
14
+ from notebooklm import NotebookLMClient
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ def _import_notebooklm():
20
+ """Lazy-import notebooklm-py, raising a clear error if not installed."""
21
+ try:
22
+ import notebooklm
23
+ except ImportError as exc:
24
+ raise ImportError(
25
+ "notebooklm-py is required for NotebookLM integration. "
26
+ "Install with: uv pip install notebooklm-py"
27
+ ) from exc
28
+ return notebooklm
29
+
30
+
31
async def upload_chapters(
    chapter_pdfs: list[Path],
    book_name: str,
    notebook_id: str | None = None,
) -> UploadResult:
    """Upload chapter PDFs into a NotebookLM notebook.

    When ``notebook_id`` is omitted, an existing notebook whose title
    matches ``book_name`` is reused; otherwise a fresh one is created.
    """
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        if notebook_id:
            nb_id, nb_title = notebook_id, book_name
            logger.info("Using existing notebook: %s", nb_id)
        else:
            # Reuse a notebook with a matching title before creating one.
            existing = None
            for nb in await client.notebooks.list():
                if nb.title == book_name:
                    existing = nb
                    break
            if existing is not None:
                nb_id, nb_title = existing.id, existing.title
                logger.info("Found existing notebook: %s (%s)", nb_title, nb_id)
            else:
                created = await client.notebooks.create(title=book_name)
                nb_id, nb_title = created.id, created.title
                logger.info("Created notebook: %s (%s)", nb_title, nb_id)

        for pdf in chapter_pdfs:
            await client.sources.add_file(nb_id, pdf)
            logger.info("Uploaded %s", pdf.name)
            await asyncio.sleep(2)  # pacing between uploads

        return UploadResult(id=nb_id, title=nb_title, chapters=len(chapter_pdfs))
66
+
67
+
68
async def list_notebooks() -> list[NotebookInfo]:
    """Return every NotebookLM notebook together with its source count."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        notebooks = await client.notebooks.list()
        return [
            NotebookInfo(
                id=nb.id,
                title=nb.title,
                sources_count=len(await client.sources.list(nb.id)),
            )
            for nb in notebooks
        ]
78
+
79
+
80
async def list_sources(notebook_id: str) -> list[SourceInfo]:
    """Return the sources attached to the given notebook."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        sources = await client.sources.list(notebook_id)
        return [SourceInfo(id=src.id, title=src.title) for src in sources]
89
+
90
+
91
# Maximum number of times a failed audio/video generation task is
# re-requested before giving up (used by generate_for_chapters).
MAX_RETRIES = 3
92
+
93
+
94
async def _request_chapter_artifact(
    client: NotebookLMClient,
    notebook_id: str,
    label: str,
    source_ids: list[str],
    instructions: str,
) -> str:
    """Kick off a single artifact generation request and return its task_id.

    Raises:
        RuntimeError: If the API immediately reports failure (rate limit,
            quota, etc.) or returns no task id.
        ValueError: If ``label`` is neither "audio" nor "video".
    """
    nlm = _import_notebooklm()
    if label == "audio":
        status = await client.artifacts.generate_audio(
            notebook_id,
            source_ids=source_ids,
            instructions=instructions,
            audio_format=nlm.AudioFormat.DEEP_DIVE,
        )
    elif label == "video":
        status = await client.artifacts.generate_video(
            notebook_id,
            source_ids=source_ids,
            instructions=instructions,
            video_style=nlm.VideoStyle.WHITEBOARD,
        )
    else:
        raise ValueError(f"Unknown artifact type: {label}")

    if status.is_failed or not status.task_id:
        reason = status.error or "unknown error"
        code = status.error_code or ""
        suffix = f" (code: {code})" if code else ""
        raise RuntimeError(f"{label} generation rejected by API: {reason}" + suffix)

    return status.task_id
133
+
134
+
135
async def generate_for_chapters(
    notebook_id: str,
    chapter_range: tuple[int, int],
    generate_audio: bool = True,
    generate_video: bool = True,
    timeout: int = 900,
) -> None:
    """Generate audio/video overviews for a chapter range.

    Fires off requests concurrently, polls every 30s. Retries failed
    artifacts up to MAX_RETRIES times.

    Args:
        notebook_id: The notebook to generate in.
        chapter_range: Inclusive (start, end) chapter numbers, 1-based.
        generate_audio: Whether to request an audio overview.
        generate_video: Whether to request a video overview.
        timeout: Overall polling budget in seconds (default 900 = 15 min).
    """
    nlm = _import_notebooklm()
    start, end = chapter_range
    range_label = f"ch{start}-{end}"

    async with await nlm.NotebookLMClient.from_storage() as client:
        # Sources are sorted by title so positional slicing maps to chapter
        # numbers -- assumes chapter filenames sort lexicographically in
        # chapter order (zero-padded numbers); TODO confirm for odd naming.
        sources = await client.sources.list(notebook_id)
        sources.sort(key=lambda s: s.title)
        selected = sources[start - 1 : end]  # 1-based inclusive range

        if not selected:
            logger.warning("No sources found in the specified range")
            return

        selected_ids = [s.id for s in selected]
        logger.info(
            "Generating for chapters %d-%d (%d sources): %s",
            start,
            end,
            len(selected),
            ", ".join(s.title for s in selected),
        )

        # label -> task_id for requests the API accepted
        tasks: dict[str, str] = {}
        # label -> number of re-requests issued after a reported failure
        retries: dict[str, int] = {}
        instructions = {
            "audio": f"Create an engaging audio overview covering chapters {start} to {end}",
            "video": f"Create a visual explainer covering chapters {start} to {end}",
        }

        # Fire off the initial requests. A rejected initial submission is
        # logged and simply not tracked (no retry for initial submission).
        for label, should_gen in [("audio", generate_audio), ("video", generate_video)]:
            if not should_gen:
                continue
            retries[label] = 0
            try:
                logger.info("Requesting %s (%s)...", label, range_label)
                tasks[label] = await _request_chapter_artifact(
                    client, notebook_id, label, selected_ids, instructions[label]
                )
            except Exception as e:
                logger.error("Failed to request %s: %s", label, e)

        pending = dict(tasks)  # copy: entries are deleted as tasks settle
        elapsed = 0
        poll_interval = 30  # seconds between status polls

        logger.info(
            "Timeout: %ds (%dmin), max retries: %d",
            timeout,
            timeout // 60,
            MAX_RETRIES,
        )

        # Poll loop: check each pending task every poll_interval seconds.
        # Completed or permanently-failed tasks are removed; failed tasks are
        # re-submitted until MAX_RETRIES is exceeded.
        while pending and elapsed < timeout:
            await asyncio.sleep(poll_interval)
            elapsed += poll_interval

            # Snapshot the items because pending is mutated in the loop body.
            for label, task_id in list(pending.items()):
                try:
                    result = await client.artifacts.poll_status(notebook_id, task_id)
                except Exception as e:
                    # Transient poll error: keep the task pending and try
                    # again on the next cycle.
                    logger.warning("Poll error for %s: %s", label, e)
                    continue

                if result.is_complete:
                    logger.info("%s ready (%s)", label.capitalize(), range_label)
                    del pending[label]
                elif result.is_failed:
                    retries[label] += 1
                    if retries[label] <= MAX_RETRIES:
                        logger.warning(
                            "%s failed (%s) -- retrying (%d/%d)...",
                            label.capitalize(),
                            result.error or "unknown error",
                            retries[label],
                            MAX_RETRIES,
                        )
                        try:
                            # Re-submit and track the fresh task id.
                            pending[label] = await _request_chapter_artifact(
                                client,
                                notebook_id,
                                label,
                                selected_ids,
                                instructions[label],
                            )
                        except Exception as e:
                            # Re-submission itself was rejected: give up on
                            # this artifact entirely.
                            logger.error("Retry failed: %s", e)
                            del pending[label]
                    else:
                        logger.error(
                            "%s failed after %d retries: %s",
                            label.capitalize(),
                            MAX_RETRIES,
                            result.error,
                        )
                        del pending[label]
                else:
                    logger.debug("%s still generating (%ds elapsed)", label, elapsed)

        # Anything left in pending exhausted the overall timeout budget.
        for label in pending:
            logger.error("%s timed out (%s)", label.capitalize(), range_label)

        logger.info("Generation complete for %s", range_label)
249
+
250
+
251
async def download_artifacts(
    notebook_id: str,
    output_dir: Path,
    chapter_range: tuple[int, int] | None = None,
) -> None:
    """Download audio and video artifacts from a notebook.

    When ``chapter_range`` is given, filenames carry the range tag
    (e.g. audio_ch1-3_01.mp3); artifacts are numbered in listing order.
    """
    nlm = _import_notebooklm()
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    async with await nlm.NotebookLMClient.from_storage() as client:
        if chapter_range:
            range_tag = f"_ch{chapter_range[0]}-{chapter_range[1]}"
        else:
            range_tag = ""

        # Handle both artifact kinds with one loop over (kind, ext, APIs).
        media = (
            ("audio", "mp3", client.artifacts.list_audio, client.artifacts.download_audio),
            ("video", "mp4", client.artifacts.list_video, client.artifacts.download_video),
        )
        for kind, ext, lister, downloader in media:
            for i, artifact in enumerate(await lister(notebook_id), 1):
                path = str(output_dir / f"{kind}{range_tag}_{i:02d}.{ext}")
                await downloader(notebook_id, path, artifact_id=artifact.id)
                logger.info("Downloaded %s", path)

    logger.info("Files saved to %s", output_dir)
283
+
284
+
285
async def delete_notebook(notebook_id: str) -> None:
    """Remove a notebook (and everything inside it) from NotebookLM."""
    nlm = _import_notebooklm()
    async with await nlm.NotebookLMClient.from_storage() as client:
        await client.notebooks.delete(notebook_id)
        logger.info("Deleted notebook %s", notebook_id)
291
+
292
+
293
async def delete_artifact(
    client: NotebookLMClient,
    notebook_id: str,
    artifact_id: str,
) -> None:
    """Best-effort artifact deletion; failures are logged, never raised."""
    try:
        await client.artifacts.delete(notebook_id, artifact_id)
    except Exception as e:
        logger.warning("Failed to delete artifact %s: %s", artifact_id, e)
    else:
        logger.info("Deleted artifact %s", artifact_id)
304
+
305
+
306
async def download_episode_audio(
    client: NotebookLMClient,
    notebook_id: str,
    artifact_id: str,
    output_path: Path,
) -> None:
    """Save a single audio artifact to ``output_path``.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        artifact_id: The audio artifact ID (same as task_id).
        output_path: Full destination path (e.g. downloads/01-title.mp3);
            parent directories are created as needed.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    await client.artifacts.download_audio(
        notebook_id, str(output_path), artifact_id=artifact_id
    )
    logger.info("Downloaded %s", output_path)
323
+
324
+
325
async def create_syllabus(
    client: NotebookLMClient,
    notebook_id: str,
    prompt: str,
) -> str:
    """Ask the notebook's chat to produce a syllabus.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        prompt: The syllabus generation prompt.

    Returns:
        The raw answer text from the AI.
    """
    response = await client.chat.ask(notebook_id, prompt)
    return response.answer
342
+
343
+
344
+ def _build_instructions(episode_title: str, chapter_titles: list[str] | None) -> dict[str, str]:
345
+ """Build scoped instructions referencing specific chapter titles."""
346
+ if chapter_titles:
347
+ ch_list = ", ".join(chapter_titles)
348
+ return {
349
+ "audio": (
350
+ f"Focus ONLY on these specific chapters: {ch_list}. "
351
+ f"Create an engaging audio deep-dive covering: {episode_title}. "
352
+ "Do not discuss content from other chapters."
353
+ ),
354
+ "video": (
355
+ f"Focus ONLY on these specific chapters: {ch_list}. "
356
+ f"Create a visual explainer covering: {episode_title}. "
357
+ "Do not discuss content from other chapters."
358
+ ),
359
+ }
360
+ return {
361
+ "audio": f"Create an engaging audio overview: {episode_title}",
362
+ "video": f"Create a visual explainer: {episode_title}",
363
+ }
364
+
365
+
366
async def start_chunk_generation(
    client: NotebookLMClient,
    notebook_id: str,
    source_ids: list[str],
    episode_title: str,
    generate_audio: bool = True,
    generate_video: bool = True,
    chapter_titles: list[str] | None = None,
) -> dict[str, str]:
    """Submit generation requests for one chunk without polling.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        source_ids: Source IDs for this chunk's chapters.
        episode_title: Title for the episode.
        generate_audio: Whether to generate audio.
        generate_video: Whether to generate video.
        chapter_titles: Actual chapter titles for scoped instructions.

    Returns:
        Mapping of label ("audio"/"video") -> task_id for started tasks;
        labels whose request failed are omitted.
    """
    instructions = _build_instructions(episode_title, chapter_titles)
    started: dict[str, str] = {}
    for label, enabled in (("audio", generate_audio), ("video", generate_video)):
        if not enabled:
            continue
        logger.info("Requesting %s for '%s'...", label, episode_title)
        try:
            started[label] = await _request_chapter_artifact(
                client, notebook_id, label, source_ids, instructions[label]
            )
        except Exception as e:
            # A rejected request is logged and skipped; the caller sees
            # only the labels that actually started.
            logger.error("Failed to request %s: %s", label, e)
    return started
402
+
403
+
404
async def poll_chunk_status(
    client: NotebookLMClient,
    notebook_id: str,
    tasks: dict[str, str],
) -> dict[str, str]:
    """Poll each task once and report its status as a short string.

    Args:
        client: An open NotebookLM client.
        notebook_id: The notebook ID.
        tasks: Mapping of label -> task_id.

    Returns:
        Mapping of label -> one of "completed", "failed", "in_progress",
        "pending", or "unknown" (poll error).
    """
    statuses: dict[str, str] = {}
    for label, task_id in tasks.items():
        try:
            artifact = await client.artifacts.poll_status(notebook_id, task_id)
        except Exception as e:
            logger.warning("Poll error for %s: %s", label, e)
            statuses[label] = "unknown"
            continue
        if artifact.is_complete:
            statuses[label] = "completed"
        elif artifact.is_failed:
            statuses[label] = "failed"
        elif artifact.is_in_progress:
            statuses[label] = "in_progress"
        else:
            statuses[label] = "pending"
    return statuses
@@ -0,0 +1,159 @@
1
+ """PDF splitting module using PyMuPDF.
2
+
3
+ Splits a PDF into per-chapter files based on its Table of Contents
4
+ bookmarks. Requires pymupdf (install via studyctl[content] extra).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import re
11
+ from typing import TYPE_CHECKING
12
+
13
+ import pymupdf
14
+
15
+ if TYPE_CHECKING:
16
+ from pathlib import Path
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def sanitize_filename(name: str) -> str:
    """Turn a chapter title into a safe, lowercase filename fragment.

    Drops characters outside [word, whitespace, hyphen], collapses runs of
    whitespace into single underscores, trims to 80 characters, and strips
    stray leading/trailing underscores.
    """
    cleaned = re.sub(r"[^\w\s-]", "", name)
    underscored = re.sub(r"[\s]+", "_", cleaned.strip())
    return underscored[:80].strip("_").lower()
30
+
31
+
32
def split_pdf_by_chapters(
    input_path: Path,
    output_dir: Path,
    book_name: str,
    level: int = 1,
) -> list[Path]:
    """Split a PDF into per-chapter files based on its TOC bookmarks.

    Args:
        input_path: Path to the source PDF.
        output_dir: Directory to write chapter PDFs into.
        book_name: Base name used in output filenames.
        level: TOC depth level to split on (1 = top-level chapters).

    Returns:
        List of paths to the generated chapter PDF files.

    Raises:
        ValueError: If the PDF contains no TOC / bookmarks, or no entries
            exist at the requested level.
    """
    with pymupdf.open(input_path) as doc:
        toc = doc.get_toc()

        if not toc:
            raise ValueError(
                f"'{input_path.name}' has no bookmarks/TOC. Cannot split without chapter markers."
            )

        # Filter to requested level entries: each entry is [level, title, page]
        chapters = [(title, page) for lvl, title, page in toc if lvl == level]

        if not chapters:
            raise ValueError(
                f"No TOC entries at level {level}. Available levels: {sorted({e[0] for e in toc})}"
            )

        output_dir = output_dir.resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        total_pages = doc.page_count
        output_paths: list[Path] = []

        logger.info(
            "Splitting '%s' into %d chapters (level %d)",
            input_path.name,
            len(chapters),
            level,
        )

        for i, (title, start_page) in enumerate(chapters):
            start = start_page - 1  # TOC pages are 1-indexed
            # Last 0-indexed page of this chapter: the page before the next
            # chapter's first page (next_start - 2 when converted from
            # 1-indexed), or the document's final page for the last chapter.
            end = chapters[i + 1][1] - 2 if i + 1 < len(chapters) else total_pages - 1

            safe_title = sanitize_filename(title)
            filename = f"{book_name}_chapter_{i + 1:02d}_{safe_title}.pdf"
            out_path = output_dir / filename

            with pymupdf.open() as chapter_doc:
                chapter_doc.insert_pdf(doc, from_page=start, to_page=end)

                # Rebuild TOC for this chunk: keep entries whose (1-indexed)
                # page falls inside this chapter's span (end + 1 converts the
                # 0-indexed end back to 1-indexed) and shift pages so the
                # chunk starts at page 1.
                chunk_toc = [
                    [lvl, t, p - start_page + 1] for lvl, t, p in toc if start_page <= p <= end + 1
                ]
                if chunk_toc:
                    # Promote nesting so the shallowest surviving entry is
                    # level 1 -- presumably because set_toc expects levels to
                    # start at 1; TODO confirm against pymupdf docs.
                    min_lvl = min(entry[0] for entry in chunk_toc)
                    if min_lvl > 1:
                        chunk_toc = [[lvl - min_lvl + 1, t, p] for lvl, t, p in chunk_toc]
                    chapter_doc.set_toc(chunk_toc)

                chapter_doc.ez_save(str(out_path))
                output_paths.append(out_path)

                logger.info(
                    "Chapter %02d: %s (pages %d-%d)",
                    i + 1,
                    title,
                    start + 1,
                    end + 1,
                )

    logger.info("%d files written to %s", len(output_paths), output_dir)
    return output_paths
114
+
115
+
116
def split_pdf_by_ranges(
    input_path: Path,
    output_dir: Path,
    book_name: str,
    ranges: str,
) -> list[Path]:
    """Split a PDF at explicit page boundaries (for PDFs lacking a TOC).

    Args:
        input_path: Path to the source PDF.
        output_dir: Directory to write chapter PDFs into.
        book_name: Base name used in output filenames.
        ranges: Comma-separated page ranges, e.g. "1-30,31-60,61-90";
            a bare number selects a single page.

    Returns:
        List of paths to the generated chapter PDF files.
    """
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []

    with pymupdf.open(input_path) as doc:
        last_page = doc.page_count - 1
        for idx, spec in enumerate(ranges.split(","), start=1):
            spec = spec.strip()
            if "-" in spec:
                lo, _, hi = spec.partition("-")
                first = int(lo) - 1
                # Clamp the end to the document's real last page.
                last = min(int(hi) - 1, last_page)
            else:
                first = int(spec) - 1
                last = first

            out_path = output_dir / f"{book_name}_part_{idx:02d}.pdf"

            with pymupdf.open() as part_doc:
                part_doc.insert_pdf(doc, from_page=first, to_page=last)
                part_doc.ez_save(str(out_path))

            written.append(out_path)
            logger.info("Part %02d: pages %d-%d", idx, first + 1, last + 1)

    logger.info("%d files written to %s", len(written), output_dir)
    return written
@@ -0,0 +1,105 @@
1
+ """Course-centric storage management.
2
+
3
+ Each course has a directory under ``content.base_path`` with a standard
4
+ subdirectory layout for chapters, audio, flashcards, quizzes, video,
5
+ and slides. A ``metadata.json`` file tracks notebook IDs, syllabus
6
+ state, and generation history.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ import re
14
+ import tempfile
15
+ from pathlib import Path
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# Standard per-course subdirectory layout.
COURSE_SUBDIRS = ("chapters", "audio", "flashcards", "quizzes", "video", "slides")


def get_course_dir(base_path: Path, slug: str) -> Path:
    """Return the course directory for ``slug``, ensuring the standard
    subdirectory layout exists on disk."""
    course_dir = base_path / slug
    for name in COURSE_SUBDIRS:
        subdir = course_dir / name
        subdir.mkdir(parents=True, exist_ok=True)
    return course_dir
28
+
29
+
30
def slugify(title: str) -> str:
    """Make a filesystem-safe slug from a book/course title.

    Lowercases, drops characters other than word chars / whitespace /
    hyphens, collapses whitespace and underscores into single hyphens, and
    caps the result at 60 characters.
    """
    lowered = title.lower().strip()
    kept = re.sub(r"[^\w\s-]", "", lowered)
    hyphenated = re.sub(r"[\s_]+", "-", kept)
    return hyphenated[:60].strip("-")
36
+
37
+
38
def list_courses(base_path: Path) -> list[dict]:
    """Enumerate courses under the base path.

    Hidden directories (dot-prefixed) and plain files are skipped.

    Returns:
        One dict per course directory, sorted by name, with keys:
        slug, path, metadata.
    """
    if not base_path.is_dir():
        return []

    entries = sorted(base_path.iterdir())
    return [
        {
            "slug": entry.name,
            "path": entry,
            "metadata": load_course_metadata(entry),
        }
        for entry in entries
        if entry.is_dir() and not entry.name.startswith(".")
    ]
59
+
60
+
61
def load_course_metadata(course_dir: Path) -> dict:
    """Read metadata.json from a course directory.

    Returns an empty dict when the file is absent or unreadable; parse and
    I/O errors are logged as warnings rather than raised.
    """
    meta_path = course_dir / "metadata.json"
    if not meta_path.exists():
        return {}
    try:
        raw = meta_path.read_text()
        return json.loads(raw)
    except (json.JSONDecodeError, OSError) as exc:
        logger.warning("Failed to read %s: %s", meta_path, exc)
        return {}
71
+
72
+
73
def save_course_metadata(course_dir: Path, metadata: dict) -> None:
    """Save metadata.json atomically (write to a temp file, then rename).

    The data is written to a temporary file in the same directory and
    renamed over metadata.json, so concurrent readers never observe a
    partially written file.

    Args:
        course_dir: The course directory (created if missing).
        metadata: JSON-serializable metadata; non-serializable values are
            stringified via ``default=str``.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: Propagated from json.dumps for truly unserializable data.
    """
    import os

    meta_path = course_dir / "metadata.json"
    course_dir.mkdir(parents=True, exist_ok=True)

    # Atomic write: create the temp file in the same directory (same
    # filesystem, so the rename is atomic) and write through the fd that
    # mkstemp returned. The fd is closed by the `with` BEFORE the rename --
    # the original kept it open across the replace, which leaks a second
    # open of the same file and makes the rename fail on Windows, where a
    # file with an open handle cannot be replaced.
    fd, tmp_path = tempfile.mkstemp(dir=course_dir, prefix=".metadata-", suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as tmp_file:
            tmp_file.write(json.dumps(metadata, indent=2, default=str))
        Path(tmp_path).replace(meta_path)
    except Exception:
        # Best-effort cleanup of the orphaned temp file.
        Path(tmp_path).unlink(missing_ok=True)
        raise
91
+
92
+
93
def check_content_dependencies() -> list[str]:
    """Check pandoc, mmdc, typst availability.

    The docstring always promised a typst check, but the original body never
    performed it; the typst probe is now included.

    Returns:
        A human-readable entry (tool name plus install instructions) for
        each missing tool; empty when everything is available.
    """
    import shutil

    # (binary looked up on PATH, message shown when it is missing)
    required = (
        ("pandoc", "pandoc (install: brew install pandoc)"),
        ("mmdc", "mmdc / mermaid-cli (install: npm install -g @mermaid-js/mermaid-cli)"),
        ("typst", "typst (install: brew install typst)"),
    )
    return [message for binary, message in required if not shutil.which(binary)]