yt-instruct 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yt_instruct/__init__.py +3 -0
- yt_instruct/cli.py +415 -0
- yt_instruct/downloader.py +105 -0
- yt_instruct/generator.py +226 -0
- yt_instruct/prompts/__init__.py +1 -0
- yt_instruct/prompts/adhd.md +29 -0
- yt_instruct/prompts/default.md +27 -0
- yt_instruct/prompts/ib copy.md +30 -0
- yt_instruct/prompts/ib.md +37 -0
- yt_instruct/prompts/lecture.md +29 -0
- yt_instruct/prompts/tutorial.md +29 -0
- yt_instruct/transcriber.py +30 -0
- yt_instruct/utils.py +34 -0
- yt_instruct-1.0.0.dist-info/METADATA +170 -0
- yt_instruct-1.0.0.dist-info/RECORD +18 -0
- yt_instruct-1.0.0.dist-info/WHEEL +5 -0
- yt_instruct-1.0.0.dist-info/entry_points.txt +2 -0
- yt_instruct-1.0.0.dist-info/top_level.txt +1 -0
yt_instruct/__init__.py
ADDED
yt_instruct/cli.py
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""yt-instruct CLI — Download, transcribe, and generate instruction docs from YouTube videos."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import tempfile
|
|
5
|
+
from datetime import date
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from . import __version__
|
|
11
|
+
from .downloader import VideoInfo, download_audio, fetch_info, resolve_urls
|
|
12
|
+
from .generator import generate
|
|
13
|
+
from .transcriber import transcribe
|
|
14
|
+
from .utils import output_path, slugify
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _frontmatter(title: str, url: str, description: str, draft: bool) -> str:
|
|
18
|
+
safe_title = title.replace('"', '\\"')
|
|
19
|
+
safe_desc = description.replace('"', '\\"')
|
|
20
|
+
draft_str = "true" if draft else "false"
|
|
21
|
+
return (
|
|
22
|
+
"---\n"
|
|
23
|
+
f'title: "{safe_title}"\n'
|
|
24
|
+
f"url: {url}\n"
|
|
25
|
+
f'description: "{safe_desc}"\n'
|
|
26
|
+
f"date: {date.today().isoformat()}\n"
|
|
27
|
+
f"draft: {draft_str}\n"
|
|
28
|
+
"---\n\n"
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _transcript_cache_path(output_dir: Path, title: str) -> Path:
    """Return the location of the cached transcript for *title*.

    The name is derived from the slugified title, so the writer (--keep) and
    the reader (--resume) arrive at the same file inside *output_dir*.
    """
    cache_name = "{}_transcript.txt".format(slugify(title))
    return output_dir / cache_name
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _resolve_input_file(path: Path, output_dir: Path) -> Path:
|
|
38
|
+
"""Resolve an input file path, falling back to output_dir if the file isn't found as-is."""
|
|
39
|
+
if path.exists():
|
|
40
|
+
return path
|
|
41
|
+
candidate = output_dir / path
|
|
42
|
+
if candidate.exists():
|
|
43
|
+
return candidate
|
|
44
|
+
raise click.BadParameter(f"File not found: {path} (also tried {candidate})")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@click.command()
@click.version_option(__version__, prog_name="yt-instruct")
@click.argument("urls", nargs=-1, metavar="URL...")
@click.option(
    "--url-file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=None,
    help="Text file with one YouTube URL per line.",
)
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Directory to write output markdown files.",
)
@click.option(
    "--keep",
    is_flag=True,
    default=False,
    help="Keep intermediate audio and transcript files.",
)
@click.option(
    "--merge",
    is_flag=True,
    default=False,
    help="Merge all videos into a single output document.",
)
@click.option(
    "--content-type",
    type=click.Choice(["tutorial", "lecture", "ib", "auto"], case_sensitive=False),
    default="auto",
    show_default=True,
    help="Prompt style to use for generation.",
)
@click.option(
    "--backend",
    type=click.Choice(["anthropic", "llm", "nvidia"], case_sensitive=False),
    default="anthropic",
    show_default=True,
    help="LLM backend to use.",
)
@click.option(
    "--model",
    default="claude-sonnet-4-6",
    show_default=True,
    help="Model name (e.g. 'claude-sonnet-4-6' for anthropic/llm, 'moonshotai/kimi-k2-instruct' for nvidia).",
)
@click.option(
    "--prompt-file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=None,
    help="Custom system prompt file (overrides built-in prompts).",
)
@click.option(
    "--mistral-model",
    default="voxtral-mini-latest",
    show_default=True,
    help="Mistral transcription model.",
)
@click.option(
    "--audio-format",
    type=click.Choice(["mp3", "m4a"], case_sensitive=False),
    default="mp3",
    show_default=True,
    help="Audio format for intermediate file.",
)
@click.option(
    "--language",
    default=None,
    metavar="LANG",
    help="Output language for the generated document (e.g. 'French', 'Spanish'). Defaults to English.",
)
@click.option(
    "--transcript-file",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Use an existing transcript .txt file; skips download and transcription.",
)
@click.option(
    "--audio-file",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Use an existing audio file (e.g. MP3); skips download, transcribes directly.",
)
@click.option(
    "--title",
    default=None,
    help="Video title to use with --transcript-file or --audio-file (defaults to filename stem).",
)
@click.option(
    "--draft",
    is_flag=True,
    default=False,
    help="Set draft: true in the output frontmatter.",
)
@click.option(
    "--resume",
    is_flag=True,
    default=False,
    help=(
        "Skip videos that already have a generated output file. "
        "If a cached transcript (from --keep) exists, skips download and transcription too."
    ),
)
def cli(
    urls,
    url_file,
    output_dir,
    keep,
    merge,
    content_type,
    backend,
    model,
    prompt_file,
    mistral_model,
    audio_format,
    language,
    transcript_file,
    audio_file,
    title,
    draft,
    resume,
):
    """Convert YouTube videos into structured markdown instruction documents.

    Pipeline: URL → audio download (yt-dlp) → transcription (Mistral voxtral) →
    LLM document generation (Anthropic / llm / NVIDIA). Each output file is
    prefixed with YAML frontmatter (title, url, description, date, draft).

    \b
    REQUIRED ENVIRONMENT VARIABLES
    MISTRAL_API_KEY Always required (transcription).
    ANTHROPIC_API_KEY Required for --backend anthropic (default).
    NVIDIA_API_KEY Required for --backend nvidia.

    \b
    CONTENT TYPES
    auto LLM classifies the video and picks the best template (default).
    tutorial Hands-on how-to guides with steps and code.
    lecture Tech talks and academic presentations.
    ib IB student revision notes.

    \b
    BACKENDS
    anthropic Anthropic Python SDK (default model: claude-sonnet-4-6).
    llm Simon Willison's llm CLI library.
    nvidia NVIDIA NIM API via OpenAI-compatible endpoint.

    \b
    FILE RESOLUTION FOR --audio-file AND --transcript-file
    If the given path does not exist, it is looked up inside --output-dir.
    Example: --audio-file recording.mp3 --output-dir ./docs
    resolves to ./docs/recording.mp3 if not found locally.

    \b
    EXAMPLES
    yt-instruct https://youtu.be/dQw4w9WgXcQ
    yt-instruct url1 url2 --output-dir ./docs
    yt-instruct <URL> --content-type tutorial --backend llm
    yt-instruct --url-file urls.txt --merge --output-dir ./docs
    yt-instruct --transcript-file transcript.txt --title "My Video"
    yt-instruct --audio-file recording.mp3 --output-dir ./docs
    yt-instruct <URL> --backend nvidia --model moonshotai/kimi-k2-instruct
    yt-instruct <URL> --language French
    yt-instruct <URL> --draft
    yt-instruct --url-file urls.txt --keep --output-dir ./docs
    yt-instruct --url-file urls.txt --resume --output-dir ./docs
    """
    # Maintainer notes (the docstring above is the user-facing --help text):
    # three mutually exclusive entry paths are handled in order —
    #   1. --transcript-file: generate straight from an existing transcript;
    #   2. --audio-file: transcribe a local audio file, then generate;
    #   3. URLs / --url-file: expand playlists, then download → transcribe →
    #      generate per video, optionally merging all results into one file.
    # Failures on the single-input paths exit with status 1; failures inside
    # the URL loop are reported and the loop moves on to the next video.

    # Fast path: transcript file provided — skip download and transcription
    if transcript_file:
        if urls or url_file or audio_file:
            raise click.UsageError("--transcript-file cannot be combined with URLs, --url-file, or --audio-file.")
        output_dir.mkdir(parents=True, exist_ok=True)
        transcript_file = _resolve_input_file(transcript_file, output_dir)
        transcript = transcript_file.read_text(encoding="utf-8").strip()
        resolved_title = title or transcript_file.stem
        video = VideoInfo(title=resolved_title, channel="", url="", duration=None, audio_path=None)
        lang_note = f", language={language}" if language else ""
        click.echo(f"\n[yt-instruct] Generating from transcript: {transcript_file.name} ({content_type}, backend={backend}{lang_note})")
        try:
            markdown = generate(
                video=video,
                transcript=transcript,
                content_type=content_type,
                backend=backend,
                model=model,
                prompt_file=prompt_file,
                language=language,
            )
        except Exception as e:
            click.echo(f" ERROR: {e}", err=True)
            sys.exit(1)
        out = output_path(output_dir, resolved_title)
        out.write_text(_frontmatter(resolved_title, video.url, video.description, draft) + markdown, encoding="utf-8")
        click.echo(f" Written: {out}")
        return

    # Fast path: audio file provided — skip download, transcribe directly
    if audio_file:
        if urls or url_file:
            raise click.UsageError("--audio-file cannot be combined with URLs or --url-file.")
        output_dir.mkdir(parents=True, exist_ok=True)
        audio_file = _resolve_input_file(audio_file, output_dir)
        resolved_title = title or audio_file.stem
        video = VideoInfo(title=resolved_title, channel="", url="", duration=None, audio_path=audio_file)
        lang_note = f", language={language}" if language else ""
        click.echo(f"\n[yt-instruct] Transcribing audio file: {audio_file} ({content_type}, backend={backend}{lang_note})")
        click.echo(" Transcribing...")
        try:
            transcript = transcribe(video.audio_path, mistral_model)
        except Exception as e:
            click.echo(f" ERROR transcribing: {e}", err=True)
            sys.exit(1)
        click.echo(f" Transcript: {len(transcript)} chars")
        if keep:
            _transcript_cache_path(output_dir, resolved_title).write_text(transcript, encoding="utf-8")
        click.echo(f" Generating ({content_type}, backend={backend}{lang_note})...")
        try:
            markdown = generate(
                video=video,
                transcript=transcript,
                content_type=content_type,
                backend=backend,
                model=model,
                prompt_file=prompt_file,
                language=language,
            )
        except Exception as e:
            click.echo(f" ERROR generating: {e}", err=True)
            sys.exit(1)
        out = output_path(output_dir, resolved_title)
        out.write_text(_frontmatter(resolved_title, video.url, video.description, draft) + markdown, encoding="utf-8")
        click.echo(f" Written: {out}")
        return

    all_urls = list(urls)
    if url_file:
        for raw_line in url_file.read_text(encoding="utf-8").splitlines():
            entry = raw_line.strip()
            # Test the *stripped* text for '#', so indented comment lines are
            # ignored instead of being handed to yt-dlp as URLs.
            if entry and not entry.startswith("#"):
                all_urls.append(entry)

    if not all_urls:
        raise click.UsageError("No URLs provided. Pass URLs as arguments or use --url-file.")

    # Expand playlists to individual video URLs
    expanded: list[str] = []
    for raw_url in all_urls:
        try:
            resolved = resolve_urls(raw_url)
            if len(resolved) > 1:
                click.echo(f"[yt-instruct] Playlist detected: {len(resolved)} videos in {raw_url}")
            expanded.extend(resolved)
        except Exception as e:
            click.echo(f"[yt-instruct] WARNING: could not resolve {raw_url}: {e}", err=True)
            expanded.append(raw_url)  # try anyway

    output_dir.mkdir(parents=True, exist_ok=True)

    # Use a persistent temp dir so we can --keep audio if needed
    tmp_dir = Path(tempfile.mkdtemp(prefix="yt-instruct-"))
    results: list[tuple[str, str]] = []  # (title, markdown)
    skipped = 0  # videos --resume found already generated

    try:
        for i, url in enumerate(expanded, 1):
            prefix = f"[{i}/{len(expanded)}]" if len(expanded) > 1 else ""
            click.echo(f"\n[yt-instruct]{prefix} Processing: {url}")

            video = None
            transcript = None
            skip_download = False
            skip_transcription = False

            # Resume: fetch lightweight metadata, then check for existing output / cached transcript
            if resume:
                try:
                    meta = fetch_info(url)
                except Exception as e:
                    click.echo(f" WARNING: could not fetch metadata for resume check: {e}", err=True)
                    meta = None

                if meta:
                    out = output_path(output_dir, meta.title)
                    if out.exists():
                        click.echo(f" [resume] Already done, skipping: {out.name}")
                        skipped += 1
                        continue

                    cached_transcript = _transcript_cache_path(output_dir, meta.title)
                    if cached_transcript.exists():
                        click.echo(f" [resume] Found cached transcript, skipping download and transcription.")
                        transcript = cached_transcript.read_text(encoding="utf-8").strip()
                        video = meta
                        skip_download = True
                        skip_transcription = True

            # Step 1: Download
            if not skip_download:
                click.echo(" Downloading audio...")
                try:
                    video = download_audio(url, tmp_dir, audio_format)
                except Exception as e:
                    click.echo(f" ERROR downloading {url}: {e}", err=True)
                    # Only ask when there is something left to process.
                    if len(expanded) > 1 and i < len(expanded):
                        if not click.confirm(" Continue with remaining videos?", default=True):
                            break
                    continue
                click.echo(f" Downloaded: {video.title!r} ({video.channel})")

            # Step 2: Transcribe
            if not skip_transcription:
                click.echo(" Transcribing...")
                try:
                    transcript = transcribe(video.audio_path, mistral_model)
                except Exception as e:
                    click.echo(f" ERROR transcribing {url}: {e}", err=True)
                    continue
                click.echo(f" Transcript: {len(transcript)} chars")
                if keep:
                    _transcript_cache_path(output_dir, video.title).write_text(transcript, encoding="utf-8")

            # Step 3: Generate
            lang_note = f", language={language}" if language else ""
            click.echo(f" Generating ({content_type}, backend={backend}{lang_note})...")
            try:
                markdown = generate(
                    video=video,
                    transcript=transcript,
                    content_type=content_type,
                    backend=backend,
                    model=model,
                    prompt_file=prompt_file,
                    language=language,
                )
            except Exception as e:
                click.echo(f" ERROR generating for {url}: {e}", err=True)
                continue

            results.append((video.title, markdown))

            if not merge:
                out = output_path(output_dir, video.title)
                out.write_text(_frontmatter(video.title, video.url, video.description, draft) + markdown, encoding="utf-8")
                click.echo(f" Written: {out}")

    finally:
        import shutil
        if keep:
            # Move remaining intermediate audio files into output_dir
            moved = []
            for f in tmp_dir.iterdir():
                dest = output_dir / f.name
                shutil.move(str(f), dest)
                moved.append(dest.name)
            if moved:
                click.echo(f"\nIntermediate audio files moved to {output_dir}/: {', '.join(moved)}")
        shutil.rmtree(tmp_dir, ignore_errors=True)

    if not results:
        if expanded and skipped == len(expanded):
            # Every video was already generated by an earlier run: a fully
            # resumed batch is a success, not a failure.
            click.echo(f"\nNothing to do. {skipped} document(s) already present in {output_dir}/")
            return
        click.echo("\nNo documents generated.", err=True)
        sys.exit(1)

    # Merge mode: combine all into one file
    if merge:
        combined = "\n\n---\n\n".join(md for _, md in results)
        merged_path = output_dir / "merged_instructions.md"
        merged_path.write_text(combined, encoding="utf-8")  # no frontmatter for merged docs
        click.echo(f"\nMerged document written: {merged_path}")
    else:
        click.echo(f"\nDone. {len(results)} document(s) written to {output_dir}/")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Audio downloader using yt-dlp."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from yt_dlp import YoutubeDL
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class VideoInfo:
    # Metadata record for one video. Built by fetch_info() and download_audio()
    # from yt-dlp's info dict, and by the CLI for local transcript/audio inputs.
    title: str  # video title (callers substitute a fallback when none is available)
    channel: str  # uploader/channel name; the CLI passes "" for local files
    url: str  # source URL as given by the caller; "" for local files
    duration: int | None  # yt-dlp "duration" value — presumably seconds, TODO confirm; None if unknown
    audio_path: Path | None  # local audio file, or None when no audio was downloaded
    description: str = ""  # video description; empty when unavailable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _check_ffmpeg() -> None:
|
|
22
|
+
"""Raise a clear error if ffmpeg is not available."""
|
|
23
|
+
if shutil.which("ffmpeg") is None:
|
|
24
|
+
raise RuntimeError(
|
|
25
|
+
"ffmpeg is required but not found. "
|
|
26
|
+
"Install it with: brew install ffmpeg (macOS) or apt install ffmpeg (Linux)"
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def fetch_info(url: str) -> "VideoInfo":
    """Fetch video metadata without downloading audio (lightweight network call).

    Args:
        url: Video URL understood by yt-dlp.

    Returns:
        A ``VideoInfo`` with ``audio_path`` set to ``None``.

    Fallbacks use ``or`` rather than ``dict.get`` defaults, so a key that is
    present with a ``None``/empty value still yields a usable string (the URL
    for the title, ``"Unknown"`` for the channel).
    """
    opts = {"quiet": True, "no_warnings": True}
    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(url, download=False)
    return VideoInfo(
        title=info.get("title") or url,
        channel=info.get("uploader") or info.get("channel") or "Unknown",
        url=url,
        duration=info.get("duration"),
        audio_path=None,
        description=info.get("description") or "",
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def resolve_urls(url: str) -> list[str]:
    """Expand *url* into the list of video URLs it stands for.

    A playlist yields one URL per usable entry (the entry's own ``url`` when
    present, otherwise a watch URL built from its ``id``); anything else is
    returned unchanged as a single-element list.
    """
    flat_opts = {"extract_flat": "in_playlist", "quiet": True, "no_warnings": True}
    with YoutubeDL(flat_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    if info.get("_type") != "playlist":
        return [url]

    video_urls = []
    for item in info.get("entries") or []:
        if not item:
            # Empty/None entries carry nothing to resolve.
            continue
        direct = item.get("url")
        if direct:
            video_urls.append(direct)
        elif item.get("id"):
            video_urls.append(f"https://www.youtube.com/watch?v={item['id']}")
    return video_urls
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def download_audio(url: str, tmp_dir: Path, audio_format: str = "mp3") -> VideoInfo:
    """Download audio from a YouTube URL. Returns VideoInfo with audio_path set.

    Args:
        url: Video URL understood by yt-dlp.
        tmp_dir: Directory the audio file is written to, named ``<video id>.<ext>``.
        audio_format: Codec requested from the FFmpegExtractAudio post-processor.

    Returns:
        A ``VideoInfo`` whose ``audio_path`` points at the downloaded file.

    Raises:
        RuntimeError: If ffmpeg is not installed (raised by ``_check_ffmpeg``).
        FileNotFoundError: If no file for the video id exists after the download.
    """
    _check_ffmpeg()

    opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": audio_format,
            }
        ],
        "outtmpl": str(tmp_dir / "%(id)s.%(ext)s"),
        "quiet": True,
        "no_warnings": True,
    }

    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(url, download=True)

    video_id = info["id"]
    audio_path = tmp_dir / f"{video_id}.{audio_format}"

    if not audio_path.exists():
        # yt-dlp may have used a different extension; sort so the chosen file
        # does not depend on directory-listing order.
        candidates = sorted(tmp_dir.glob(f"{video_id}.*"))
        if not candidates:
            raise FileNotFoundError(f"Downloaded audio not found in {tmp_dir}")
        audio_path = candidates[0]

    # `or` fallbacks also cover keys that are present but None/empty,
    # matching fetch_info().
    return VideoInfo(
        title=info.get("title") or video_id,
        channel=info.get("uploader") or info.get("channel") or "Unknown",
        url=url,
        duration=info.get("duration"),
        audio_path=audio_path,
        description=info.get("description") or "",
    )
|
yt_instruct/generator.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Instruction document generator using Claude (Anthropic SDK or llm library) or NVIDIA NIM."""
|
|
2
|
+
|
|
3
|
+
import importlib.resources
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .downloader import VideoInfo
|
|
8
|
+
from .utils import format_duration
|
|
9
|
+
|
|
10
|
+
# Importable package holding the built-in prompt templates; templates are
# looked up there as "<content_type>.md", with "default.md" as the fallback.
BUILTIN_PROMPTS_PACKAGE = "yt_instruct.prompts"

# Classification prompt used when the content type is "auto". It is filled in
# with str.format(title=..., channel=...) and the reply is expected to be a
# single word naming the content type.
AUTO_CLASSIFY_PROMPT = """You are classifying a YouTube video. Based on the title and channel below,
respond with exactly one word — the content type that best fits:
- tutorial (how-to, step-by-step, practical skill)
- lecture (tech talk, academic, educational presentation)
- ib (IB student subject: maths, sciences, humanities, etc.)

Title: {title}
Channel: {channel}

Respond with one word only."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_builtin_prompt(content_type: str) -> str:
    """Return the packaged prompt template for *content_type*.

    Falls back to ``default.md`` when ``<content_type>.md`` cannot be read
    (``FileNotFoundError`` or ``TypeError``).
    """

    def _read(template_name: str) -> str:
        # Resolve the package on every call, exactly as each branch did before.
        package_root = importlib.resources.files(BUILTIN_PROMPTS_PACKAGE)
        return package_root.joinpath(template_name).read_text(encoding="utf-8")

    try:
        return _read(f"{content_type}.md")
    except (FileNotFoundError, TypeError):
        # Fall back to default
        return _read("default.md")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _load_prompt(content_type: str, prompt_file: Path | None) -> str:
|
|
38
|
+
"""Load prompt template from user file or built-in resource."""
|
|
39
|
+
if prompt_file:
|
|
40
|
+
return Path(prompt_file).read_text(encoding="utf-8")
|
|
41
|
+
return _load_builtin_prompt(content_type)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _build_messages(
    video: VideoInfo, transcript: str, content_type: str, system_prompt: str,
    language: str | None = None,
) -> tuple[str, str]:
    """Return (system, user) message strings.

    Args:
        video: Metadata of the video being processed.
        transcript: Full transcript text, appended to the user message.
        content_type: Resolved content type, available to the template.
        system_prompt: Template that may contain the placeholders ``{title}``,
            ``{channel}``, ``{content_type}`` and ``{duration}``.
        language: Optional output language; when given, an instruction to
            write in that language is appended to the system message.

    The template is rendered with ``str.format`` first. If that fails — for
    example a custom prompt file containing literal braces such as JSON or
    code samples — only the four known placeholders are substituted and every
    other brace is left untouched, instead of aborting with KeyError/ValueError.
    """
    import re  # local import: only needed for the fallback rendering

    duration_text = format_duration(video.duration)
    values = {
        "title": video.title,
        "channel": video.channel,
        "content_type": content_type,
        "duration": duration_text,
    }
    try:
        system = system_prompt.format(**values)
    except (KeyError, IndexError, ValueError):
        # Single-pass substitution, so placeholder-like text inside a value
        # (e.g. a title containing "{channel}") is not expanded again.
        system = re.sub(
            r"\{(title|channel|content_type|duration)\}",
            lambda match: str(values[match.group(1)]),
            system_prompt,
        )
    if language:
        system += f"\n\nWrite the output document in {language}."
    user = (
        f"Video: {video.title}\n"
        f"Channel: {video.channel}\n"
        f"URL: {video.url}\n"
        f"Duration: {duration_text}\n\n"
        f"Transcript:\n{transcript}"
    )
    return system, user
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _detect_content_type_anthropic(
    video: VideoInfo, model: str, client
) -> str:
    """Ask the Anthropic model to classify the video and return the content type.

    Args:
        video: Video whose title and channel are sent to the model.
        model: Model name passed to ``client.messages.create``.
        client: Anthropic client instance.

    Returns:
        One of ``"adhd"``, ``"tutorial"``, ``"lecture"``, ``"ib"``;
        ``"tutorial"`` when the reply names none of them.

    The reply is searched for a known type as a whole word, so answers such
    as ``Lecture.`` or ``"ib"`` are recognised instead of silently falling
    back to the default.
    """
    import re  # local import: only used to normalise the reply

    prompt = AUTO_CLASSIFY_PROMPT.format(title=video.title, channel=video.channel)
    msg = client.messages.create(
        model=model,
        max_tokens=10,
        messages=[{"role": "user", "content": prompt}],
    )
    reply = msg.content[0].text.strip().lower()
    found = re.search(r"\b(adhd|tutorial|lecture|ib)\b", reply)
    return found.group(1) if found else "tutorial"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _detect_content_type_llm(video: VideoInfo, model_name: str) -> str:
    """Classify the video through the ``llm`` library and return the content type.

    Args:
        video: Video whose title and channel are sent to the model.
        model_name: Name resolved with ``llm.get_model``.

    Returns:
        One of ``"adhd"``, ``"tutorial"``, ``"lecture"``, ``"ib"``;
        ``"tutorial"`` when the reply names none of them.

    The reply is searched for a known type as a whole word, so answers such
    as ``Lecture.`` or ``"ib"`` are recognised instead of silently falling
    back to the default.
    """
    import re  # local import: only used to normalise the reply

    import llm

    prompt = AUTO_CLASSIFY_PROMPT.format(title=video.title, channel=video.channel)
    model_obj = llm.get_model(model_name)
    reply = model_obj.prompt(prompt).text().strip().lower()
    found = re.search(r"\b(adhd|tutorial|lecture|ib)\b", reply)
    return found.group(1) if found else "tutorial"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def generate_anthropic(
    video: VideoInfo,
    transcript: str,
    content_type: str,
    model: str,
    prompt_file: Path | None,
    language: str | None = None,
    max_tokens: int = 4096,
) -> str:
    """Produce the instruction document through the Anthropic SDK.

    Reads the API key from ``ANTHROPIC_API_KEY``, resolves ``"auto"`` to a
    concrete content type with a classification call, then issues a single
    ``messages.create`` request and returns the text of the first content
    block.

    Raises:
        RuntimeError: If ``ANTHROPIC_API_KEY`` is not set.
    """
    import anthropic

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY environment variable is not set.")

    client = anthropic.Anthropic(api_key=api_key)

    # Auto-detection costs one extra request; explicit types skip it.
    if content_type == "auto":
        resolved_type = _detect_content_type_anthropic(video, model, client)
    else:
        resolved_type = content_type

    template = _load_prompt(resolved_type, prompt_file)
    system, user = _build_messages(video, transcript, resolved_type, template, language)

    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=system,
        messages=[{"role": "user", "content": user}],
    )
    first_block = response.content[0]
    return first_block.text
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def generate_llm(
    video: VideoInfo,
    transcript: str,
    content_type: str,
    model: str,
    prompt_file: Path | None,
    language: str | None = None,
) -> str:
    """Produce the instruction document through the simonw/llm library.

    ``"auto"`` is first resolved to a concrete content type with a
    classification prompt; the document is then generated with one
    ``prompt`` call on the model returned by ``llm.get_model``.
    """
    import llm

    if content_type == "auto":
        resolved_type = _detect_content_type_llm(video, model)
    else:
        resolved_type = content_type

    template = _load_prompt(resolved_type, prompt_file)
    system, user = _build_messages(video, transcript, resolved_type, template, language)

    response = llm.get_model(model).prompt(user, system=system)
    return response.text()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _detect_content_type_nvidia(video: VideoInfo, model: str, client) -> str:
    """Classify the video through the NVIDIA endpoint and return the content type.

    Args:
        video: Video whose title and channel are sent to the model.
        model: Model name passed to ``client.chat.completions.create``.
        client: OpenAI-compatible client instance.

    Returns:
        One of ``"adhd"``, ``"tutorial"``, ``"lecture"``, ``"ib"``;
        ``"tutorial"`` when the reply names none of them or is empty.

    The reply is searched for a known type as a whole word, so answers such
    as ``Lecture.`` or ``"ib"`` are recognised instead of silently falling
    back to the default. A missing (``None``) message content is treated as
    an empty reply rather than raising ``AttributeError``.
    """
    import re  # local import: only used to normalise the reply

    prompt = AUTO_CLASSIFY_PROMPT.format(title=video.title, channel=video.channel)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=10,
        temperature=0.0,
    )
    reply = (completion.choices[0].message.content or "").strip().lower()
    found = re.search(r"\b(adhd|tutorial|lecture|ib)\b", reply)
    return found.group(1) if found else "tutorial"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def generate_nvidia(
    video: VideoInfo,
    transcript: str,
    content_type: str,
    model: str,
    prompt_file: Path | None,
    language: str | None = None,
    max_tokens: int = 4096,
) -> str:
    """Produce the instruction document through NVIDIA NIM's OpenAI-compatible API.

    Reads the API key from ``NVIDIA_API_KEY``, resolves ``"auto"`` to a
    concrete content type, then streams a chat completion and returns the
    concatenated text deltas.

    Raises:
        RuntimeError: If ``NVIDIA_API_KEY`` is not set.
    """
    from openai import OpenAI

    api_key = os.environ.get("NVIDIA_API_KEY")
    if not api_key:
        raise RuntimeError("NVIDIA_API_KEY environment variable is not set.")

    client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key=api_key)

    if content_type == "auto":
        resolved_type = _detect_content_type_nvidia(video, model, client)
    else:
        resolved_type = content_type

    template = _load_prompt(resolved_type, prompt_file)
    system, user = _build_messages(video, transcript, resolved_type, template, language)

    stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.6,
        top_p=0.9,
        max_tokens=max_tokens,
        stream=True,
    )
    # Skip events without choices and deltas that carry no text.
    return "".join(
        piece
        for event in stream
        if getattr(event, "choices", None)
        and (piece := event.choices[0].delta.content) is not None
    )
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def generate(
|
|
210
|
+
video: VideoInfo,
|
|
211
|
+
transcript: str,
|
|
212
|
+
content_type: str,
|
|
213
|
+
backend: str,
|
|
214
|
+
model: str,
|
|
215
|
+
prompt_file: Path | None,
|
|
216
|
+
language: str | None = None,
|
|
217
|
+
) -> str:
|
|
218
|
+
"""Dispatch to the appropriate backend."""
|
|
219
|
+
if backend == "anthropic":
|
|
220
|
+
return generate_anthropic(video, transcript, content_type, model, prompt_file, language)
|
|
221
|
+
elif backend == "llm":
|
|
222
|
+
return generate_llm(video, transcript, content_type, model, prompt_file, language)
|
|
223
|
+
elif backend == "nvidia":
|
|
224
|
+
return generate_nvidia(video, transcript, content_type, model, prompt_file, language)
|
|
225
|
+
else:
|
|
226
|
+
raise ValueError(f"Unknown backend: {backend!r}. Choose 'anthropic', 'llm', or 'nvidia'.")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# This package contains built-in prompt templates.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
You are an expert technical writer specialising in academic and conference content. Given the transcript of a tech talk or lecture, produce a structured study document.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Duration: {duration}
|
|
7
|
+
|
|
8
|
+
Produce the following sections in order:
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
The central thesis or topic of the talk and why it matters. Include the speaker's name and affiliation if mentioned.
|
|
12
|
+
Include a link to the source YouTube video.
|
|
13
|
+
|
|
14
|
+
## Prerequisites
|
|
15
|
+
Background knowledge, concepts, or tools a reader should understand before engaging with this material.
|
|
16
|
+
|
|
17
|
+
## Main Points
|
|
18
|
+
For each major section or argument in the talk:
|
|
19
|
+
- A short heading summarising the point
|
|
20
|
+
- 2–5 bullet points capturing the key ideas, data, or conclusions
|
|
21
|
+
- Code blocks or diagrams described textually where relevant
|
|
22
|
+
|
|
23
|
+
## Key Concepts
|
|
24
|
+
Bullet list of terms, frameworks, or tools introduced. One-sentence definition each.
|
|
25
|
+
|
|
26
|
+
## Summary
|
|
27
|
+
One paragraph: the overall argument, findings, or message the speaker wanted to leave the audience with.
|
|
28
|
+
|
|
29
|
+
Write in clear, objective prose. Preserve the logical structure of the talk. Do not editorialize beyond what the speaker said.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
You are an expert technical writer. Given a YouTube video transcript, produce a clear, structured markdown instruction document.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Content type: {content_type}
|
|
7
|
+
- Duration: {duration}
|
|
8
|
+
|
|
9
|
+
Produce the following sections in order:
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
What this video is about and what the viewer will learn or be able to do after watching.
|
|
13
|
+
Include a link to the source YouTube video.
|
|
14
|
+
|
|
15
|
+
## Prerequisites
|
|
16
|
+
What the reader needs to know, have installed, or have ready before starting.
|
|
17
|
+
|
|
18
|
+
## Step-by-step Instructions
|
|
19
|
+
Numbered, clear, actionable steps. Use sub-steps where needed. Include code blocks for any commands or code shown.
|
|
20
|
+
|
|
21
|
+
## Key Concepts
|
|
22
|
+
Bullet list of important terms, tools, or ideas introduced. Give a one-sentence explanation for each.
|
|
23
|
+
|
|
24
|
+
## Summary
|
|
25
|
+
A single paragraph recapping what was covered and the main takeaway.
|
|
26
|
+
|
|
27
|
+
Write in clear, direct prose. Use markdown formatting (headers, bullets, numbered lists, code fences). Do not invent steps not present in the transcript.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
You are an experienced IB teacher and study guide author. Given the transcript of an IB student video, produce a structured revision document aligned with IB assessment expectations.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Duration: {duration}
|
|
7
|
+
|
|
8
|
+
Produce the following sections in order:
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
The subject, topic, and IB syllabus point(s) this video addresses. State the IB course and level (SL/HL) if identifiable.
|
|
12
|
+
|
|
13
|
+
## Prior Knowledge Required
|
|
14
|
+
Concepts and vocabulary the student should already know before studying this topic.
|
|
15
|
+
|
|
16
|
+
## Core Content
|
|
17
|
+
Numbered steps or structured explanation of the topic, as presented in the video:
|
|
18
|
+
- Include definitions, formulas, or worked examples in code/math blocks where appropriate
|
|
19
|
+
- Flag IB command terms used (e.g., "Evaluate", "Discuss", "Calculate") and what they require
|
|
20
|
+
|
|
21
|
+
## Key Vocabulary
|
|
22
|
+
Bullet list of subject-specific terms introduced. IB-accurate definitions.
|
|
23
|
+
|
|
24
|
+
## Exam Tips
|
|
25
|
+
Any advice given in the video about exam technique, common mistakes, or mark-scheme expectations.
|
|
26
|
+
|
|
27
|
+
## Summary
|
|
28
|
+
One paragraph recap of the key learning for this topic and how it fits into the broader IB syllabus.
|
|
29
|
+
|
|
30
|
+
Write at a level appropriate for an IB student. Be precise and accurate. Do not invent content not present in the transcript.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
You are an experienced IB teacher and study guide author. Given the transcript of an IB student video, produce a structured revision document aligned with IB assessment expectations.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Duration: {duration}
|
|
7
|
+
|
|
8
|
+
Produce the following sections in order:
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
The subject, topic, and IB syllabus point(s) this video addresses. State the IB course and level (SL/HL) if identifiable.
|
|
12
|
+
Include a link to the source YouTube video.
|
|
13
|
+
|
|
14
|
+
## Prior Knowledge Required
|
|
15
|
+
Concepts and vocabulary the student should already know before studying this topic.
|
|
16
|
+
|
|
17
|
+
## Step-by-step Instructions
|
|
18
|
+
Numbered steps to accomplish a similar task as demonstrated in the video. For each step:
|
|
19
|
+
- State clearly what to do
|
|
20
|
+
- Include the instructions given and any IB command terms used
|
|
21
|
+
- Note any common pitfalls or "watch out" moments mentioned
|
|
22
|
+
|
|
23
|
+
## Core Content
|
|
24
|
+
Numbered steps or structured explanation of the topic, as presented in the video:
|
|
25
|
+
- Include definitions, formulas, or worked examples in code/math blocks where appropriate
|
|
26
|
+
- Flag IB command terms used (e.g., "Evaluate", "Discuss", "Calculate") and what they require
|
|
27
|
+
|
|
28
|
+
## Key Vocabulary
|
|
29
|
+
Bullet list of subject-specific terms introduced. IB-accurate definitions.
|
|
30
|
+
|
|
31
|
+
## Exam Tips
|
|
32
|
+
Any advice given in the video about exam technique, common mistakes, or mark-scheme expectations.
|
|
33
|
+
|
|
34
|
+
## Summary
|
|
35
|
+
One paragraph recap of the key learning for this topic and how it fits into the broader IB syllabus.
|
|
36
|
+
|
|
37
|
+
Write at a level appropriate for an IB student. Be precise and accurate. Do not invent content not present in the transcript.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
You are an expert technical writer specialising in academic and conference content. Given the transcript of a tech talk or lecture, produce a structured study document.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Duration: {duration}
|
|
7
|
+
|
|
8
|
+
Produce the following sections in order:
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
The central thesis or topic of the talk and why it matters. Include the speaker's name and affiliation if mentioned.
|
|
12
|
+
Include a link to the source YouTube video.
|
|
13
|
+
|
|
14
|
+
## Prerequisites
|
|
15
|
+
Background knowledge, concepts, or tools a reader should understand before engaging with this material.
|
|
16
|
+
|
|
17
|
+
## Main Points
|
|
18
|
+
For each major section or argument in the talk:
|
|
19
|
+
- A short heading summarising the point
|
|
20
|
+
- 2–5 bullet points capturing the key ideas, data, or conclusions
|
|
21
|
+
- Code blocks or diagrams described textually where relevant
|
|
22
|
+
|
|
23
|
+
## Key Concepts
|
|
24
|
+
Bullet list of terms, frameworks, or tools introduced. One-sentence definition each.
|
|
25
|
+
|
|
26
|
+
## Summary
|
|
27
|
+
One paragraph: the overall argument, findings, or message the speaker wanted to leave the audience with.
|
|
28
|
+
|
|
29
|
+
Write in clear, objective prose. Preserve the logical structure of the talk. Do not editorialize beyond what the speaker said.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
You are an expert technical writer specialising in how-to guides and tutorials. Given a YouTube tutorial transcript, produce a precise, hands-on instruction document a reader can follow without watching the video.
|
|
2
|
+
|
|
3
|
+
Video details:
|
|
4
|
+
- Title: {title}
|
|
5
|
+
- Channel: {channel}
|
|
6
|
+
- Duration: {duration}
|
|
7
|
+
|
|
8
|
+
Produce the following sections in order:
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
What task or skill this tutorial teaches and what the finished result looks like.
|
|
12
|
+
Include a link to the source YouTube video.
|
|
13
|
+
|
|
14
|
+
## Prerequisites
|
|
15
|
+
Tools, software, accounts, or prior knowledge required. Be specific about versions where mentioned.
|
|
16
|
+
|
|
17
|
+
## Step-by-step Instructions
|
|
18
|
+
Numbered steps in the exact order demonstrated. For each step:
|
|
19
|
+
- State clearly what to do
|
|
20
|
+
- Include code blocks (with language tag) for any commands, config, or code
|
|
21
|
+
- Note any common pitfalls or "watch out" moments mentioned
|
|
22
|
+
|
|
23
|
+
## Key Concepts
|
|
24
|
+
Bullet list of important terms, tools, or techniques introduced. One sentence each.
|
|
25
|
+
|
|
26
|
+
## Summary
|
|
27
|
+
One paragraph: what was built/accomplished and what the reader should now be able to do independently.
|
|
28
|
+
|
|
29
|
+
Write in imperative voice ("Click...", "Run...", "Open..."). Do not add steps not present in the transcript.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Audio transcription using Mistral's voxtral API."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from mistralai.client import Mistral
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def transcribe(audio_path: Path, model: str = "voxtral-mini-latest") -> str:
    """Send an audio file to Mistral for transcription and return the text.

    Args:
        audio_path: Location of the audio file to upload.
        model: Name of the Mistral transcription model to request.

    Returns:
        The transcript with leading and trailing whitespace removed.

    Raises:
        RuntimeError: If ``MISTRAL_API_KEY`` is missing or empty in the
            environment.
        ValueError: If the response carries no (or only blank) transcript text.
    """
    key = os.getenv("MISTRAL_API_KEY")
    if not key:
        message = (
            "MISTRAL_API_KEY environment variable is not set. "
            "Get your key at https://console.mistral.ai/"
        )
        raise RuntimeError(message)

    mistral = Mistral(api_key=key)

    # The file handle is handed to the client, so the request must be issued
    # while the file is still open.
    with audio_path.open("rb") as audio:
        payload = {"file_name": audio_path.name, "content": audio}
        response = mistral.audio.transcriptions.complete(model=model, file=payload)

    text = (response.text or "").strip()
    if text:
        return text
    raise ValueError(f"Empty transcript returned for {audio_path.name}")
|
yt_instruct/utils.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Utility helpers: slugify, path helpers, metadata formatting."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import unicodedata
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def slugify(text: str, max_length: int = 80) -> str:
    """Convert text to a lowercase, ASCII-only, hyphen-separated slug.

    Args:
        text: Arbitrary input text, e.g. a video title.
        max_length: Maximum number of characters in the returned slug.

    Returns:
        The slug, at most ``max_length`` characters long, with no leading or
        trailing hyphen. The result is an empty string when the input has no
        characters that survive ASCII folding.
    """
    # Decompose accented characters so the base letter survives the ASCII
    # round-trip below (the combining marks are dropped by "ignore").
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", "ignore").decode("ascii")
    text = text.lower()
    # Drop everything except word characters, whitespace and hyphens.
    text = re.sub(r"[^\w\s-]", "", text)
    # Collapse any run of whitespace, underscores or hyphens to one hyphen.
    text = re.sub(r"[\s_-]+", "-", text)
    text = text.strip("-")
    # Truncation can cut right after a separator; strip again so the slug
    # never ends with a dangling hyphen.
    return text[:max_length].rstrip("-")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def output_path(output_dir: Path, title: str, suffix: str = "_instructions.md") -> Path:
    """Build the output file path for a video from its title.

    The file name is the slugified title followed by ``suffix``, placed
    directly inside ``output_dir``.
    """
    filename = slugify(title) + suffix
    return output_dir / filename
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def format_duration(seconds: int) -> str:
|
|
26
|
+
"""Format duration in seconds to HH:MM:SS or MM:SS."""
|
|
27
|
+
if seconds is None:
|
|
28
|
+
return "unknown"
|
|
29
|
+
h = seconds // 3600
|
|
30
|
+
m = (seconds % 3600) // 60
|
|
31
|
+
s = seconds % 60
|
|
32
|
+
if h:
|
|
33
|
+
return f"{h}:{m:02d}:{s:02d}"
|
|
34
|
+
return f"{m}:{s:02d}"
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yt-instruct
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Convert YouTube videos into structured markdown instruction documents
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: youtube,transcription,llm,instructions,mistral
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: click>=8.1
|
|
13
|
+
Requires-Dist: yt-dlp>=2024.1
|
|
14
|
+
Requires-Dist: mistralai>=1.0
|
|
15
|
+
Requires-Dist: anthropic>=0.40
|
|
16
|
+
Requires-Dist: openai>=1.0
|
|
17
|
+
Requires-Dist: llm>=0.17
|
|
18
|
+
Requires-Dist: llm-anthropic>=0.12
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-mock>=3.0; extra == "dev"
|
|
22
|
+
|
|
23
|
+
# yt-instruct
|
|
24
|
+
|
|
25
|
+
Convert YouTube videos into structured markdown instruction documents.
|
|
26
|
+
|
|
27
|
+
Downloads audio via yt-dlp, transcribes it with Mistral's Voxtral API, then generates a clean how-to document using Claude by default (other LLM backends can be selected with `--backend`).
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Run with uvx from a local checkout of the repository (no install needed)
|
|
33
|
+
uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
|
|
34
|
+
|
|
35
|
+
# Or install
|
|
36
|
+
pip install -e .
|
|
37
|
+
yt-instruct https://www.youtube.com/watch?v=<id>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Requirements
|
|
41
|
+
|
|
42
|
+
- `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
|
|
43
|
+
- `MISTRAL_API_KEY` — [console.mistral.ai](https://console.mistral.ai/)
|
|
44
|
+
- `ANTHROPIC_API_KEY` — for default backend
|
|
45
|
+
- `NVIDIA_API_KEY` — only for `--backend nvidia`
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
yt-instruct [OPTIONS] URL [URL...]
|
|
51
|
+
yt-instruct [OPTIONS] --url-file urls.txt
|
|
52
|
+
yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
|
|
53
|
+
yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
|
|
54
|
+
|
|
55
|
+
Options:
|
|
56
|
+
--output-dir PATH Output directory [default: .]
|
|
57
|
+
--keep Keep intermediate audio + transcript files
|
|
58
|
+
--merge Merge all videos into one document
|
|
59
|
+
--resume Skip already-generated outputs; reuse cached transcripts
|
|
60
|
+
--content-type [tutorial|lecture|ib|auto]
|
|
61
|
+
Prompt style [default: auto]
|
|
62
|
+
--backend [anthropic|llm|nvidia]
|
|
63
|
+
LLM backend [default: anthropic]
|
|
64
|
+
--model TEXT Model name [default: claude-sonnet-4-6]
|
|
65
|
+
--prompt-file PATH Custom system prompt (overrides built-in)
|
|
66
|
+
--language LANG Output language (e.g. 'French'). Defaults to English.
|
|
67
|
+
--transcript-file PATH Use existing transcript; skips download and transcription
|
|
68
|
+
--audio-file PATH Use existing audio file; skips download, transcribes directly
|
|
69
|
+
--title TEXT Video title for --transcript-file or --audio-file
|
|
70
|
+
--draft Set draft: true in the output frontmatter [default: false]
|
|
71
|
+
--mistral-model TEXT [default: voxtral-mini-latest]
|
|
72
|
+
--audio-format [mp3|m4a] [default: mp3]
|
|
73
|
+
--version Show version and exit
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Output Frontmatter
|
|
77
|
+
|
|
78
|
+
Every generated file includes YAML frontmatter:
|
|
79
|
+
|
|
80
|
+
```yaml
|
|
81
|
+
---
|
|
82
|
+
title: "Video Title"
|
|
83
|
+
url: https://youtu.be/...
|
|
84
|
+
description: "YouTube video description"
|
|
85
|
+
date: 2026-04-12
|
|
86
|
+
draft: false
|
|
87
|
+
---
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
|
|
91
|
+
Merged documents (`--merge`) do not include frontmatter.
|
|
92
|
+
|
|
93
|
+
## Content Types
|
|
94
|
+
|
|
95
|
+
| Type | Use for |
|
|
96
|
+
|------|---------|
|
|
97
|
+
| `auto` | Let the LLM detect (default) |
|
|
98
|
+
| `tutorial` | How-to / step-by-step videos |
|
|
99
|
+
| `lecture` | Tech talks, academic presentations |
|
|
100
|
+
| `ib` | IB student subject videos |
|
|
101
|
+
|
|
102
|
+
## Custom Prompts
|
|
103
|
+
|
|
104
|
+
Override the built-in prompt with your own file. Template variables:
|
|
105
|
+
`{title}`, `{channel}`, `{content_type}`, `{duration}`
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
yt-instruct <url> --prompt-file my_prompt.md
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Using the `llm` backend
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install llm llm-anthropic
|
|
115
|
+
llm keys set anthropic
|
|
116
|
+
yt-instruct <url> --backend llm --model claude-sonnet-4-6
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Using the `nvidia` backend
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Batch Processing
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# Multiple URLs
|
|
129
|
+
yt-instruct url1 url2 url3 --output-dir ./docs
|
|
130
|
+
|
|
131
|
+
# Playlist (automatically expanded)
|
|
132
|
+
yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
|
|
133
|
+
|
|
134
|
+
# From file
|
|
135
|
+
cat urls.txt | yt-instruct --url-file /dev/stdin
|
|
136
|
+
|
|
137
|
+
# Merge all into one doc
|
|
138
|
+
yt-instruct url1 url2 --merge --output-dir ./docs
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Skip Steps — Use Existing Files
|
|
142
|
+
|
|
143
|
+
`--audio-file` and `--transcript-file` resolve relative to `--output-dir` if the file isn't found at the given path. This lets you reference files already in the output directory without typing the full path:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# Start from an existing transcript (skips download + transcription)
|
|
147
|
+
yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
|
|
148
|
+
|
|
149
|
+
# If the file is not found at the given path, it is looked up in ./docs (the --output-dir) automatically
|
|
150
|
+
yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
|
|
151
|
+
|
|
152
|
+
# Start from an existing audio file (skips download, still transcribes)
|
|
153
|
+
yt-instruct --audio-file recording.mp3 --output-dir ./docs
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Resume an Interrupted Run
|
|
157
|
+
|
|
158
|
+
Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# First run (interrupted partway through)
|
|
162
|
+
yt-instruct --url-file urls.txt --keep --output-dir ./docs
|
|
163
|
+
|
|
164
|
+
# Resume — skips videos with existing output; reuses cached transcripts
|
|
165
|
+
yt-instruct --url-file urls.txt --resume --output-dir ./docs
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
`--resume` checks at two levels per video:
|
|
169
|
+
1. Output `.md` already exists → skip entirely
|
|
170
|
+
2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription, regenerate only
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
yt_instruct/__init__.py,sha256=puGUXxk3GrYnjI7p-NMAxj2G7ak2PMN0kaR0VH2d6FU,113
|
|
2
|
+
yt_instruct/cli.py,sha256=2HpM9sSNAq6MHG8AV2l9kbv7ow97K5br7yuaAwFPMz0,15618
|
|
3
|
+
yt_instruct/downloader.py,sha256=ooKZH-oqCWwSd8sAZdrPTtcOYxPrn85Vy-nRXXSEPh8,3100
|
|
4
|
+
yt_instruct/generator.py,sha256=TL-2UwWbRQS1zp-FKSvjeA7gEG-lHnDY1K3FnwOkUCQ,7309
|
|
5
|
+
yt_instruct/transcriber.py,sha256=v6cYbsQUuqJ1zD4HZGtd_FvsNtszR_ksUUbTE-fIXHk,935
|
|
6
|
+
yt_instruct/utils.py,sha256=Rxi71MyxP31Su9oqdThc-WluIp4HoE_qZAXqefaXOiE,1018
|
|
7
|
+
yt_instruct/prompts/__init__.py,sha256=WnDEDYVY8aRBWOKOrshZb51w1Ly6j1wgLzvta3gDgIk,51
|
|
8
|
+
yt_instruct/prompts/adhd.md,sha256=xcTcJVFWba3v03JQsQ8zpbql2PtWurq4GqQovM8uVKY,1164
|
|
9
|
+
yt_instruct/prompts/default.md,sha256=Dk4H8TWLE3QLFk0WKY1YDvEKuR6UVCB0QQrJXECDknQ,1021
|
|
10
|
+
yt_instruct/prompts/ib copy.md,sha256=nOJPcDQEN_eYSMaEwp09zwJ6NuVywO7RP5Vn5jelHwk,1279
|
|
11
|
+
yt_instruct/prompts/ib.md,sha256=u7yTo56yhKatDXC88FI4XuOH2Md247HAZ8aG01kHnC8,1578
|
|
12
|
+
yt_instruct/prompts/lecture.md,sha256=xcTcJVFWba3v03JQsQ8zpbql2PtWurq4GqQovM8uVKY,1164
|
|
13
|
+
yt_instruct/prompts/tutorial.md,sha256=-ebKqa5UDbtfWMTraQmh7-aCtpBko69TnLKKl4hAKgE,1172
|
|
14
|
+
yt_instruct-1.0.0.dist-info/METADATA,sha256=sArVltXvahVBQpr6RBuiBb8dfyvL_i1kQvl1BDpQekQ,5613
|
|
15
|
+
yt_instruct-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
16
|
+
yt_instruct-1.0.0.dist-info/entry_points.txt,sha256=qPxnA0Z4pI31BtK6nHqkj0ht6plyOu0pdRQ1i0CIMl8,52
|
|
17
|
+
yt_instruct-1.0.0.dist-info/top_level.txt,sha256=hxhWCJlZPjBIQ16fG70OuZ8J1n2zTnWpRJqXTr02eGc,12
|
|
18
|
+
yt_instruct-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
yt_instruct
|