yt-instruct 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: yt-instruct
3
+ Version: 1.0.0
4
+ Summary: Convert YouTube videos into structured markdown instruction documents
5
+ License: MIT
6
+ Keywords: youtube,transcription,llm,instructions,mistral
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: click>=8.1
13
+ Requires-Dist: yt-dlp>=2024.1
14
+ Requires-Dist: mistralai>=1.0
15
+ Requires-Dist: anthropic>=0.40
16
+ Requires-Dist: openai>=1.0
17
+ Requires-Dist: llm>=0.17
18
+ Requires-Dist: llm-anthropic>=0.12
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest>=8.0; extra == "dev"
21
+ Requires-Dist: pytest-mock>=3.0; extra == "dev"
22
+
23
+ # yt-instruct
24
+
25
+ Convert YouTube videos into structured markdown instruction documents.
26
+
27
+ yt-instruct downloads the audio with yt-dlp, transcribes it with Mistral's Voxtral API, and then generates a clean how-to document with an LLM (Claude by default).
28
+
29
+ ## Quick Start
30
+
31
+ ```bash
32
+ # Run with uvx from a local checkout of this repository (no install needed)
33
+ uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
34
+
35
+ # Or install
36
+ pip install -e .
37
+ yt-instruct https://www.youtube.com/watch?v=<id>
38
+ ```
39
+
40
+ ## Requirements
41
+
42
+ - `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
43
+ - `MISTRAL_API_KEY` — for transcription; create one at [console.mistral.ai](https://console.mistral.ai/)
44
+ - `ANTHROPIC_API_KEY` — for the default `anthropic` backend
45
+ - `NVIDIA_API_KEY` — only for `--backend nvidia`
46
+
47
+ ## Usage
48
+
49
+ ```
50
+ yt-instruct [OPTIONS] URL [URL...]
51
+ yt-instruct [OPTIONS] --url-file urls.txt
52
+ yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
53
+ yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
54
+
55
+ Options:
56
+ --output-dir PATH Output directory [default: .]
57
+ --keep Keep intermediate audio + transcript files
58
+ --merge Merge all videos into one document
59
+ --resume Skip already-generated outputs; reuse cached transcripts
60
+ --content-type [tutorial|lecture|ib|auto]
61
+ Prompt style [default: auto]
62
+ --backend [anthropic|llm|nvidia]
63
+ LLM backend [default: anthropic]
64
+ --model TEXT Model name [default: claude-sonnet-4-6]
65
+ --prompt-file PATH Custom system prompt (overrides built-in)
66
+ --language LANG Output language (e.g. 'French'). Defaults to English.
67
+ --transcript-file PATH Use existing transcript; skips download and transcription
68
+ --audio-file PATH Use existing audio file; skips download, transcribes directly
69
+ --title TEXT Video title for --transcript-file or --audio-file
70
+ --draft Set draft: true in the output frontmatter [default: false]
71
+ --mistral-model TEXT Mistral transcription model [default: voxtral-mini-latest]
72
+ --audio-format [mp3|m4a] Format of the downloaded audio [default: mp3]
73
+ --version Show version and exit
74
+ ```
75
+
76
+ ## Output Frontmatter
77
+
78
+ Every generated per-video file includes YAML frontmatter:
79
+
80
+ ```yaml
81
+ ---
82
+ title: "Video Title"
83
+ url: https://youtu.be/...
84
+ description: "YouTube video description"
85
+ date: 2026-04-12
86
+ draft: false
87
+ ---
88
+ ```
89
+
90
+ Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
91
+ Merged documents (`--merge`) do not include frontmatter.
92
+
93
+ ## Content Types
94
+
95
+ | Type | Use for |
96
+ |------|---------|
97
+ | `auto` | Let the LLM detect (default) |
98
+ | `tutorial` | How-to / step-by-step videos |
99
+ | `lecture` | Tech talks, academic presentations |
100
+ | `ib` | IB student subject videos |
101
+
102
+ ## Custom Prompts
103
+
104
+ Override the built-in prompt with your own file. Template variables:
105
+ `{title}`, `{channel}`, `{content_type}`, `{duration}`
106
+
107
+ ```bash
108
+ yt-instruct <url> --prompt-file my_prompt.md
109
+ ```
110
+
111
+ ## Using the `llm` backend
112
+
113
+ ```bash
114
+ pip install llm llm-anthropic
115
+ llm keys set anthropic
116
+ yt-instruct <url> --backend llm --model claude-sonnet-4-6
117
+ ```
118
+
119
+ ## Using the `nvidia` backend
120
+
121
+ ```bash
122
+ NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
123
+ ```
124
+
125
+ ## Batch Processing
126
+
127
+ ```bash
128
+ # Multiple URLs
129
+ yt-instruct url1 url2 url3 --output-dir ./docs
130
+
131
+ # Playlist (automatically expanded)
132
+ yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
133
+
134
+ # From stdin (or pass a file path directly to --url-file)
135
+ cat urls.txt | yt-instruct --url-file /dev/stdin
136
+
137
+ # Merge all into one doc
138
+ yt-instruct url1 url2 --merge --output-dir ./docs
139
+ ```
140
+
141
+ ## Skip Steps — Use Existing Files
142
+
143
+ If the path given to `--audio-file` or `--transcript-file` does not exist as written, it is resolved relative to `--output-dir`. This lets you reference files already in the output directory without typing the full path:
144
+
145
+ ```bash
146
+ # Start from an existing transcript (skips download + transcription)
147
+ yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
148
+
149
+ # If my_transcript.txt is not found at the given path, it is looked up in ./docs automatically
150
+ yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
151
+
152
+ # Start from an existing audio file (skips download, still transcribes)
153
+ yt-instruct --audio-file recording.mp3 --output-dir ./docs
154
+ ```
155
+
156
+ ## Resume an Interrupted Run
157
+
158
+ Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
159
+
160
+ ```bash
161
+ # First run (interrupted partway through)
162
+ yt-instruct --url-file urls.txt --keep --output-dir ./docs
163
+
164
+ # Resume — skips videos with existing output; reuses cached transcripts
165
+ yt-instruct --url-file urls.txt --resume --output-dir ./docs
166
+ ```
167
+
168
+ `--resume` checks at two levels per video:
169
+ 1. Output `.md` already exists → skip entirely
170
+ 2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document
@@ -0,0 +1,148 @@
1
+ # yt-instruct
2
+
3
+ Convert YouTube videos into structured markdown instruction documents.
4
+
5
+ yt-instruct downloads the audio with yt-dlp, transcribes it with Mistral's Voxtral API, and then generates a clean how-to document with an LLM (Claude by default).
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ # Run with uvx from a local checkout of this repository (no install needed)
11
+ uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
12
+
13
+ # Or install
14
+ pip install -e .
15
+ yt-instruct https://www.youtube.com/watch?v=<id>
16
+ ```
17
+
18
+ ## Requirements
19
+
20
+ - `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
21
+ - `MISTRAL_API_KEY` — for transcription; create one at [console.mistral.ai](https://console.mistral.ai/)
22
+ - `ANTHROPIC_API_KEY` — for the default `anthropic` backend
23
+ - `NVIDIA_API_KEY` — only for `--backend nvidia`
24
+
25
+ ## Usage
26
+
27
+ ```
28
+ yt-instruct [OPTIONS] URL [URL...]
29
+ yt-instruct [OPTIONS] --url-file urls.txt
30
+ yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
31
+ yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
32
+
33
+ Options:
34
+ --output-dir PATH Output directory [default: .]
35
+ --keep Keep intermediate audio + transcript files
36
+ --merge Merge all videos into one document
37
+ --resume Skip already-generated outputs; reuse cached transcripts
38
+ --content-type [tutorial|lecture|ib|auto]
39
+ Prompt style [default: auto]
40
+ --backend [anthropic|llm|nvidia]
41
+ LLM backend [default: anthropic]
42
+ --model TEXT Model name [default: claude-sonnet-4-6]
43
+ --prompt-file PATH Custom system prompt (overrides built-in)
44
+ --language LANG Output language (e.g. 'French'). Defaults to English.
45
+ --transcript-file PATH Use existing transcript; skips download and transcription
46
+ --audio-file PATH Use existing audio file; skips download, transcribes directly
47
+ --title TEXT Video title for --transcript-file or --audio-file
48
+ --draft Set draft: true in the output frontmatter [default: false]
49
+ --mistral-model TEXT Mistral transcription model [default: voxtral-mini-latest]
50
+ --audio-format [mp3|m4a] Format of the downloaded audio [default: mp3]
51
+ --version Show version and exit
52
+ ```
53
+
54
+ ## Output Frontmatter
55
+
56
+ Every generated per-video file includes YAML frontmatter:
57
+
58
+ ```yaml
59
+ ---
60
+ title: "Video Title"
61
+ url: https://youtu.be/...
62
+ description: "YouTube video description"
63
+ date: 2026-04-12
64
+ draft: false
65
+ ---
66
+ ```
67
+
68
+ Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
69
+ Merged documents (`--merge`) do not include frontmatter.
70
+
71
+ ## Content Types
72
+
73
+ | Type | Use for |
74
+ |------|---------|
75
+ | `auto` | Let the LLM detect (default) |
76
+ | `tutorial` | How-to / step-by-step videos |
77
+ | `lecture` | Tech talks, academic presentations |
78
+ | `ib` | IB student subject videos |
79
+
80
+ ## Custom Prompts
81
+
82
+ Override the built-in prompt with your own file. Template variables:
83
+ `{title}`, `{channel}`, `{content_type}`, `{duration}`
84
+
85
+ ```bash
86
+ yt-instruct <url> --prompt-file my_prompt.md
87
+ ```
88
+
89
+ ## Using the `llm` backend
90
+
91
+ ```bash
92
+ pip install llm llm-anthropic
93
+ llm keys set anthropic
94
+ yt-instruct <url> --backend llm --model claude-sonnet-4-6
95
+ ```
96
+
97
+ ## Using the `nvidia` backend
98
+
99
+ ```bash
100
+ NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
101
+ ```
102
+
103
+ ## Batch Processing
104
+
105
+ ```bash
106
+ # Multiple URLs
107
+ yt-instruct url1 url2 url3 --output-dir ./docs
108
+
109
+ # Playlist (automatically expanded)
110
+ yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
111
+
112
+ # From stdin (or pass a file path directly to --url-file)
113
+ cat urls.txt | yt-instruct --url-file /dev/stdin
114
+
115
+ # Merge all into one doc
116
+ yt-instruct url1 url2 --merge --output-dir ./docs
117
+ ```
118
+
119
+ ## Skip Steps — Use Existing Files
120
+
121
+ If the path given to `--audio-file` or `--transcript-file` does not exist as written, it is resolved relative to `--output-dir`. This lets you reference files already in the output directory without typing the full path:
122
+
123
+ ```bash
124
+ # Start from an existing transcript (skips download + transcription)
125
+ yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
126
+
127
+ # If my_transcript.txt is not found at the given path, it is looked up in ./docs automatically
128
+ yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
129
+
130
+ # Start from an existing audio file (skips download, still transcribes)
131
+ yt-instruct --audio-file recording.mp3 --output-dir ./docs
132
+ ```
133
+
134
+ ## Resume an Interrupted Run
135
+
136
+ Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
137
+
138
+ ```bash
139
+ # First run (interrupted partway through)
140
+ yt-instruct --url-file urls.txt --keep --output-dir ./docs
141
+
142
+ # Resume — skips videos with existing output; reuses cached transcripts
143
+ yt-instruct --url-file urls.txt --resume --output-dir ./docs
144
+ ```
145
+
146
+ `--resume` checks at two levels per video:
147
+ 1. Output `.md` already exists → skip entirely
148
+ 2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document
@@ -0,0 +1,38 @@
1
+ [project]
2
+ name = "yt-instruct"
3
+ version = "1.0.0"
4
+ description = "Convert YouTube videos into structured markdown instruction documents"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ keywords = ["youtube", "transcription", "llm", "instructions", "mistral"]
9
+ classifiers = [
10
+ "Programming Language :: Python :: 3",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Operating System :: OS Independent",
13
+ ]
14
+ dependencies = [
15
+ "click>=8.1",
16
+ "yt-dlp>=2024.1",
17
+ "mistralai>=1.0",
18
+ "anthropic>=0.40",
19
+ "openai>=1.0",
20
+ "llm>=0.17",
21
+ "llm-anthropic>=0.12",
22
+ ]
23
+
24
+ [project.scripts]
25
+ yt-instruct = "yt_instruct.cli:cli"
26
+
27
+ [project.optional-dependencies]
28
+ dev = ["pytest>=8.0", "pytest-mock>=3.0"]
29
+
30
+ [build-system]
31
+ requires = ["setuptools>=68", "wheel"]
32
+ build-backend = "setuptools.build_meta"
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["src"]
36
+
37
+ [tool.setuptools.package-data]
38
+ yt_instruct = ["prompts/*.md"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """yt-instruct: Convert YouTube videos into structured markdown instruction documents."""
2
+
3
+ __version__ = "1.0.0"