yt-instruct 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yt_instruct-1.0.0/PKG-INFO +170 -0
- yt_instruct-1.0.0/README.md +148 -0
- yt_instruct-1.0.0/pyproject.toml +38 -0
- yt_instruct-1.0.0/setup.cfg +4 -0
- yt_instruct-1.0.0/src/yt_instruct/__init__.py +3 -0
- yt_instruct-1.0.0/src/yt_instruct/cli.py +415 -0
- yt_instruct-1.0.0/src/yt_instruct/downloader.py +105 -0
- yt_instruct-1.0.0/src/yt_instruct/generator.py +226 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/__init__.py +1 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/adhd.md +29 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/default.md +27 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/ib copy.md +30 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/ib.md +37 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/lecture.md +29 -0
- yt_instruct-1.0.0/src/yt_instruct/prompts/tutorial.md +29 -0
- yt_instruct-1.0.0/src/yt_instruct/transcriber.py +30 -0
- yt_instruct-1.0.0/src/yt_instruct/utils.py +34 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/PKG-INFO +170 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/SOURCES.txt +21 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/dependency_links.txt +1 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/entry_points.txt +2 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/requires.txt +11 -0
- yt_instruct-1.0.0/src/yt_instruct.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yt-instruct
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Convert YouTube videos into structured markdown instruction documents
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: youtube,transcription,llm,instructions,mistral
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: click>=8.1
|
|
13
|
+
Requires-Dist: yt-dlp>=2024.1
|
|
14
|
+
Requires-Dist: mistralai>=1.0
|
|
15
|
+
Requires-Dist: anthropic>=0.40
|
|
16
|
+
Requires-Dist: openai>=1.0
|
|
17
|
+
Requires-Dist: llm>=0.17
|
|
18
|
+
Requires-Dist: llm-anthropic>=0.12
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-mock>=3.0; extra == "dev"
|
|
22
|
+
|
|
23
|
+
# yt-instruct
|
|
24
|
+
|
|
25
|
+
Convert YouTube videos into structured markdown instruction documents.
|
|
26
|
+
|
|
27
|
+
Downloads audio via yt-dlp, transcribes it with Mistral's Voxtral API, then generates a clean how-to document using an LLM (Claude by default).
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Run with uvx from a local checkout of this repository (no install needed)
|
|
33
|
+
uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
|
|
34
|
+
|
|
35
|
+
# Or install
|
|
36
|
+
pip install -e .
|
|
37
|
+
yt-instruct https://www.youtube.com/watch?v=<id>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Requirements
|
|
41
|
+
|
|
42
|
+
- `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
|
|
43
|
+
- `MISTRAL_API_KEY` — [console.mistral.ai](https://console.mistral.ai/)
|
|
44
|
+
- `ANTHROPIC_API_KEY` — for the default `anthropic` backend
|
|
45
|
+
- `NVIDIA_API_KEY` — only for `--backend nvidia`
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
yt-instruct [OPTIONS] URL [URL...]
|
|
51
|
+
yt-instruct [OPTIONS] --url-file urls.txt
|
|
52
|
+
yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
|
|
53
|
+
yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
|
|
54
|
+
|
|
55
|
+
Options:
|
|
56
|
+
--output-dir PATH Output directory [default: .]
|
|
57
|
+
--keep Keep intermediate audio + transcript files
|
|
58
|
+
--merge Merge all videos into one document
|
|
59
|
+
--resume Skip already-generated outputs; reuse cached transcripts
|
|
60
|
+
--content-type [tutorial|lecture|ib|auto]
|
|
61
|
+
Prompt style [default: auto]
|
|
62
|
+
--backend [anthropic|llm|nvidia]
|
|
63
|
+
LLM backend [default: anthropic]
|
|
64
|
+
--model TEXT Model name [default: claude-sonnet-4-6]
|
|
65
|
+
--prompt-file PATH Custom system prompt (overrides built-in)
|
|
66
|
+
--language LANG Output language (e.g. 'French'). Defaults to English.
|
|
67
|
+
--transcript-file PATH Use existing transcript; skips download and transcription
|
|
68
|
+
--audio-file PATH Use existing audio file; skips download, transcribes directly
|
|
69
|
+
--title TEXT Video title for --transcript-file or --audio-file
|
|
70
|
+
--draft Set draft: true in the output frontmatter [default: false]
|
|
71
|
+
--mistral-model TEXT [default: voxtral-mini-latest]
|
|
72
|
+
--audio-format [mp3|m4a] [default: mp3]
|
|
73
|
+
--version Show version and exit
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Output Frontmatter
|
|
77
|
+
|
|
78
|
+
Every generated file includes YAML frontmatter:
|
|
79
|
+
|
|
80
|
+
```yaml
|
|
81
|
+
---
|
|
82
|
+
title: "Video Title"
|
|
83
|
+
url: https://youtu.be/...
|
|
84
|
+
description: "YouTube video description"
|
|
85
|
+
date: 2026-04-12
|
|
86
|
+
draft: false
|
|
87
|
+
---
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
|
|
91
|
+
Merged documents (`--merge`) do not include frontmatter.
|
|
92
|
+
|
|
93
|
+
## Content Types
|
|
94
|
+
|
|
95
|
+
| Type | Use for |
|
|
96
|
+
|------|---------|
|
|
97
|
+
| `auto` | Let the LLM detect (default) |
|
|
98
|
+
| `tutorial` | How-to / step-by-step videos |
|
|
99
|
+
| `lecture` | Tech talks, academic presentations |
|
|
100
|
+
| `ib` | IB student subject videos |
|
|
101
|
+
|
|
102
|
+
## Custom Prompts
|
|
103
|
+
|
|
104
|
+
Override the built-in prompt with your own file. Template variables:
|
|
105
|
+
`{title}`, `{channel}`, `{content_type}`, `{duration}`
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
yt-instruct <url> --prompt-file my_prompt.md
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Using the `llm` backend
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install llm llm-anthropic
|
|
115
|
+
llm keys set anthropic
|
|
116
|
+
yt-instruct <url> --backend llm --model claude-sonnet-4-6
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Using the `nvidia` backend
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Batch Processing
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# Multiple URLs
|
|
129
|
+
yt-instruct url1 url2 url3 --output-dir ./docs
|
|
130
|
+
|
|
131
|
+
# Playlist (automatically expanded)
|
|
132
|
+
yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
|
|
133
|
+
|
|
134
|
+
# From file
|
|
135
|
+
cat urls.txt | yt-instruct --url-file /dev/stdin
|
|
136
|
+
|
|
137
|
+
# Merge all into one doc
|
|
138
|
+
yt-instruct url1 url2 --merge --output-dir ./docs
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Skip Steps — Use Existing Files
|
|
142
|
+
|
|
143
|
+
`--audio-file` and `--transcript-file` are resolved relative to `--output-dir` if the file isn't found at the given path. This lets you reference files already in the output directory without typing the full path:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# Start from an existing transcript (skips download + transcription)
|
|
147
|
+
yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
|
|
148
|
+
|
|
149
|
+
# If the file isn't found at the given path, it is looked up in ./docs automatically
|
|
150
|
+
yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
|
|
151
|
+
|
|
152
|
+
# Start from an existing audio file (skips download, still transcribes)
|
|
153
|
+
yt-instruct --audio-file recording.mp3 --output-dir ./docs
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Resume an Interrupted Run
|
|
157
|
+
|
|
158
|
+
Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# First run (interrupted partway through)
|
|
162
|
+
yt-instruct --url-file urls.txt --keep --output-dir ./docs
|
|
163
|
+
|
|
164
|
+
# Resume — skips videos with existing output; reuses cached transcripts
|
|
165
|
+
yt-instruct --url-file urls.txt --resume --output-dir ./docs
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
`--resume` checks at two levels per video:
|
|
169
|
+
1. Output `.md` already exists → skip entirely
|
|
170
|
+
2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# yt-instruct
|
|
2
|
+
|
|
3
|
+
Convert YouTube videos into structured markdown instruction documents.
|
|
4
|
+
|
|
5
|
+
Downloads audio via yt-dlp, transcribes it with Mistral's Voxtral API, then generates a clean how-to document using an LLM (Claude by default).
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Run with uvx from a local checkout of this repository (no install needed)
|
|
11
|
+
uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
|
|
12
|
+
|
|
13
|
+
# Or install
|
|
14
|
+
pip install -e .
|
|
15
|
+
yt-instruct https://www.youtube.com/watch?v=<id>
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Requirements
|
|
19
|
+
|
|
20
|
+
- `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
|
|
21
|
+
- `MISTRAL_API_KEY` — [console.mistral.ai](https://console.mistral.ai/)
|
|
22
|
+
- `ANTHROPIC_API_KEY` — for the default `anthropic` backend
|
|
23
|
+
- `NVIDIA_API_KEY` — only for `--backend nvidia`
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
yt-instruct [OPTIONS] URL [URL...]
|
|
29
|
+
yt-instruct [OPTIONS] --url-file urls.txt
|
|
30
|
+
yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
|
|
31
|
+
yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
|
|
32
|
+
|
|
33
|
+
Options:
|
|
34
|
+
--output-dir PATH Output directory [default: .]
|
|
35
|
+
--keep Keep intermediate audio + transcript files
|
|
36
|
+
--merge Merge all videos into one document
|
|
37
|
+
--resume Skip already-generated outputs; reuse cached transcripts
|
|
38
|
+
--content-type [tutorial|lecture|ib|auto]
|
|
39
|
+
Prompt style [default: auto]
|
|
40
|
+
--backend [anthropic|llm|nvidia]
|
|
41
|
+
LLM backend [default: anthropic]
|
|
42
|
+
--model TEXT Model name [default: claude-sonnet-4-6]
|
|
43
|
+
--prompt-file PATH Custom system prompt (overrides built-in)
|
|
44
|
+
--language LANG Output language (e.g. 'French'). Defaults to English.
|
|
45
|
+
--transcript-file PATH Use existing transcript; skips download and transcription
|
|
46
|
+
--audio-file PATH Use existing audio file; skips download, transcribes directly
|
|
47
|
+
--title TEXT Video title for --transcript-file or --audio-file
|
|
48
|
+
--draft Set draft: true in the output frontmatter [default: false]
|
|
49
|
+
--mistral-model TEXT [default: voxtral-mini-latest]
|
|
50
|
+
--audio-format [mp3|m4a] [default: mp3]
|
|
51
|
+
--version Show version and exit
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Output Frontmatter
|
|
55
|
+
|
|
56
|
+
Every generated file includes YAML frontmatter:
|
|
57
|
+
|
|
58
|
+
```yaml
|
|
59
|
+
---
|
|
60
|
+
title: "Video Title"
|
|
61
|
+
url: https://youtu.be/...
|
|
62
|
+
description: "YouTube video description"
|
|
63
|
+
date: 2026-04-12
|
|
64
|
+
draft: false
|
|
65
|
+
---
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
|
|
69
|
+
Merged documents (`--merge`) do not include frontmatter.
|
|
70
|
+
|
|
71
|
+
## Content Types
|
|
72
|
+
|
|
73
|
+
| Type | Use for |
|
|
74
|
+
|------|---------|
|
|
75
|
+
| `auto` | Let the LLM detect (default) |
|
|
76
|
+
| `tutorial` | How-to / step-by-step videos |
|
|
77
|
+
| `lecture` | Tech talks, academic presentations |
|
|
78
|
+
| `ib` | IB student subject videos |
|
|
79
|
+
|
|
80
|
+
## Custom Prompts
|
|
81
|
+
|
|
82
|
+
Override the built-in prompt with your own file. Template variables:
|
|
83
|
+
`{title}`, `{channel}`, `{content_type}`, `{duration}`
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
yt-instruct <url> --prompt-file my_prompt.md
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Using the `llm` backend
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pip install llm llm-anthropic
|
|
93
|
+
llm keys set anthropic
|
|
94
|
+
yt-instruct <url> --backend llm --model claude-sonnet-4-6
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Using the `nvidia` backend
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Batch Processing
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Multiple URLs
|
|
107
|
+
yt-instruct url1 url2 url3 --output-dir ./docs
|
|
108
|
+
|
|
109
|
+
# Playlist (automatically expanded)
|
|
110
|
+
yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
|
|
111
|
+
|
|
112
|
+
# From file
|
|
113
|
+
cat urls.txt | yt-instruct --url-file /dev/stdin
|
|
114
|
+
|
|
115
|
+
# Merge all into one doc
|
|
116
|
+
yt-instruct url1 url2 --merge --output-dir ./docs
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Skip Steps — Use Existing Files
|
|
120
|
+
|
|
121
|
+
`--audio-file` and `--transcript-file` are resolved relative to `--output-dir` if the file isn't found at the given path. This lets you reference files already in the output directory without typing the full path:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Start from an existing transcript (skips download + transcription)
|
|
125
|
+
yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
|
|
126
|
+
|
|
127
|
+
# If the file isn't found at the given path, it is looked up in ./docs automatically
|
|
128
|
+
yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
|
|
129
|
+
|
|
130
|
+
# Start from an existing audio file (skips download, still transcribes)
|
|
131
|
+
yt-instruct --audio-file recording.mp3 --output-dir ./docs
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Resume an Interrupted Run
|
|
135
|
+
|
|
136
|
+
Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# First run (interrupted partway through)
|
|
140
|
+
yt-instruct --url-file urls.txt --keep --output-dir ./docs
|
|
141
|
+
|
|
142
|
+
# Resume — skips videos with existing output; reuses cached transcripts
|
|
143
|
+
yt-instruct --url-file urls.txt --resume --output-dir ./docs
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`--resume` checks at two levels per video:
|
|
147
|
+
1. Output `.md` already exists → skip entirely
|
|
148
|
+
2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "yt-instruct"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Convert YouTube videos into structured markdown instruction documents"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
keywords = ["youtube", "transcription", "llm", "instructions", "mistral"]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Programming Language :: Python :: 3",
|
|
11
|
+
"License :: OSI Approved :: MIT License",
|
|
12
|
+
"Operating System :: OS Independent",
|
|
13
|
+
]
|
|
14
|
+
dependencies = [
|
|
15
|
+
"click>=8.1",
|
|
16
|
+
"yt-dlp>=2024.1",
|
|
17
|
+
"mistralai>=1.0",
|
|
18
|
+
"anthropic>=0.40",
|
|
19
|
+
"openai>=1.0",
|
|
20
|
+
"llm>=0.17",
|
|
21
|
+
"llm-anthropic>=0.12",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
yt-instruct = "yt_instruct.cli:cli"
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = ["pytest>=8.0", "pytest-mock>=3.0"]
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["setuptools>=68", "wheel"]
|
|
32
|
+
build-backend = "setuptools.build_meta"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.packages.find]
|
|
35
|
+
where = ["src"]
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.package-data]
|
|
38
|
+
yt_instruct = ["prompts/*.md"]
|