PyPI - yt-instruct - Versions diffs - 1.0.0__tar.gz - Mend

yt-instruct 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

yt_instruct-1.0.0/PKG-INFO +170 -0
yt_instruct-1.0.0/README.md +148 -0
yt_instruct-1.0.0/pyproject.toml +38 -0
yt_instruct-1.0.0/setup.cfg +4 -0
yt_instruct-1.0.0/src/yt_instruct/__init__.py +3 -0
yt_instruct-1.0.0/src/yt_instruct/cli.py +415 -0
yt_instruct-1.0.0/src/yt_instruct/downloader.py +105 -0
yt_instruct-1.0.0/src/yt_instruct/generator.py +226 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/__init__.py +1 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/adhd.md +29 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/default.md +27 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/ib copy.md +30 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/ib.md +37 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/lecture.md +29 -0
yt_instruct-1.0.0/src/yt_instruct/prompts/tutorial.md +29 -0
yt_instruct-1.0.0/src/yt_instruct/transcriber.py +30 -0
yt_instruct-1.0.0/src/yt_instruct/utils.py +34 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/PKG-INFO +170 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/SOURCES.txt +21 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/dependency_links.txt +1 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/entry_points.txt +2 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/requires.txt +11 -0
yt_instruct-1.0.0/src/yt_instruct.egg-info/top_level.txt +1 -0

yt_instruct-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,170 @@
+Metadata-Version: 2.4
+Name: yt-instruct
+Version: 1.0.0
+Summary: Convert YouTube videos into structured markdown instruction documents
+License: MIT
+Keywords: youtube,transcription,llm,instructions,mistral
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: click>=8.1
+Requires-Dist: yt-dlp>=2024.1
+Requires-Dist: mistralai>=1.0
+Requires-Dist: anthropic>=0.40
+Requires-Dist: openai>=1.0
+Requires-Dist: llm>=0.17
+Requires-Dist: llm-anthropic>=0.12
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-mock>=3.0; extra == "dev"
+# yt-instruct
+Convert YouTube videos into structured markdown instruction documents.
+Downloads audio via yt-dlp, transcribes it with Mistral's Voxtral API, then generates a clean how-to document using Claude by default.
+## Quick Start
+```bash
+# Run with uvx (no install needed)
+uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
+# Or install
+pip install -e .
+yt-instruct https://www.youtube.com/watch?v=<id>
+```
+## Requirements
+- `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
+- `MISTRAL_API_KEY` — [console.mistral.ai](https://console.mistral.ai/)
+- `ANTHROPIC_API_KEY` — for the default `anthropic` backend
+- `NVIDIA_API_KEY` — only for `--backend nvidia`
+## Usage
+```
+yt-instruct [OPTIONS] URL [URL...]
+yt-instruct [OPTIONS] --url-file urls.txt
+yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
+yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
+Options:
+  --output-dir PATH              Output directory [default: .]
+  --keep                         Keep intermediate audio + transcript files
+  --merge                        Merge all videos into one document
+  --resume                       Skip already-generated outputs; reuse cached transcripts
+  --content-type [tutorial|lecture|ib|auto]
+                                 Prompt style [default: auto]
+  --backend [anthropic|llm|nvidia]
+                                 LLM backend [default: anthropic]
+  --model TEXT                   Model name [default: claude-sonnet-4-6]
+  --prompt-file PATH             Custom system prompt (overrides built-in)
+  --language LANG                Output language (e.g. 'French'). Defaults to English.
+  --transcript-file PATH         Use existing transcript; skips download and transcription
+  --audio-file PATH              Use existing audio file; skips download, transcribes directly
+  --title TEXT                   Video title for --transcript-file or --audio-file
+  --draft                        Set draft: true in the output frontmatter [default: false]
+  --mistral-model TEXT           [default: voxtral-mini-latest]
+  --audio-format [mp3|m4a]       [default: mp3]
+  --version                      Show version and exit
+```
+## Output Frontmatter
+Every generated file includes YAML frontmatter:
+```yaml
+---
+title: "Video Title"
+url: https://youtu.be/...
+description: "YouTube video description"
+date: 2026-04-12
+draft: false
+---
+```
+Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
+Merged documents (`--merge`) do not include frontmatter.
+## Content Types
+| Type | Use for |
+|------|---------|
+| `auto` | Let the LLM detect (default) |
+| `tutorial` | How-to / step-by-step videos |
+| `lecture` | Tech talks, academic presentations |
+| `ib` | IB student subject videos |
+## Custom Prompts
+Override the built-in prompt with your own file. Template variables:
+`{title}`, `{channel}`, `{content_type}`, `{duration}`
+```bash
+yt-instruct <url> --prompt-file my_prompt.md
+```
+## Using the `llm` backend
+```bash
+pip install llm llm-anthropic
+llm keys set anthropic
+yt-instruct <url> --backend llm --model claude-sonnet-4-6
+```
+## Using the `nvidia` backend
+```bash
+NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
+```
+## Batch Processing
+```bash
+# Multiple URLs
+yt-instruct url1 url2 url3 --output-dir ./docs
+# Playlist (automatically expanded)
+yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
+# From file
+cat urls.txt | yt-instruct --url-file /dev/stdin
+# Merge all into one doc
+yt-instruct url1 url2 --merge --output-dir ./docs
+```
+## Skip Steps — Use Existing Files
+`--audio-file` and `--transcript-file` are resolved relative to `--output-dir` if the file isn't found at the given path. This lets you reference files already in the output directory without typing the full path:
+```bash
+# Start from an existing transcript (skips download + transcription)
+yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
+# File not found locally? Looked up in ./docs automatically
+yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
+# Start from an existing audio file (skips download, still transcribes)
+yt-instruct --audio-file recording.mp3 --output-dir ./docs
+```
+## Resume an Interrupted Run
+Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
+```bash
+# First run (interrupted partway through)
+yt-instruct --url-file urls.txt --keep --output-dir ./docs
+# Resume — skips videos with existing output; reuses cached transcripts
+yt-instruct --url-file urls.txt --resume --output-dir ./docs
+```
+`--resume` checks at two levels per video:
+1. Output `.md` already exists → skip entirely
+2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document

yt_instruct-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,148 @@
+# yt-instruct
+Convert YouTube videos into structured markdown instruction documents.
+Downloads audio via yt-dlp, transcribes it with Mistral's Voxtral API, then generates a clean how-to document using Claude by default.
+## Quick Start
+```bash
+# Run with uvx (no install needed)
+uvx --from . yt-instruct https://www.youtube.com/watch?v=<id>
+# Or install
+pip install -e .
+yt-instruct https://www.youtube.com/watch?v=<id>
+```
+## Requirements
+- `ffmpeg` — `brew install ffmpeg` or `apt install ffmpeg`
+- `MISTRAL_API_KEY` — [console.mistral.ai](https://console.mistral.ai/)
+- `ANTHROPIC_API_KEY` — for the default `anthropic` backend
+- `NVIDIA_API_KEY` — only for `--backend nvidia`
+## Usage
+```
+yt-instruct [OPTIONS] URL [URL...]
+yt-instruct [OPTIONS] --url-file urls.txt
+yt-instruct [OPTIONS] --transcript-file transcript.txt --title "Name"
+yt-instruct [OPTIONS] --audio-file recording.mp3 --title "Name"
+Options:
+  --output-dir PATH              Output directory [default: .]
+  --keep                         Keep intermediate audio + transcript files
+  --merge                        Merge all videos into one document
+  --resume                       Skip already-generated outputs; reuse cached transcripts
+  --content-type [tutorial|lecture|ib|auto]
+                                 Prompt style [default: auto]
+  --backend [anthropic|llm|nvidia]
+                                 LLM backend [default: anthropic]
+  --model TEXT                   Model name [default: claude-sonnet-4-6]
+  --prompt-file PATH             Custom system prompt (overrides built-in)
+  --language LANG                Output language (e.g. 'French'). Defaults to English.
+  --transcript-file PATH         Use existing transcript; skips download and transcription
+  --audio-file PATH              Use existing audio file; skips download, transcribes directly
+  --title TEXT                   Video title for --transcript-file or --audio-file
+  --draft                        Set draft: true in the output frontmatter [default: false]
+  --mistral-model TEXT           [default: voxtral-mini-latest]
+  --audio-format [mp3|m4a]       [default: mp3]
+  --version                      Show version and exit
+```
+## Output Frontmatter
+Every generated file includes YAML frontmatter:
+```yaml
+---
+title: "Video Title"
+url: https://youtu.be/...
+description: "YouTube video description"
+date: 2026-04-12
+draft: false
+---
+```
+Use `--draft` to set `draft: true` (useful for Hugo, Jekyll, or similar static site generators).
+Merged documents (`--merge`) do not include frontmatter.
+## Content Types
+| Type | Use for |
+|------|---------|
+| `auto` | Let the LLM detect (default) |
+| `tutorial` | How-to / step-by-step videos |
+| `lecture` | Tech talks, academic presentations |
+| `ib` | IB student subject videos |
+## Custom Prompts
+Override the built-in prompt with your own file. Template variables:
+`{title}`, `{channel}`, `{content_type}`, `{duration}`
+```bash
+yt-instruct <url> --prompt-file my_prompt.md
+```
+## Using the `llm` backend
+```bash
+pip install llm llm-anthropic
+llm keys set anthropic
+yt-instruct <url> --backend llm --model claude-sonnet-4-6
+```
+## Using the `nvidia` backend
+```bash
+NVIDIA_API_KEY=... yt-instruct <url> --backend nvidia --model moonshotai/kimi-k2-instruct
+```
+## Batch Processing
+```bash
+# Multiple URLs
+yt-instruct url1 url2 url3 --output-dir ./docs
+# Playlist (automatically expanded)
+yt-instruct https://www.youtube.com/playlist?list=<id> --output-dir ./docs
+# From file
+cat urls.txt | yt-instruct --url-file /dev/stdin
+# Merge all into one doc
+yt-instruct url1 url2 --merge --output-dir ./docs
+```
+## Skip Steps — Use Existing Files
+`--audio-file` and `--transcript-file` are resolved relative to `--output-dir` if the file isn't found at the given path. This lets you reference files already in the output directory without typing the full path:
+```bash
+# Start from an existing transcript (skips download + transcription)
+yt-instruct --transcript-file transcript.txt --title "My Video" --output-dir ./docs
+# File not found locally? Looked up in ./docs automatically
+yt-instruct --transcript-file my_transcript.txt --output-dir ./docs
+# Start from an existing audio file (skips download, still transcribes)
+yt-instruct --audio-file recording.mp3 --output-dir ./docs
+```
+## Resume an Interrupted Run
+Use `--keep` to save transcripts alongside output files, then `--resume` to continue from where a previous run stopped:
+```bash
+# First run (interrupted partway through)
+yt-instruct --url-file urls.txt --keep --output-dir ./docs
+# Resume — skips videos with existing output; reuses cached transcripts
+yt-instruct --url-file urls.txt --resume --output-dir ./docs
+```
+`--resume` checks at two levels per video:
+1. Output `.md` already exists → skip entirely
+2. Cached `*_transcript.txt` exists (saved by `--keep`) → skip download and transcription; only regenerate the document

yt_instruct-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,38 @@
+[project]
+name = "yt-instruct"
+version = "1.0.0"
+description = "Convert YouTube videos into structured markdown instruction documents"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "MIT" }
+keywords = ["youtube", "transcription", "llm", "instructions", "mistral"]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "click>=8.1",
+    "yt-dlp>=2024.1",
+    "mistralai>=1.0",
+    "anthropic>=0.40",
+    "openai>=1.0",
+    "llm>=0.17",
+    "llm-anthropic>=0.12",
+]
+[project.scripts]
+yt-instruct = "yt_instruct.cli:cli"
+[project.optional-dependencies]
+dev = ["pytest>=8.0", "pytest-mock>=3.0"]
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.setuptools.package-data]
+yt_instruct = ["prompts/*.md"]

yt_instruct-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

yt_instruct-1.0.0/src/yt_instruct/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""yt-instruct: Convert YouTube videos into structured markdown instruction documents."""
+__version__ = "0.1.0"