vidwise 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ {
2
+ "name": "vidwise",
3
+ "description": "LLMs can't watch videos. vidwise gives them eyes. Extract transcripts, frames, and visual guides from any video.",
4
+ "owner": {
5
+ "name": "Juan Pablo Djeredjian",
6
+ "email": "jpdjeredjian@gmail.com"
7
+ },
8
+ "plugins": [
9
+ {
10
+ "name": "vidwise",
11
+ "source": "./plugin",
12
+ "description": "Make any video AI-readable. Extract transcripts, frames, and visual guides from videos using Claude Code's native multimodal AI — no API key needed.",
13
+ "version": "0.1.0"
14
+ }
15
+ ]
16
+ }
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+ - run: pip install ruff
18
+ - run: ruff check src/
19
+
20
+ test:
21
+ runs-on: ubuntu-latest
22
+ strategy:
23
+ matrix:
24
+ python-version: ["3.10", "3.11", "3.12"]
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ - uses: actions/setup-python@v5
28
+ with:
29
+ python-version: ${{ matrix.python-version }}
30
+ - run: pip install -e ".[dev]"
31
+ - run: pytest
@@ -0,0 +1,20 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+ - run: pip install build
19
+ - run: python -m build
20
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,23 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ env/
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .pytest_cache/
15
+ *.wav
16
+ *.mp4
17
+ *.webm
18
+ *.mkv
19
+ *.avi
20
+ *.mov
21
+ output-*/
22
+ vidwise-output-*/
23
+ TODO.md
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-02-26
9
+
10
+ ### Added
11
+
12
+ - Core CLI with `vidwise <source>` command
13
+ - Whisper-powered transcription (`.txt`, `.srt`, `.json` outputs)
14
+ - Frame extraction every N seconds with timestamp-based naming
15
+ - Smart key frame selection via pixel-difference analysis
16
+ - URL support via yt-dlp (YouTube, Loom, 1000+ sites)
17
+ - AI-powered visual guide generation (Claude and OpenAI providers)
18
+ - Claude Code plugin with `/vidwise` slash command
19
+ - Parallel frame analysis via Claude Code subagents (no API key needed)
vidwise-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Juan Pablo Djeredjian
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
vidwise-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: vidwise
3
+ Version: 0.1.0
4
+ Summary: LLMs can't watch videos. vidwise gives them eyes.
5
+ Project-URL: Homepage, https://github.com/jpdjere/vidwise
6
+ Project-URL: Repository, https://github.com/jpdjere/vidwise
7
+ Project-URL: Issues, https://github.com/jpdjere/vidwise/issues
8
+ Author: Juan Pablo Djeredjian
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai,frames,knowledge,llm,transcript,video,whisper
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Video
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: anthropic>=0.40
24
+ Requires-Dist: click>=8.0
25
+ Requires-Dist: openai-whisper>=20231117
26
+ Requires-Dist: openai>=1.0
27
+ Requires-Dist: yt-dlp>=2023.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest-cov; extra == 'dev'
30
+ Requires-Dist: pytest>=7.0; extra == 'dev'
31
+ Requires-Dist: ruff; extra == 'dev'
32
+ Provides-Extra: fast
33
+ Requires-Dist: faster-whisper>=1.0; extra == 'fast'
34
+ Description-Content-Type: text/markdown
35
+
36
+ <p align="center">
37
+ <img src="https://raw.githubusercontent.com/jpdjere/vidwise/main/assets/banner.png" alt="vidwise — LLMs can't watch videos. vidwise gives them eyes." width="700">
38
+ </p>
39
+
40
+ <p align="center">
41
+ <a href="https://pypi.org/project/vidwise/"><img src="https://img.shields.io/pypi/v/vidwise?color=blue" alt="PyPI"></a>
42
+ <a href="https://pypi.org/project/vidwise/"><img src="https://img.shields.io/pypi/pyversions/vidwise" alt="Python"></a>
43
+ <a href="https://github.com/jpdjere/vidwise/blob/main/LICENSE"><img src="https://img.shields.io/github/license/jpdjere/vidwise" alt="License"></a>
44
+ <a href="https://github.com/jpdjere/vidwise/actions/workflows/ci.yml"><img src="https://github.com/jpdjere/vidwise/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
45
+ </p>
46
+
47
+ ---
48
+
49
+ Videos are the biggest blind spot for AI. A 5-minute Loom bug report, a 30-minute tutorial, a conference talk — all completely opaque to your LLM. You either watch the whole thing yourself or lose the knowledge.
50
+
51
+ **vidwise** extracts the visual and audio knowledge from any video into structured, LLM-consumable markdown. Feed the output to any LLM and it instantly "understands" the video.
52
+
53
+ ```
54
+ Video ─→ vidwise ─→ Transcript + Key Frames + Visual Guide ─→ LLM Context
55
+ ```
56
+
57
+ ## What can you do with it?
58
+
59
+ | Scenario | What happens |
60
+ |----------|-------------|
61
+ | **Debug a Loom bug report** | Feed the output to Claude → it "sees" the bug, the UI state, the error messages |
62
+ | **Absorb a tutorial** | 30-min coding video → structured knowledge your LLM can answer questions about |
63
+ | **Process a meeting** | Extract decisions, action items, and what was on screen |
64
+ | **Learn from a talk** | Turn any conference presentation into searchable, queryable knowledge |
65
+ | **Onboard faster** | Training videos become AI-queryable — new hires get instant answers |
66
+
67
+ ## Why vidwise?
68
+
69
+ | | |
70
+ |---|---|
71
+ | **See the whole picture** | Most tools only extract audio. vidwise captures both what was *said* and what was *shown* — UI states, error messages, slides, code, diagrams. |
72
+ | **Process once, query forever** | The output is a self-contained artifact. Feed it to any LLM, any number of times, at zero additional cost. No re-uploading, no re-processing. |
73
+ | **Works with any LLM** | Standard markdown + images. Claude, GPT, Gemini, Llama, Mistral — whatever you use. No vendor lock-in. |
74
+ | **Your video stays local** | Whisper and ffmpeg run on your machine. Nothing leaves your computer unless you opt into AI guide generation. |
75
+ | **Smart, not brute-force** | Pixel-difference analysis keeps only frames where the visual content actually changed. Less noise, better LLM understanding. |
76
+ | **Human-readable AND machine-readable** | The output isn't just for LLMs — `guide.md` is a visual walkthrough you can read, share, and bookmark. One command, two audiences. |
77
+ | **One command** | `vidwise recording.mp4` → transcript, key frames, and visual guide in a single portable directory. |
78
+
79
+ > **Not just for LLMs.** The visual guide vidwise generates is a fully readable document with embedded screenshots — open it in VS Code, Obsidian, or GitHub and you have a skimmable walkthrough of the entire video. Share it with your team, bookmark it for later, or feed it to any LLM. One artifact, two audiences.
80
+
81
+ ## Quick Start
82
+
83
+ ```bash
84
+ # Install
85
+ pip install vidwise
86
+
87
+ # Process a local video
88
+ vidwise recording.mp4
89
+
90
+ # Process a YouTube video
91
+ vidwise https://youtube.com/watch?v=abc
92
+
93
+ # With AI-powered visual guide
94
+ export ANTHROPIC_API_KEY=sk-... # or OPENAI_API_KEY
95
+ vidwise recording.mp4 --provider claude
96
+ ```
97
+
98
+ ### Prerequisites
99
+
100
+ - **Python 3.10+**
101
+ - **ffmpeg** — `brew install ffmpeg` (macOS) or `apt install ffmpeg` (Linux)
102
+
103
+ > **Faster backend?** `pip install "vidwise[fast]"` adds faster-whisper (~200MB) for 3-4x faster transcription, but without Apple Metal GPU support. Note that openai-whisper (~2GB) is a core dependency and is still installed alongside it, so this extra does not shrink the install. vidwise auto-detects which backend is installed.
104
+
105
+ ## Usage
106
+
107
+ ```bash
108
+ vidwise <source> [options]
109
+ ```
110
+
111
+ | Option | Default | Description |
112
+ |--------|---------|-------------|
113
+ | `--model`, `-m` | `medium` | Whisper model: `tiny`, `base`, `small`, `medium`, `large` |
114
+ | `--output-dir`, `-o` | auto | Output directory path |
115
+ | `--no-guide` | off | Skip AI guide generation |
116
+ | `--provider`, `-p` | `auto` | AI provider: `auto`, `claude`, `openai` |
117
+ | `--frame-interval` | `2` | Seconds between frame captures |
118
+ | `--frame-threshold` | `0.05` | Pixel diff threshold for key frame selection |
119
+
120
+ ### Examples
121
+
122
+ ```bash
123
+ # Fast transcription of a short video
124
+ vidwise demo.mp4 --model tiny --no-guide
125
+
126
+ # YouTube tutorial with Claude-powered guide
127
+ vidwise https://youtube.com/watch?v=abc --model small --provider claude
128
+
129
+ # Loom bug report — default settings
130
+ vidwise https://loom.com/share/abc123def
131
+ ```
132
+
133
+ ## Output
134
+
135
+ vidwise creates a single self-contained directory:
136
+
137
+ ```
138
+ vidwise-abc123-2026-02-26/
139
+ ├── video.mp4 # Source video
140
+ ├── audio.wav # Extracted audio (16kHz mono)
141
+ ├── transcript.txt # Plain text transcript
142
+ ├── transcript.srt # Timestamped subtitles
143
+ ├── transcript.json # Full Whisper output with segments
144
+ ├── frames/ # Key frames every 2 seconds
145
+ │ ├── frame_0m00s.png
146
+ │ ├── frame_0m02s.png
147
+ │ ├── frame_0m04s.png
148
+ │ └── ...
149
+ └── guide.md # Visual guide with embedded frames (if AI enabled)
150
+ ```
151
+
152
+ The `guide.md` uses relative image paths — open it in any markdown viewer (VS Code, GitHub, Obsidian) and the images render inline.
153
+
154
+ ## How It Works
155
+
156
+ ```
157
+ ┌──────────────┐
158
+ │ Video URL │──→ yt-dlp download
159
+ │ or local │
160
+ └──────┬───────┘
161
+
162
+
163
+ ┌──────────────┐ ┌──────────────────┐
164
+ │ ffmpeg │────→│ audio.wav │──→ Whisper ──→ transcript.*
165
+ │ (parallel) │ │ (16kHz mono) │
166
+ │ │────→│ frames/ │──→ Key frame selection
167
+ │ │ │ (every 2 sec) │ (pixel diff filtering)
168
+ └──────────────┘ └──────────────────┘
169
+
170
+
171
+ ┌──────────────────┐
172
+ │ AI Analysis │ Claude API, OpenAI API,
173
+ │ (optional) │ or Claude Code (free)
174
+ └────────┬─────────┘
175
+
176
+
177
+ ┌──────────────────┐
178
+ │ guide.md │ Structured markdown with
179
+ │ │ embedded frame images
180
+ └──────────────────┘
181
+ ```
182
+
183
+ **Smart frame selection:** Not every frame matters. vidwise compares consecutive frames using pixel-difference analysis and only keeps frames where the visual content actually changed. A 10-minute video might have 300 raw frames but only ~40 meaningful ones.
184
+
185
+ ## Claude Code Plugin
186
+
187
+ If you use [Claude Code](https://docs.anthropic.com/en/docs/claude-code), install vidwise as a plugin for **AI-powered guide generation without needing an API key** — Claude Code's native multimodal AI handles the analysis:
188
+
189
+ ```bash
190
+ # Add the vidwise marketplace and install the plugin
191
+ /plugin marketplace add jpdjere/vidwise
192
+ /plugin install vidwise@vidwise
193
+
194
+ # Then use it:
195
+ /vidwise:vidwise recording.mp4
196
+ /vidwise:vidwise https://loom.com/share/abc123
197
+ ```
198
+
199
+ For local development or testing, you can also load directly:
200
+
201
+ ```bash
202
+ claude --plugin-dir /path/to/vidwise/plugin
203
+ ```
204
+
205
+ The plugin runs `vidwise --no-guide` for extraction, then uses Claude Code's built-in vision capabilities to analyze frames in parallel — completely free, no API key needed.
206
+
207
+ ## Whisper Model Sizes
208
+
209
+ | Model | Speed | Quality | Best For |
210
+ |-------|-------|---------|----------|
211
+ | `tiny` | ~1 min/min | Basic | Quick tests, long videos |
212
+ | `base` | ~2 min/min | Good | Short videos |
213
+ | `small` | ~4 min/min | Better | Videos >30 min |
214
+ | `medium` | ~8 min/min | Recommended | Default for most content |
215
+ | `large` | ~16 min/min | Best | When accuracy is critical |
216
+
217
+ *Speed estimates on Apple M-series. First run downloads model weights (one-time).*
218
+
219
+ ## Contributing
220
+
221
+ Contributions are welcome! Please open an issue first to discuss what you'd like to change.
222
+
223
+ ```bash
224
+ # Development setup
225
+ git clone https://github.com/jpdjere/vidwise
226
+ cd vidwise
227
+ python -m venv .venv
228
+ source .venv/bin/activate
229
+ pip install -e ".[dev]"
230
+
231
+ # Run tests
232
+ pytest
233
+
234
+ # Lint
235
+ ruff check src/
236
+ ```
237
+
238
+ ## License
239
+
240
+ [MIT](LICENSE)
@@ -0,0 +1,205 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/jpdjere/vidwise/main/assets/banner.png" alt="vidwise — LLMs can't watch videos. vidwise gives them eyes." width="700">
3
+ </p>
4
+
5
+ <p align="center">
6
+ <a href="https://pypi.org/project/vidwise/"><img src="https://img.shields.io/pypi/v/vidwise?color=blue" alt="PyPI"></a>
7
+ <a href="https://pypi.org/project/vidwise/"><img src="https://img.shields.io/pypi/pyversions/vidwise" alt="Python"></a>
8
+ <a href="https://github.com/jpdjere/vidwise/blob/main/LICENSE"><img src="https://img.shields.io/github/license/jpdjere/vidwise" alt="License"></a>
9
+ <a href="https://github.com/jpdjere/vidwise/actions/workflows/ci.yml"><img src="https://github.com/jpdjere/vidwise/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
10
+ </p>
11
+
12
+ ---
13
+
14
+ Videos are the biggest blind spot for AI. A 5-minute Loom bug report, a 30-minute tutorial, a conference talk — all completely opaque to your LLM. You either watch the whole thing yourself or lose the knowledge.
15
+
16
+ **vidwise** extracts the visual and audio knowledge from any video into structured, LLM-consumable markdown. Feed the output to any LLM and it instantly "understands" the video.
17
+
18
+ ```
19
+ Video ─→ vidwise ─→ Transcript + Key Frames + Visual Guide ─→ LLM Context
20
+ ```
21
+
22
+ ## What can you do with it?
23
+
24
+ | Scenario | What happens |
25
+ |----------|-------------|
26
+ | **Debug a Loom bug report** | Feed the output to Claude → it "sees" the bug, the UI state, the error messages |
27
+ | **Absorb a tutorial** | 30-min coding video → structured knowledge your LLM can answer questions about |
28
+ | **Process a meeting** | Extract decisions, action items, and what was on screen |
29
+ | **Learn from a talk** | Turn any conference presentation into searchable, queryable knowledge |
30
+ | **Onboard faster** | Training videos become AI-queryable — new hires get instant answers |
31
+
32
+ ## Why vidwise?
33
+
34
+ | | |
35
+ |---|---|
36
+ | **See the whole picture** | Most tools only extract audio. vidwise captures both what was *said* and what was *shown* — UI states, error messages, slides, code, diagrams. |
37
+ | **Process once, query forever** | The output is a self-contained artifact. Feed it to any LLM, any number of times, at zero additional cost. No re-uploading, no re-processing. |
38
+ | **Works with any LLM** | Standard markdown + images. Claude, GPT, Gemini, Llama, Mistral — whatever you use. No vendor lock-in. |
39
+ | **Your video stays local** | Whisper and ffmpeg run on your machine. Nothing leaves your computer unless you opt into AI guide generation. |
40
+ | **Smart, not brute-force** | Pixel-difference analysis keeps only frames where the visual content actually changed. Less noise, better LLM understanding. |
41
+ | **Human-readable AND machine-readable** | The output isn't just for LLMs — `guide.md` is a visual walkthrough you can read, share, and bookmark. One command, two audiences. |
42
+ | **One command** | `vidwise recording.mp4` → transcript, key frames, and visual guide in a single portable directory. |
43
+
44
+ > **Not just for LLMs.** The visual guide vidwise generates is a fully readable document with embedded screenshots — open it in VS Code, Obsidian, or GitHub and you have a skimmable walkthrough of the entire video. Share it with your team, bookmark it for later, or feed it to any LLM. One artifact, two audiences.
45
+
46
+ ## Quick Start
47
+
48
+ ```bash
49
+ # Install
50
+ pip install vidwise
51
+
52
+ # Process a local video
53
+ vidwise recording.mp4
54
+
55
+ # Process a YouTube video
56
+ vidwise https://youtube.com/watch?v=abc
57
+
58
+ # With AI-powered visual guide
59
+ export ANTHROPIC_API_KEY=sk-... # or OPENAI_API_KEY
60
+ vidwise recording.mp4 --provider claude
61
+ ```
62
+
63
+ ### Prerequisites
64
+
65
+ - **Python 3.10+**
66
+ - **ffmpeg** — `brew install ffmpeg` (macOS) or `apt install ffmpeg` (Linux)
67
+
68
+ > **Faster backend?** `pip install "vidwise[fast]"` adds faster-whisper (~200MB) for 3-4x faster transcription, but without Apple Metal GPU support. Note that openai-whisper (~2GB) is a core dependency and is still installed alongside it, so this extra does not shrink the install. vidwise auto-detects which backend is installed.
69
+
70
+ ## Usage
71
+
72
+ ```bash
73
+ vidwise <source> [options]
74
+ ```
75
+
76
+ | Option | Default | Description |
77
+ |--------|---------|-------------|
78
+ | `--model`, `-m` | `medium` | Whisper model: `tiny`, `base`, `small`, `medium`, `large` |
79
+ | `--output-dir`, `-o` | auto | Output directory path |
80
+ | `--no-guide` | off | Skip AI guide generation |
81
+ | `--provider`, `-p` | `auto` | AI provider: `auto`, `claude`, `openai` |
82
+ | `--frame-interval` | `2` | Seconds between frame captures |
83
+ | `--frame-threshold` | `0.05` | Pixel diff threshold for key frame selection |
84
+
85
+ ### Examples
86
+
87
+ ```bash
88
+ # Fast transcription of a short video
89
+ vidwise demo.mp4 --model tiny --no-guide
90
+
91
+ # YouTube tutorial with Claude-powered guide
92
+ vidwise https://youtube.com/watch?v=abc --model small --provider claude
93
+
94
+ # Loom bug report — default settings
95
+ vidwise https://loom.com/share/abc123def
96
+ ```
97
+
98
+ ## Output
99
+
100
+ vidwise creates a single self-contained directory:
101
+
102
+ ```
103
+ vidwise-abc123-2026-02-26/
104
+ ├── video.mp4 # Source video
105
+ ├── audio.wav # Extracted audio (16kHz mono)
106
+ ├── transcript.txt # Plain text transcript
107
+ ├── transcript.srt # Timestamped subtitles
108
+ ├── transcript.json # Full Whisper output with segments
109
+ ├── frames/ # Key frames every 2 seconds
110
+ │ ├── frame_0m00s.png
111
+ │ ├── frame_0m02s.png
112
+ │ ├── frame_0m04s.png
113
+ │ └── ...
114
+ └── guide.md # Visual guide with embedded frames (if AI enabled)
115
+ ```
116
+
117
+ The `guide.md` uses relative image paths — open it in any markdown viewer (VS Code, GitHub, Obsidian) and the images render inline.
118
+
119
+ ## How It Works
120
+
121
+ ```
122
+ ┌─────────────┐
123
+ │ Video URL │──→ yt-dlp download
124
+ │ or local │
125
+ └──────┬───────┘
126
+
127
+
128
+ ┌──────────────┐ ┌──────────────────┐
129
+ │ ffmpeg │────→│ audio.wav │──→ Whisper ──→ transcript.*
130
+ │ (parallel) │ │ (16kHz mono) │
131
+ │ │────→│ frames/ │──→ Key frame selection
132
+ │ │ │ (every 2 sec) │ (pixel diff filtering)
133
+ └──────────────┘ └──────────────────┘
134
+
135
+
136
+ ┌──────────────────┐
137
+ │ AI Analysis │ Claude API, OpenAI API,
138
+ │ (optional) │ or Claude Code (free)
139
+ └────────┬─────────┘
140
+
141
+
142
+ ┌──────────────────┐
143
+ │ guide.md │ Structured markdown with
144
+ │ │ embedded frame images
145
+ └──────────────────┘
146
+ ```
147
+
148
+ **Smart frame selection:** Not every frame matters. vidwise compares consecutive frames using pixel-difference analysis and only keeps frames where the visual content actually changed. A 10-minute video might have 300 raw frames but only ~40 meaningful ones.
149
+
150
+ ## Claude Code Plugin
151
+
152
+ If you use [Claude Code](https://docs.anthropic.com/en/docs/claude-code), install vidwise as a plugin for **AI-powered guide generation without needing an API key** — Claude Code's native multimodal AI handles the analysis:
153
+
154
+ ```bash
155
+ # Add the vidwise marketplace and install the plugin
156
+ /plugin marketplace add jpdjere/vidwise
157
+ /plugin install vidwise@vidwise
158
+
159
+ # Then use it:
160
+ /vidwise:vidwise recording.mp4
161
+ /vidwise:vidwise https://loom.com/share/abc123
162
+ ```
163
+
164
+ For local development or testing, you can also load directly:
165
+
166
+ ```bash
167
+ claude --plugin-dir /path/to/vidwise/plugin
168
+ ```
169
+
170
+ The plugin runs `vidwise --no-guide` for extraction, then uses Claude Code's built-in vision capabilities to analyze frames in parallel — completely free, no API key needed.
171
+
172
+ ## Whisper Model Sizes
173
+
174
+ | Model | Speed | Quality | Best For |
175
+ |-------|-------|---------|----------|
176
+ | `tiny` | ~1 min/min | Basic | Quick tests, long videos |
177
+ | `base` | ~2 min/min | Good | Short videos |
178
+ | `small` | ~4 min/min | Better | Videos >30 min |
179
+ | `medium` | ~8 min/min | Recommended | Default for most content |
180
+ | `large` | ~16 min/min | Best | When accuracy is critical |
181
+
182
+ *Speed estimates on Apple M-series. First run downloads model weights (one-time).*
183
+
184
+ ## Contributing
185
+
186
+ Contributions are welcome! Please open an issue first to discuss what you'd like to change.
187
+
188
+ ```bash
189
+ # Development setup
190
+ git clone https://github.com/jpdjere/vidwise
191
+ cd vidwise
192
+ python -m venv .venv
193
+ source .venv/bin/activate
194
+ pip install -e ".[dev]"
195
+
196
+ # Run tests
197
+ pytest
198
+
199
+ # Lint
200
+ ruff check src/
201
+ ```
202
+
203
+ ## License
204
+
205
+ [MIT](LICENSE)
Binary file
@@ -0,0 +1,63 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 320" fill="none">
2
+ <defs>
3
+ <linearGradient id="g1" x1="0%" y1="0%" x2="100%" y2="100%">
4
+ <stop offset="0%" style="stop-color:#6366f1" />
5
+ <stop offset="100%" style="stop-color:#8b5cf6" />
6
+ </linearGradient>
7
+ <linearGradient id="g2" x1="0%" y1="0%" x2="100%" y2="100%">
8
+ <stop offset="0%" style="stop-color:#8b5cf6" />
9
+ <stop offset="100%" style="stop-color:#a78bfa" />
10
+ </linearGradient>
11
+ <linearGradient id="g3" x1="0%" y1="0%" x2="100%" y2="0%">
12
+ <stop offset="0%" style="stop-color:#6366f1" />
13
+ <stop offset="50%" style="stop-color:#8b5cf6" />
14
+ <stop offset="100%" style="stop-color:#a78bfa" />
15
+ </linearGradient>
16
+ </defs>
17
+
18
+ <!-- Centered icon group -->
19
+ <g transform="translate(290, 30)">
20
+ <!-- Video frame -->
21
+ <rect x="0" y="0" width="140" height="140" rx="20" fill="url(#g1)" />
22
+
23
+ <!-- Film strip notches -->
24
+ <rect x="8" y="8" width="12" height="8" rx="2" fill="white" opacity="0.3" />
25
+ <rect x="28" y="8" width="12" height="8" rx="2" fill="white" opacity="0.3" />
26
+ <rect x="48" y="8" width="12" height="8" rx="2" fill="white" opacity="0.3" />
27
+ <rect x="8" y="124" width="12" height="8" rx="2" fill="white" opacity="0.3" />
28
+ <rect x="28" y="124" width="12" height="8" rx="2" fill="white" opacity="0.3" />
29
+ <rect x="48" y="124" width="12" height="8" rx="2" fill="white" opacity="0.3" />
30
+
31
+ <!-- Play triangle -->
32
+ <polygon points="52,38 100,70 52,102" fill="white" opacity="0.95" />
33
+
34
+ <!-- Eye overlapping the video frame -->
35
+ <g transform="translate(85, 32)">
36
+ <path d="M0,38 Q55,-15 110,38 Q55,91 0,38 Z" fill="url(#g2)" />
37
+ <circle cx="55" cy="38" r="20" fill="white" />
38
+ <circle cx="55" cy="38" r="10" fill="#4f46e5" />
39
+ <circle cx="51" cy="33" r="3.5" fill="white" />
40
+ </g>
41
+
42
+ <!-- Subtle sparkle / AI indicator -->
43
+ <g transform="translate(178, 12)" fill="#a78bfa">
44
+ <path d="M8,0 L10,6 L16,8 L10,10 L8,16 L6,10 L0,8 L6,6 Z" opacity="0.7" />
45
+ </g>
46
+ <g transform="translate(200, 28)" fill="#c4b5fd">
47
+ <path d="M5,0 L6.2,3.8 L10,5 L6.2,6.2 L5,10 L3.8,6.2 L0,5 L3.8,3.8 Z" opacity="0.5" />
48
+ </g>
49
+ </g>
50
+
51
+ <!-- Title -->
52
+ <text x="450" y="230" text-anchor="middle" font-family="system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif" font-size="72" font-weight="800" fill="#1e1b4b" letter-spacing="-2">
53
+ vid<tspan fill="url(#g1)">wise</tspan>
54
+ </text>
55
+
56
+ <!-- Tagline -->
57
+ <text x="450" y="270" text-anchor="middle" font-family="system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif" font-size="21" fill="#6b7280" font-weight="500" letter-spacing="0.3">
58
+ LLMs can't watch videos. vidwise gives them eyes.
59
+ </text>
60
+
61
+ <!-- Accent line -->
62
+ <rect x="350" y="285" width="200" height="3" rx="1.5" fill="url(#g3)" opacity="0.4" />
63
+ </svg>