supervoxtral 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Python
3
+ # -----------------------------------------------------------------------------
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+ ENV/
16
+
17
+ # Distribution / packaging
18
+ .Python
19
+ build/
20
+ dist/
21
+ develop-eggs/
22
+ downloads/
23
+ eggs/
24
+ .eggs/
25
+ lib/
26
+ lib64/
27
+ parts/
28
+ sdist/
29
+ wheels/
30
+ share/python-wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ MANIFEST
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ .pytest_cache/
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+
53
+ # Type checkers / linters
54
+ .mypy_cache/
55
+ .pyre/
56
+ .pytype/
57
+ .ruff_cache/
58
+
59
+ # -----------------------------------------------------------------------------
60
+ # Project artifacts
61
+ # -----------------------------------------------------------------------------
62
+ # Logs
63
+ logs/
64
+ !logs/.gitkeep
65
+
66
+ # Audio recordings
67
+ recordings/
68
+ !recordings/.gitkeep
69
+
70
+ # Transcripts (API responses)
71
+ transcripts/
72
+ !transcripts/.gitkeep
73
+
74
+ # Prompts
75
+ prompt/
76
+ !prompt/.gitkeep
77
+
78
+ # Local environment variables (use .env.example for template)
79
+ .env
80
+ .env.*
81
+ !.env.example
82
+
83
+ # OS / Editor cruft
84
+ .DS_Store
85
+ .AppleDouble
86
+ .LSOverride
87
+ Icon?
88
+ .Spotlight-V100
89
+ .Trashes
90
+
91
+ # IDEs
92
+ .vscode/
93
+ .idea/
94
+
95
+ # Misc
96
+ *.log
97
+ *.tmp
98
+ *.swp
99
+ *.swo
100
+ .python-version
@@ -0,0 +1,88 @@
1
+ # SuperVoxtral — Agent Guide
2
+
3
+ ## Project overview
4
+ Python CLI/GUI for audio recording + transcription via APIs (Mistral Voxtral). MVP: manual stop, API-based, zero-footprint defaults (temp files, no persistent dirs unless overridden), results in `transcripts/` when persisted.
5
+
6
+ ### Project structure
7
+ ```
8
+ supervoxtral/
9
+ ├── svx/ # Python package
10
+ │ ├── __init__.py
11
+ │ ├── cli.py # Typer CLI entrypoint (orchestration only, uses Config and Pipeline)
12
+ │ ├── core/ # Core logic (audio, config, prompts, storage)
13
+ │ │ ├── audio.py # Recording, ffmpeg detection/conversion
14
+ │ │ ├── config.py # Structured Config dataclasses, loading, resolution, logging setup
15
+ │ │ ├── pipeline.py # Centralized RecordingPipeline for CLI/GUI unification
16
+ │ │ ├── prompt.py # Prompt resolution (via Config)
17
+ │ │ └── storage.py # Save transcripts and raw JSON (conditional on keep_transcript_files)
18
+ │ ├── providers/ # API integrations
19
+ │ │ ├── __init__.py # Provider registry (get_provider with Config support)
20
+ │ │ ├── base.py # Provider protocol + shared types
21
+ │ │ └── mistral.py # Mistral Voxtral implementation (init from Config)
22
+ │ └── ui/ # GUI (Qt-based MVP)
23
+ │ └── qt_app.py # RecorderWindow/Worker using Pipeline and Config
24
+
25
+ ├── recordings/ # Audio files (WAV/MP3/Opus) (conditional)
26
+ ├── transcripts/ # API responses (txt/json) (conditional)
27
+ ├── logs/ # Application logs (conditional)
28
+ ├── pyproject.toml # Project metadata & deps
29
+ ├── .env.example # Template for secrets (unused; keys in config.toml)
30
+ └── README.md # User guide
31
+ ```
32
+
33
+ ## Typical Execution Flow
34
+
35
+ - **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
36
+ - **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
37
+ - **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode: no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
38
+ - **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
39
+ - **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback.
40
+
41
+ ## Build & test
42
+ ```bash
43
+ # Setup
44
+ uv pip install -e .
45
+
46
+ # Lint & format
47
+ black svx/
48
+ ruff check svx/
49
+
50
+ # Diagnostics (post-edits)
51
+ # Use `diagnostics` tool or run locally to check errors/warnings in pipeline.py, config.py, etc.
52
+ basedpyright svx
53
+
54
+ # Run
55
+ # Initialize user config (generates config.toml with zero-footprint defaults)
56
+ svx config init
57
+
58
+ # Record (provider/format configured in config.toml; tests zero-footprint)
59
+ svx record --prompt "What's in this file?"
60
+
61
+ # Test persistence
62
+ svx record --save-all --prompt "Test persistence"
63
+
64
+ # Test GUI
65
+ svx record --gui
66
+ ```
67
+
68
+ ## Maintenance
69
+
70
+ - use `uv` to install dependencies if needed
71
+ - update `pyproject.toml`, then run `uv pip install -e .`
72
+ - When adding modules: Propagate Config instance; use RecordingPipeline for recording flows; handle temp files via keep_* flags.
73
+ - Test temp cleanup: Verify no leftovers in default mode (keep_*=false).
74
+
75
+
76
+ ## Code style
77
+ - **Imports**: `from __future__ import annotations` first, then stdlib, third-party, local
78
+ - **Formatting**: Black (100 line length), ruff for linting (E, F, I, UP rules)
79
+ - **Types**: Full type hints required, use `TypedDict` for data structures, `Protocol` for interfaces (e.g., Provider protocol, Config dataclasses with type hints)
80
+ - **Naming**: snake_case for functions/variables, PascalCase for classes, UPPER_CASE for constants
81
+ - **Error handling**: Custom exceptions inherit from standard types, use `ProviderError` for API failures
82
+ - **Docstrings**: Google-style with clear purpose/dependencies/`__all__` exports
83
+
84
+ ## Security
85
+ - API keys are configured in the user config file (`config.toml`), under provider-specific sections.
86
+ - Mistral: define `[providers.mistral].api_key`
87
+ - Environment variables are not used for API keys.
88
+ - Validate user inputs (e.g., paths in Config, prompt resolution).
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 vlebert
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.4
2
+ Name: supervoxtral
3
+ Version: 0.1.0
4
+ Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: audio,cli,gui,mistral,transcription,voxtral,whisper
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: mistralai
10
+ Requires-Dist: pyperclip
11
+ Requires-Dist: python-dotenv
12
+ Requires-Dist: rich
13
+ Requires-Dist: sounddevice
14
+ Requires-Dist: soundfile
15
+ Requires-Dist: typer
16
+ Provides-Extra: dev
17
+ Requires-Dist: black; extra == 'dev'
18
+ Requires-Dist: mypy; extra == 'dev'
19
+ Requires-Dist: pytest; extra == 'dev'
20
+ Requires-Dist: ruff; extra == 'dev'
21
+ Requires-Dist: types-python-dotenv; extra == 'dev'
22
+ Provides-Extra: gui
23
+ Requires-Dist: pyside6-essentials; extra == 'gui'
@@ -0,0 +1,237 @@
1
+ # supervoxtral
2
+
3
+ A simple Python CLI/GUI tool to record audio from your microphone, optionally convert it (WAV/MP3/Opus), and send it to Mistral Voxtral transcription/chat APIs.
4
+
5
+ ---
6
+
7
+ ## Requirements
8
+
9
+ - Python 3.11+
10
+ - ffmpeg (for MP3/Opus conversions)
11
+ - macOS: `brew install ffmpeg`
12
+ - Ubuntu/Debian: `sudo apt-get install ffmpeg`
13
+ - Windows: https://ffmpeg.org/download.html
14
+
15
+ ---
16
+
17
+ ## Installation
18
+
19
+ 1) Create and activate a virtual environment (example with venv):
20
+
21
+ - macOS/Linux:
22
+ ```
23
+ python -m venv .venv
24
+ source .venv/bin/activate
25
+ ```
26
+
27
+ - Windows (PowerShell):
28
+ ```
29
+ python -m venv .venv
30
+ .\.venv\Scripts\Activate.ps1
31
+ ```
32
+
33
+ 2) Install the package (editable mode during development is convenient):
34
+ ```
35
+ pip install -e .
36
+ ```
37
+
38
+ Optional extras:
39
+ - Dev tools:
40
+ ```
41
+ pip install -e ".[dev]"
42
+ ```
43
+
44
+ ---
45
+
46
+ ## Configuration (API keys and prompts)
47
+
48
+ API keys and default behavior are configured only in your user configuration file (config.toml), not via environment variables.
49
+
50
+ - Location of the user config:
51
+ - macOS: ~/Library/Application Support/SuperVoxtral/config.toml
52
+ - Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral/config.toml
53
+ - Windows: %APPDATA%/SuperVoxtral/config.toml
54
+
55
+ - Initialize your user config and user prompt file:
56
+
57
+ ```
58
+ svx config init
59
+ ```
60
+
61
+ This creates:
62
+
63
+ - config.toml (with sensible defaults, including zero-footprint mode)
64
+ - a user prompt file at: ~/Library/Application Support/SuperVoxtral/prompt/user.md (macOS)
65
+ - Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral/prompt/user.md
66
+ - Windows: %APPDATA%/SuperVoxtral/prompt/user.md
67
+
68
+ **Key config sections (edit `config.toml`):**
69
+ - **[defaults]**: provider (e.g., "mistral"), model, format (e.g., "opus"), language, rate, channels, device, copy (clipboard), keep_audio_files = false, keep_transcript_files = false, keep_log_files = false.
70
+ - Zero-footprint mode (defaults): When `keep_* = false`, files are handled in OS temporary directories (auto-cleaned, no project dirs created). Set to `true` for persistence (creates `recordings/`, etc.).
71
+ - **[providers.mistral]**: api_key = "your_mistral_key_here", model (e.g., "voxtral-small-latest").
72
+ - **[prompt]**: text (inline prompt), file (path to prompt.md).
73
+ - Resolution priority: CLI `--prompt`/`--prompt-file` > config.toml [prompt] > user.md fallback > "What's in this audio?".
74
+
75
+ **Configuration is centralized via a structured `Config` object loaded from your user configuration file (`config.toml`). CLI arguments override select values (e.g., prompt, log level), but most defaults (provider, model, keep flags) come from `config.toml`. No environment variables are used for API keys or settings.**
76
+
77
+ No `.env` or shell environment variables are used for API keys.
78
+
79
+
80
+ ---
81
+
82
+ ## Usage (CLI)
83
+
84
+ Make sure your virtual environment is activated and the project is installed (`pip install -e .`).
85
+
86
+ General command form:
87
+ ```
88
+ svx record [OPTIONS]
89
+ ```
90
+
91
+ **Unified entrypoint**: `svx record` handles both CLI and GUI modes via a centralized pipeline (`svx.core.pipeline.RecordingPipeline`). This ensures consistent behavior for recording, conversion, transcription, saving, clipboard copy, and logging across CLI and GUI.
92
+
93
+ **Zero-footprint defaults**: No directories created; outputs to console/clipboard. Use `--save-all` or config `keep_* = true` for persistence.
94
+
95
+ Note: the CLI now exposes a single recording entrypoint. Use `svx record --gui` to launch the GUI frontend. Most defaults (provider, format, model, language, rate, channels, device, keep_audio_files, copy) are configured via your user config (config.toml). The CLI only supports one-off overrides for: --prompt/--prompt-file, --log-level, --outfile-prefix, --gui, --save-all, --transcribe.
96
+
97
+ Planned MVP commands:
98
+
99
+ - Record with Mistral Voxtral (chat with audio) and a prompt (provider/format from config):
100
+ ```
101
+ svx record --prompt "What's in this file?"
102
+ ```
103
+ Tip: Outputs to console and clipboard (if copy=true in config). No files saved unless overridden.
104
+
105
+ Persist all outputs (one-off override):
106
+ ```
107
+ svx record --save-all --prompt "What's in this file?"
108
+ ```
109
+ Creates `recordings/`, `transcripts/`, `logs/` and saves files/logs.
110
+
111
+ - Pure transcription mode with Mistral Voxtral (no prompt, dedicated endpoint):
112
+ ```
113
+ svx record --transcribe
114
+ ```
115
+ Note: Prompts are ignored in this mode. Combine with --save-all for persistence:
116
+ ```
117
+ svx record --transcribe --save-all
118
+ ```
119
+
120
+ To start the GUI frontend:
121
+ ```
122
+ svx record --gui
123
+ ```
124
+ The GUI uses the same pipeline and respects config + CLI overrides (e.g., `--gui --save-all` propagates persistence).
125
+
126
+ The CLI defaults have been unified to favour the previous GUI defaults (e.g. `--format opus`, `--copy` enabled, and `--no-keep-audio-files` by default). The final effective values still respect the precedence: CLI explicit > user config defaults (config.toml) > built-in defaults.
127
+
128
+ ### Advanced prompt management
129
+
130
+ You can provide a user prompt, either inline or via a file:
131
+
132
+ #### User prompt (inline)
133
+ ```
134
+ svx record --user-prompt "Transcris puis résume ce qui est dit dans l'audio."
135
+ ```
136
+
137
+ #### User prompt from file
138
+ ```
139
+ svx record --user-prompt-file ~/Library/Application\ Support/SuperVoxtral/prompt/user.md
140
+ ```
141
+ (Adjust the path for your OS; see “Configuration” for locations.)
142
+
143
+ #### Resolution priority (no concatenation)
144
+ Order of precedence for determining the final prompt:
145
+ 1) `--user-prompt` (inline)
146
+ 2) `--user-prompt-file` (explicit file)
147
+ 3) `config.toml` → `[prompt].text`
148
+ 4) `config.toml` → `[prompt].file`
149
+ 5) User prompt file in your user config dir (`.../SuperVoxtral/prompt/user.md`)
150
+ 6) Default fallback: "What's in this audio?"
151
+
152
+ Note: the file and inline prompts are not concatenated; the first non-empty source wins. Uses `Config.resolve_prompt()` for unified resolution across CLI/GUI.
153
+
154
+ If no user prompt is provided (by any of the above), it defaults to "What's in this audio?".
155
+
156
+ A single user message is sent containing the audio and (optionally) text.
157
+
158
+ Flow (chat with audio, default mode):
159
+ - Starts recording WAV immediately.
160
+ - Press Enter to stop recording.
161
+ - Converts WAV to MP3 (if `--format mp3`) or Opus (if `--format opus`).
162
+ - Sends the audio to Mistral Voxtral as base64 input_audio plus your text prompt.
163
+ - Prints and saves the response to `transcripts/` (if keep_transcript_files=true or --save-all).
164
+
165
+ Flow (pure transcription mode, `--transcribe`):
166
+ - Starts recording WAV.
167
+ - Press Enter to stop.
168
+ - Sends the audio to Voxtral (transcription).
169
+ - Prints the transcript and saves it to `transcripts/` (if keep_transcript_files=true or --save-all).
170
+
171
+ Config-driven options (set these in config.toml under [defaults]):
172
+ - rate, channels, device
173
+ - provider, model, format, language
174
+ - keep_audio_files, copy
175
+
176
+ One-off CLI overrides:
177
+ - `--outfile-prefix mynote_2025-09-09` (custom file prefix)
178
+ - `--log-level debug` (verbose logs)
179
+ - `--user-prompt` (alias: `--prompt`; user prompt text, inline)
180
+ - `--user-prompt-file` (alias: `--prompt-file`; path to user prompt markdown file in your user config dir)
181
+ - `--transcribe` (pure transcription mode, ignores prompts)
182
+
183
+ Alternative invocation (without console script):
184
+ ```
185
+ python -m svx.cli record --prompt "..."
186
+ ```
187
+
188
+ ---
189
+
190
+ ## Provider details
191
+
192
+ ### Mistral Voxtral (chat with audio)
193
+ - Model: `voxtral-small-latest` by default (configurable)
194
+ - API: `mistralai` Python client
195
+ - Request structure:
196
+ - Messages with `content` array containing:
197
+ - `{ "type": "input_audio", "input_audio": "<base64>" }`
198
+ - `{ "type": "text", "text": "<prompt>" }`
199
+ - Output: text content from the chat response; saved to `transcripts/`.
200
+
201
+ Recommended formats:
202
+ - Opus reduces file size and upload time.
203
+
204
+ Authentication:
205
+ - Mistral: key read from `Config` (user config at `providers.mistral.api_key`).
206
+
207
+
208
+ ---
209
+
210
+ ## Recording formats and conversion
211
+
212
+ - Recording happens in WAV (PCM 16-bit, mono, 16 kHz or 32 kHz).
213
+ - Optional conversion via ffmpeg:
214
+ - WAV -> MP3:
215
+ ```
216
+ ffmpeg -y -i input.wav -codec:a libmp3lame -q:a 3 output.mp3
217
+ ```
218
+ - WAV -> Opus:
219
+ ```
220
+ ffmpeg -y -i input.wav -c:a libopus -b:a 24k output.opus
221
+ ```
222
+
223
+ The tool will send the converted file if you set `--format mp3` or `--format opus`; otherwise it sends the raw WAV.
224
+
225
+ ---
226
+
227
+ ## macOS notes
228
+
229
+ - Microphone permission: on first run, macOS will ask for microphone access. Approve it in System Settings > Privacy & Security > Microphone if needed.
230
+ - If you face issues with device selection, set `device` under `[defaults]` in your config.toml to choose a specific input device.
231
+
232
+
233
+ ---
234
+
235
+ ## License
236
+
237
+ MIT
@@ -0,0 +1,3 @@
1
+ # This file ensures the 'logs' directory is tracked by version control.
2
+ # Log files produced by the application will be stored here.
3
+ # Safe to keep empty; do not remove if you want the directory in the repo.
@@ -0,0 +1,8 @@
1
+ todo
2
+
3
+ - localisation recording dans config
4
+ - paste directement ?
5
+ - nettoyer xml réponse (option)
6
+ - post prompt
7
+ - dépendance ffmpeg
8
+ - fichier ? ou sont il meme si suppr ? possible record en opus direct ?
File without changes
@@ -0,0 +1,57 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "supervoxtral"
7
+ version = "0.1.0"
8
+ description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
9
+ requires-python = ">=3.11"
10
+ license = { text = "MIT" }
11
+ keywords = [
12
+ "transcription",
13
+ "audio",
14
+ "mistral",
15
+ "voxtral",
16
+ "whisper",
17
+ "cli",
18
+ "gui",
19
+ ]
20
+ dependencies = [
21
+ "typer",
22
+ "rich",
23
+ "sounddevice",
24
+ "soundfile",
25
+ "python-dotenv",
26
+ "pyperclip",
27
+ "mistralai",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ gui = ["PySide6-Essentials"]
32
+ dev = ["black", "ruff", "mypy", "types-python-dotenv", "pytest"]
33
+
34
+ [project.scripts]
35
+ svx = "svx.cli:app"
36
+
37
+ [tool.black]
38
+ line-length = 100
39
+ target-version = ["py310"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ target-version = "py310"
44
+
45
+ [tool.ruff.lint]
46
+ select = ["E", "F", "I", "UP"]
47
+ ignore = []
48
+
49
+ [tool.hatch.build.targets.wheel]
50
+ packages = ["svx"]
51
+
52
+ [tool.basedpyright]
53
+ typeCheckingMode = "standard" # "basic" | "standard" | "strict" (default: "standard")
54
+ # reportUnknownArgumentType = false
55
+ # reportUnknownVariableType = false
56
+ # reportUnusedCallResult = false
57
+ # reportUnannotatedClassAttribute = false
@@ -0,0 +1,3 @@
1
+ # This file ensures the 'recordings' directory is tracked by version control.
2
+ # Audio recordings (WAV/MP3/Opus) generated by the CLI will be stored here.
3
+ # Safe to keep empty; do not remove if you want the directory in the repo.
@@ -0,0 +1,28 @@
1
+ """
2
+ SuperVoxtral package.
3
+
4
+ CLI/GUI tool to record audio and send it to transcription/chat providers
5
+ (e.g., Mistral Voxtral "chat with audio").
6
+
7
+ Expose package version via __version__.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ try:
13
+ from importlib.metadata import PackageNotFoundError, version
14
+ except Exception: # pragma: no cover - very old Python fallback
15
+ # Defensive fallback in case importlib.metadata cannot be imported
16
+ # (it is in the stdlib since Python 3.8, so this is not expected on supported interpreters)
17
+ PackageNotFoundError = Exception # type: ignore
18
+
19
+ def version(distribution_name: str) -> str: # type: ignore
20
+ return "0.0.0" # placeholder: no metadata lookup is possible in this branch
21
+
22
+
23
+ try:
24
+ __version__ = version("supervoxtral")
25
+ except PackageNotFoundError:
26
+ __version__ = "0.0.0" # distribution metadata not found; use the placeholder version
27
+
28
+ __all__ = ["__version__"]