supervoxtral 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervoxtral-0.1.0/.gitignore +100 -0
- supervoxtral-0.1.0/AGENTS.md +88 -0
- supervoxtral-0.1.0/LICENSE +21 -0
- supervoxtral-0.1.0/PKG-INFO +23 -0
- supervoxtral-0.1.0/README.md +237 -0
- supervoxtral-0.1.0/logs/.gitkeep +3 -0
- supervoxtral-0.1.0/notes.md +8 -0
- supervoxtral-0.1.0/prompt/.gitkeep +0 -0
- supervoxtral-0.1.0/pyproject.toml +57 -0
- supervoxtral-0.1.0/recordings/.gitkeep +3 -0
- supervoxtral-0.1.0/svx/__init__.py +28 -0
- supervoxtral-0.1.0/svx/cli.py +264 -0
- supervoxtral-0.1.0/svx/core/__init__.py +92 -0
- supervoxtral-0.1.0/svx/core/audio.py +256 -0
- supervoxtral-0.1.0/svx/core/clipboard.py +122 -0
- supervoxtral-0.1.0/svx/core/config.py +400 -0
- supervoxtral-0.1.0/svx/core/pipeline.py +260 -0
- supervoxtral-0.1.0/svx/core/prompt.py +165 -0
- supervoxtral-0.1.0/svx/core/storage.py +118 -0
- supervoxtral-0.1.0/svx/providers/__init__.py +88 -0
- supervoxtral-0.1.0/svx/providers/base.py +83 -0
- supervoxtral-0.1.0/svx/providers/mistral.py +189 -0
- supervoxtral-0.1.0/svx/ui/qt_app.py +491 -0
- supervoxtral-0.1.0/transcripts/.gitkeep +3 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Python
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[cod]
|
|
6
|
+
*$py.class
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
ENV/
|
|
16
|
+
|
|
17
|
+
# Distribution / packaging
|
|
18
|
+
.Python
|
|
19
|
+
build/
|
|
20
|
+
dist/
|
|
21
|
+
develop-eggs/
|
|
22
|
+
downloads/
|
|
23
|
+
eggs/
|
|
24
|
+
.eggs/
|
|
25
|
+
lib/
|
|
26
|
+
lib64/
|
|
27
|
+
parts/
|
|
28
|
+
sdist/
|
|
29
|
+
wheels/
|
|
30
|
+
share/python-wheels/
|
|
31
|
+
*.egg-info/
|
|
32
|
+
.installed.cfg
|
|
33
|
+
*.egg
|
|
34
|
+
MANIFEST
|
|
35
|
+
|
|
36
|
+
# Installer logs
|
|
37
|
+
pip-log.txt
|
|
38
|
+
pip-delete-this-directory.txt
|
|
39
|
+
|
|
40
|
+
# Unit test / coverage reports
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.cache
|
|
47
|
+
.pytest_cache/
|
|
48
|
+
coverage.xml
|
|
49
|
+
*.cover
|
|
50
|
+
*.py,cover
|
|
51
|
+
.hypothesis/
|
|
52
|
+
|
|
53
|
+
# Type checkers / linters
|
|
54
|
+
.mypy_cache/
|
|
55
|
+
.pyre/
|
|
56
|
+
.pytype/
|
|
57
|
+
.ruff_cache/
|
|
58
|
+
|
|
59
|
+
# -----------------------------------------------------------------------------
|
|
60
|
+
# Project artifacts
|
|
61
|
+
# -----------------------------------------------------------------------------
|
|
62
|
+
# Logs
|
|
63
|
+
logs/
|
|
64
|
+
!logs/.gitkeep
|
|
65
|
+
|
|
66
|
+
# Audio recordings
|
|
67
|
+
recordings/
|
|
68
|
+
!recordings/.gitkeep
|
|
69
|
+
|
|
70
|
+
# Transcripts (API responses)
|
|
71
|
+
transcripts/
|
|
72
|
+
!transcripts/.gitkeep
|
|
73
|
+
|
|
74
|
+
# Prompts
|
|
75
|
+
prompt/
|
|
76
|
+
!prompt/.gitkeep
|
|
77
|
+
|
|
78
|
+
# Local environment variables (use .env.example for template)
|
|
79
|
+
.env
|
|
80
|
+
.env.*
|
|
81
|
+
!.env.example
|
|
82
|
+
|
|
83
|
+
# OS / Editor cruft
|
|
84
|
+
.DS_Store
|
|
85
|
+
.AppleDouble
|
|
86
|
+
.LSOverride
|
|
87
|
+
Icon?
|
|
88
|
+
.Spotlight-V100
|
|
89
|
+
.Trashes
|
|
90
|
+
|
|
91
|
+
# IDEs
|
|
92
|
+
.vscode/
|
|
93
|
+
.idea/
|
|
94
|
+
|
|
95
|
+
# Misc
|
|
96
|
+
*.log
|
|
97
|
+
*.tmp
|
|
98
|
+
*.swp
|
|
99
|
+
*.swo
|
|
100
|
+
.python-version
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# SuperVoxtral — Agent Guide
|
|
2
|
+
|
|
3
|
+
## Project overview
|
|
4
|
+
Python CLI/GUI for audio recording + transcription via APIs (Mistral Voxtral). MVP: manual stop, API-based, zero-footprint defaults (temp files, no persistent dirs unless overridden), results in `transcripts/` when persisted.
|
|
5
|
+
|
|
6
|
+
### Project structure
|
|
7
|
+
```
|
|
8
|
+
supervoxtral/
|
|
9
|
+
├── svx/ # Python package
|
|
10
|
+
│ ├── __init__.py
|
|
11
|
+
│ ├── cli.py # Typer CLI entrypoint (orchestration only, uses Config and Pipeline)
|
|
12
|
+
│ ├── core/ # Core logic (audio, config, prompts, storage)
|
|
13
|
+
│ │ ├── audio.py # Recording, ffmpeg detection/conversion
|
|
14
|
+
│ │ ├── config.py # Structured Config dataclasses, loading, resolution, logging setup
|
|
15
|
+
│ │ ├── pipeline.py # Centralized RecordingPipeline for CLI/GUI unification
|
|
16
|
+
│ │ ├── prompt.py # Prompt resolution (via Config)
|
|
17
|
+
│ │ └── storage.py # Save transcripts and raw JSON (conditional on keep_transcript_files)
|
|
18
|
+
│ ├── providers/ # API integrations
|
|
19
|
+
│ │ ├── __init__.py # Provider registry (get_provider with Config support)
|
|
20
|
+
│ │ ├── base.py # Provider protocol + shared types
|
|
21
|
+
│ │ └── mistral.py # Mistral Voxtral implementation (init from Config)
|
|
22
|
+
│ └── ui/ # GUI (Qt-based MVP)
|
|
23
|
+
│ └── qt_app.py # RecorderWindow/Worker using Pipeline and Config
|
|
24
|
+
|
|
25
|
+
├── recordings/ # Audio files (WAV/MP3/Opus) (conditional)
|
|
26
|
+
├── transcripts/ # API responses (txt/json) (conditional)
|
|
27
|
+
├── logs/ # Application logs (conditional)
|
|
28
|
+
├── pyproject.toml # Project metadata & deps
|
|
29
|
+
├── .env.example # Template for secrets (unused; keys in config.toml)
|
|
30
|
+
└── README.md # User guide
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Typical Execution Flow
|
|
34
|
+
|
|
35
|
+
- **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
|
|
36
|
+
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
|
|
37
|
+
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode: no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
38
|
+
- **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
|
|
39
|
+
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback.
|
|
40
|
+
|
|
41
|
+
## Build & test
|
|
42
|
+
```bash
|
|
43
|
+
# Setup
|
|
44
|
+
uv pip install -e .
|
|
45
|
+
|
|
46
|
+
# Lint & format
|
|
47
|
+
black svx/
|
|
48
|
+
ruff check svx/
|
|
49
|
+
|
|
50
|
+
# Diagnostics (post-edits)
|
|
51
|
+
# Use `diagnostics` tool or run locally to check errors/warnings in pipeline.py, config.py, etc.
|
|
52
|
+
basedpyright svx
|
|
53
|
+
|
|
54
|
+
# Run
|
|
55
|
+
# Initialize user config (generates config.toml with zero-footprint defaults)
|
|
56
|
+
svx config init
|
|
57
|
+
|
|
58
|
+
# Record (provider/format configured in config.toml; tests zero-footprint)
|
|
59
|
+
svx record --prompt "What's in this file?"
|
|
60
|
+
|
|
61
|
+
# Test persistence
|
|
62
|
+
svx record --save-all --prompt "Test persistence"
|
|
63
|
+
|
|
64
|
+
# Test GUI
|
|
65
|
+
svx record --gui
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Maintenance
|
|
69
|
+
|
|
70
|
+
- Use `uv` to install dependencies if needed
|
|
71
|
+
- Update `pyproject.toml`, then run `uv pip install -e .`
|
|
72
|
+
- When adding modules: Propagate Config instance; use RecordingPipeline for recording flows; handle temp files via keep_* flags.
|
|
73
|
+
- Test temp cleanup: Verify no leftovers in default mode (keep_*=false).
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
## Code style
|
|
77
|
+
- **Imports**: `from __future__ import annotations` first, then stdlib, third-party, local
|
|
78
|
+
- **Formatting**: Black (100 line length), ruff for linting (E, F, I, UP rules)
|
|
79
|
+
- **Types**: Full type hints required, use `TypedDict` for data structures, `Protocol` for interfaces (e.g., Provider protocol, Config dataclasses with type hints)
|
|
80
|
+
- **Naming**: snake_case for functions/variables, PascalCase for classes, UPPER_CASE for constants
|
|
81
|
+
- **Error handling**: Custom exceptions inherit from standard types, use `ProviderError` for API failures
|
|
82
|
+
- **Docstrings**: Google-style with clear purpose/dependencies/`__all__` exports
|
|
83
|
+
|
|
84
|
+
## Security
|
|
85
|
+
- API keys are configured in the user config file (`config.toml`), under provider-specific sections.
|
|
86
|
+
- Mistral: define `[providers.mistral].api_key`
|
|
87
|
+
- Environment variables are not used for API keys.
|
|
88
|
+
- Validate user inputs (e.g., paths in Config, prompt resolution).
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 vlebert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: supervoxtral
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: audio,cli,gui,mistral,transcription,voxtral,whisper
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Requires-Dist: mistralai
|
|
10
|
+
Requires-Dist: pyperclip
|
|
11
|
+
Requires-Dist: python-dotenv
|
|
12
|
+
Requires-Dist: rich
|
|
13
|
+
Requires-Dist: sounddevice
|
|
14
|
+
Requires-Dist: soundfile
|
|
15
|
+
Requires-Dist: typer
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: black; extra == 'dev'
|
|
18
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
21
|
+
Requires-Dist: types-python-dotenv; extra == 'dev'
|
|
22
|
+
Provides-Extra: gui
|
|
23
|
+
Requires-Dist: pyside6-essentials; extra == 'gui'
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# supervoxtral
|
|
2
|
+
|
|
3
|
+
A simple Python CLI/GUI tool to record audio from your microphone, optionally convert it (WAV/MP3/Opus), and send it to Mistral Voxtral transcription/chat APIs.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- Python 3.11+
|
|
10
|
+
- ffmpeg (for MP3/Opus conversions)
|
|
11
|
+
- macOS: `brew install ffmpeg`
|
|
12
|
+
- Ubuntu/Debian: `sudo apt-get install ffmpeg`
|
|
13
|
+
- Windows: https://ffmpeg.org/download.html
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
1) Create and activate a virtual environment (example with venv):
|
|
20
|
+
|
|
21
|
+
- macOS/Linux:
|
|
22
|
+
```
|
|
23
|
+
python -m venv .venv
|
|
24
|
+
source .venv/bin/activate
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
- Windows (PowerShell):
|
|
28
|
+
```
|
|
29
|
+
python -m venv .venv
|
|
30
|
+
.\.venv\Scripts\Activate.ps1
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2) Install the package (editable mode during development is convenient):
|
|
34
|
+
```
|
|
35
|
+
pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Optional extras:
|
|
39
|
+
- Dev tools:
|
|
40
|
+
```
|
|
41
|
+
pip install -e ".[dev]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Configuration (API keys and prompts)
|
|
47
|
+
|
|
48
|
+
API keys and default behavior are configured only in your user configuration file (config.toml), not via environment variables.
|
|
49
|
+
|
|
50
|
+
- Location of the user config:
|
|
51
|
+
- macOS: ~/Library/Application Support/SuperVoxtral/config.toml
|
|
52
|
+
- Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral/config.toml
|
|
53
|
+
- Windows: %APPDATA%/SuperVoxtral/config.toml
|
|
54
|
+
|
|
55
|
+
- Initialize your user config and user prompt file:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
svx config init
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
This creates:
|
|
62
|
+
|
|
63
|
+
- config.toml (with sensible defaults, including zero-footprint mode)
|
|
64
|
+
- a user prompt file at: ~/Library/Application Support/SuperVoxtral/prompt/user.md (macOS)
|
|
65
|
+
- Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral/prompt/user.md
|
|
66
|
+
- Windows: %APPDATA%/SuperVoxtral/prompt/user.md
|
|
67
|
+
|
|
68
|
+
**Key config sections (edit `config.toml`):**
|
|
69
|
+
- **[defaults]**: provider (e.g., "mistral"), model, format (e.g., "opus"), language, rate, channels, device, copy (clipboard), keep_audio_files = false, keep_transcript_files = false, keep_log_files = false.
|
|
70
|
+
- Zero-footprint mode (defaults): When `keep_* = false`, files are handled in OS temporary directories (auto-cleaned, no project dirs created). Set to `true` for persistence (creates `recordings/`, etc.).
|
|
71
|
+
- **[providers.mistral]**: api_key = "your_mistral_key_here", model (e.g., "voxtral-small-latest").
|
|
72
|
+
- **[prompt]**: text (inline prompt), file (path to prompt.md).
|
|
73
|
+
- Resolution priority: CLI `--prompt`/`--prompt-file` > config.toml [prompt] > user.md fallback > "What's in this audio?".
|
|
74
|
+
|
|
75
|
+
**Configuration is centralized via a structured `Config` object loaded from your user configuration file (`config.toml`). CLI arguments override select values (e.g., prompt, log level), but most defaults (provider, model, keep flags) come from `config.toml`. No environment variables are used for API keys or settings.**
|
|
76
|
+
|
|
77
|
+
No `.env` or shell environment variables are used for API keys.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Usage (CLI)
|
|
83
|
+
|
|
84
|
+
Make sure your virtual environment is activated and the project is installed (`pip install -e .`).
|
|
85
|
+
|
|
86
|
+
General command form:
|
|
87
|
+
```
|
|
88
|
+
svx record [OPTIONS]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Unified entrypoint**: `svx record` handles both CLI and GUI modes via a centralized pipeline (`svx.core.pipeline.RecordingPipeline`). This ensures consistent behavior for recording, conversion, transcription, saving, clipboard copy, and logging across CLI and GUI.
|
|
92
|
+
|
|
93
|
+
**Zero-footprint defaults**: No directories created; outputs to console/clipboard. Use `--save-all` or config `keep_* = true` for persistence.
|
|
94
|
+
|
|
95
|
+
Note: the CLI now exposes a single recording entrypoint. Use `svx record --gui` to launch the GUI frontend. Most defaults (provider, format, model, language, rate, channels, device, keep_audio_files, copy) are configured via your user config (config.toml). The CLI only supports one-off overrides for: --prompt/--prompt-file, --log-level, --outfile-prefix, --gui, --save-all, --transcribe.
|
|
96
|
+
|
|
97
|
+
Planned MVP commands:
|
|
98
|
+
|
|
99
|
+
- Record with Mistral Voxtral (chat with audio) and a prompt (provider/format from config):
|
|
100
|
+
```
|
|
101
|
+
svx record --prompt "What's in this file?"
|
|
102
|
+
```
|
|
103
|
+
Tip: Outputs to console and clipboard (if copy=true in config). No files saved unless overridden.
|
|
104
|
+
|
|
105
|
+
Persist all outputs (one-off override):
|
|
106
|
+
```
|
|
107
|
+
svx record --save-all --prompt "What's in this file?"
|
|
108
|
+
```
|
|
109
|
+
Creates `recordings/`, `transcripts/`, `logs/` and saves files/logs.
|
|
110
|
+
|
|
111
|
+
- Pure transcription mode with Mistral Voxtral (no prompt, dedicated endpoint):
|
|
112
|
+
```
|
|
113
|
+
svx record --transcribe
|
|
114
|
+
```
|
|
115
|
+
Note: Prompts are ignored in this mode. Combine with --save-all for persistence:
|
|
116
|
+
```
|
|
117
|
+
svx record --transcribe --save-all
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
To start the GUI frontend:
|
|
121
|
+
```
|
|
122
|
+
svx record --gui
|
|
123
|
+
```
|
|
124
|
+
The GUI uses the same pipeline and respects config + CLI overrides (e.g., `--gui --save-all` propagates persistence).
|
|
125
|
+
|
|
126
|
+
The CLI defaults have been unified to favour the previous GUI defaults (e.g. `--format opus`, `--copy` enabled, and `--no-keep-audio-files` by default). The final effective values still respect the precedence: CLI explicit > user config defaults (config.toml) > built-in defaults.
|
|
127
|
+
|
|
128
|
+
### Advanced prompt management
|
|
129
|
+
|
|
130
|
+
You can provide a user prompt, either inline or via a file:
|
|
131
|
+
|
|
132
|
+
#### User prompt (inline)
|
|
133
|
+
```
|
|
134
|
+
svx record --user-prompt "Transcris puis résume ce qui est dit dans l'audio."
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### User prompt from file
|
|
138
|
+
```
|
|
139
|
+
svx record --user-prompt-file ~/Library/Application\ Support/SuperVoxtral/prompt/user.md
|
|
140
|
+
```
|
|
141
|
+
(Adjust the path for your OS; see “Configuration” for locations.)
|
|
142
|
+
|
|
143
|
+
#### Resolution priority (no concatenation)
|
|
144
|
+
Order of precedence for determining the final prompt:
|
|
145
|
+
1) `--user-prompt` (inline)
|
|
146
|
+
2) `--user-prompt-file` (explicit file)
|
|
147
|
+
3) `config.toml` → `[prompt].text`
|
|
148
|
+
4) `config.toml` → `[prompt].file`
|
|
149
|
+
5) User prompt file in your user config dir (`.../SuperVoxtral/prompt/user.md`)
|
|
150
|
+
6) Default fallback: "What's in this audio?"
|
|
151
|
+
|
|
152
|
+
Note: the file and inline prompts are not concatenated; the first non-empty source wins. Uses `Config.resolve_prompt()` for unified resolution across CLI/GUI.
|
|
153
|
+
|
|
154
|
+
If no user prompt is provided (by any of the above), it defaults to "What's in this audio?".
|
|
155
|
+
|
|
156
|
+
A single user message is sent containing the audio and (optionally) text.
|
|
157
|
+
|
|
158
|
+
Flow (chat with audio, default mode):
|
|
159
|
+
- Starts recording WAV immediately.
|
|
160
|
+
- Press Enter to stop recording.
|
|
161
|
+
- Converts WAV to MP3 (if `--format mp3`) or Opus (if `--format opus`).
|
|
162
|
+
- Sends the audio to Mistral Voxtral as base64 input_audio plus your text prompt.
|
|
163
|
+
- Prints and saves the response to `transcripts/` (if keep_transcript_files=true or --save-all).
|
|
164
|
+
|
|
165
|
+
Flow (pure transcription mode, `--transcribe`):
|
|
166
|
+
- Starts recording WAV.
|
|
167
|
+
- Press Enter to stop.
|
|
168
|
+
- Sends the audio to Voxtral (transcription).
|
|
169
|
+
- Prints and saves the transcript.
|
|
170
|
+
|
|
171
|
+
Config-driven options (set these in config.toml under [defaults]):
|
|
172
|
+
- rate, channels, device
|
|
173
|
+
- provider, model, format, language
|
|
174
|
+
- keep_audio_files, copy
|
|
175
|
+
|
|
176
|
+
One-off CLI overrides:
|
|
177
|
+
- `--outfile-prefix mynote_2025-09-09` (custom file prefix)
|
|
178
|
+
- `--log-level debug` (verbose logs)
|
|
179
|
+
- `--user-prompt` (alias: `--prompt`; user prompt text, inline)
|
|
180
|
+
- `--user-prompt-file` (alias: `--prompt-file`; path to user prompt markdown file in your user config dir)
|
|
181
|
+
- `--transcribe` (pure transcription mode, ignores prompts)
|
|
182
|
+
|
|
183
|
+
Alternative invocation (without console script):
|
|
184
|
+
```
|
|
185
|
+
python -m svx.cli record --prompt "..."
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Provider details
|
|
191
|
+
|
|
192
|
+
### Mistral Voxtral (chat with audio)
|
|
193
|
+
- Model: `voxtral-small-latest` by default (configurable)
|
|
194
|
+
- API: `mistralai` Python client
|
|
195
|
+
- Request structure:
|
|
196
|
+
- Messages with `content` array containing:
|
|
197
|
+
- `{ "type": "input_audio", "input_audio": "<base64>" }`
|
|
198
|
+
- `{ "type": "text", "text": "<prompt>" }`
|
|
199
|
+
- Output: text content from the chat response; saved to `transcripts/`.
|
|
200
|
+
|
|
201
|
+
Recommended formats:
|
|
202
|
+
- Opus reduces file size and upload time.
|
|
203
|
+
|
|
204
|
+
Authentication:
|
|
205
|
+
- Mistral: key read from `Config` (user config at `providers.mistral.api_key`).
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Recording formats and conversion
|
|
211
|
+
|
|
212
|
+
- Recording happens in WAV (PCM 16-bit, mono, 16k/32k Hz).
|
|
213
|
+
- Optional conversion via ffmpeg:
|
|
214
|
+
- WAV -> MP3:
|
|
215
|
+
```
|
|
216
|
+
ffmpeg -y -i input.wav -codec:a libmp3lame -q:a 3 output.mp3
|
|
217
|
+
```
|
|
218
|
+
- WAV -> Opus:
|
|
219
|
+
```
|
|
220
|
+
ffmpeg -y -i input.wav -c:a libopus -b:a 24k output.opus
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
The tool will send the converted file if you set `--format mp3` or `--format opus`; otherwise it sends the raw WAV.
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## macOS notes
|
|
228
|
+
|
|
229
|
+
- Microphone permission: on first run, macOS will ask for microphone access. Approve it in System Settings > Privacy & Security > Microphone if needed.
|
|
230
|
+
- If you face issues with device selection, set `device` under `[defaults]` in config.toml to choose a specific input device (a dedicated `--device` CLI flag may be added later).
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## License
|
|
236
|
+
|
|
237
|
+
MIT
|
|
File without changes
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "supervoxtral"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
keywords = [
|
|
12
|
+
"transcription",
|
|
13
|
+
"audio",
|
|
14
|
+
"mistral",
|
|
15
|
+
"voxtral",
|
|
16
|
+
"whisper",
|
|
17
|
+
"cli",
|
|
18
|
+
"gui",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"typer",
|
|
22
|
+
"rich",
|
|
23
|
+
"sounddevice",
|
|
24
|
+
"soundfile",
|
|
25
|
+
"python-dotenv",
|
|
26
|
+
"pyperclip",
|
|
27
|
+
"mistralai",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
gui = ["PySide6-Essentials"]
|
|
32
|
+
dev = ["black", "ruff", "mypy", "types-python-dotenv", "pytest"]
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
svx = "svx.cli:app"
|
|
36
|
+
|
|
37
|
+
[tool.black]
|
|
38
|
+
line-length = 100
|
|
39
|
+
target-version = ["py310"]
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 100
|
|
43
|
+
target-version = "py310"
|
|
44
|
+
|
|
45
|
+
[tool.ruff.lint]
|
|
46
|
+
select = ["E", "F", "I", "UP"]
|
|
47
|
+
ignore = []
|
|
48
|
+
|
|
49
|
+
[tool.hatch.build.targets.wheel]
|
|
50
|
+
packages = ["svx"]
|
|
51
|
+
|
|
52
|
+
[tool.basedpyright]
|
|
53
|
+
typeCheckingMode = "standard" # "basic" | "standard" | "strict" (default: "standard")
|
|
54
|
+
# reportUnknownArgumentType = false
|
|
55
|
+
# reportUnknownVariableType = false
|
|
56
|
+
# reportUnusedCallResult = false
|
|
57
|
+
# reportUnannotatedClassAttribute = false
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SuperVoxtral package.
|
|
3
|
+
|
|
4
|
+
CLI/GUI tool to record audio and send it to transcription/chat providers
|
|
5
|
+
(e.g., Mistral Voxtral "chat with audio").
|
|
6
|
+
|
|
7
|
+
Expose package version via __version__.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# Resolve the installed distribution version and expose it as ``__version__``.
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # pragma: no cover - importlib.metadata is stdlib since Python 3.8
    # Defensive fallback for interpreters that lack importlib.metadata
    # (not expected: the package requires Python 3.11+).
    # Only an import failure is handled here so unrelated errors are not masked.
    PackageNotFoundError = Exception  # type: ignore

    def version(distribution_name: str) -> str:  # type: ignore
        """Return a placeholder version when distribution metadata is unavailable."""
        return "0.0.0"


try:
    __version__ = version("supervoxtral")
except PackageNotFoundError:
    # No installed distribution metadata was found for "supervoxtral";
    # fall back to a placeholder instead of failing at import time.
    __version__ = "0.0.0"

__all__ = ["__version__"]
|