supervoxtral 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervoxtral-0.1.0.dist-info/METADATA +23 -0
- supervoxtral-0.1.0.dist-info/RECORD +18 -0
- supervoxtral-0.1.0.dist-info/WHEEL +4 -0
- supervoxtral-0.1.0.dist-info/entry_points.txt +2 -0
- supervoxtral-0.1.0.dist-info/licenses/LICENSE +21 -0
- svx/__init__.py +28 -0
- svx/cli.py +264 -0
- svx/core/__init__.py +92 -0
- svx/core/audio.py +256 -0
- svx/core/clipboard.py +122 -0
- svx/core/config.py +400 -0
- svx/core/pipeline.py +260 -0
- svx/core/prompt.py +165 -0
- svx/core/storage.py +118 -0
- svx/providers/__init__.py +88 -0
- svx/providers/base.py +83 -0
- svx/providers/mistral.py +189 -0
- svx/ui/qt_app.py +491 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: supervoxtral
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: audio,cli,gui,mistral,transcription,voxtral,whisper
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Requires-Dist: mistralai
|
|
10
|
+
Requires-Dist: pyperclip
|
|
11
|
+
Requires-Dist: python-dotenv
|
|
12
|
+
Requires-Dist: rich
|
|
13
|
+
Requires-Dist: sounddevice
|
|
14
|
+
Requires-Dist: soundfile
|
|
15
|
+
Requires-Dist: typer
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: black; extra == 'dev'
|
|
18
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
21
|
+
Requires-Dist: types-python-dotenv; extra == 'dev'
|
|
22
|
+
Provides-Extra: gui
|
|
23
|
+
Requires-Dist: pyside6-essentials; extra == 'gui'
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
svx/__init__.py,sha256=qPEe5u3PT8yOQN4MiOLj_Bd18HqcRb6fxnPDfdMUP7w,742
|
|
2
|
+
svx/cli.py,sha256=8CHNw0ySKJb3ToG_65sPl2QbvSSoKxp1Jm1zRQJvjUI,8925
|
|
3
|
+
svx/core/__init__.py,sha256=mhzXuIXo3kUzjWme0Bxhe4TQZQELlyEiG_89LUAPC7M,2856
|
|
4
|
+
svx/core/audio.py,sha256=r0m5T1uzdsJ1j9YXgQ5clv15dvMwZBp_bk2aLpjnrkc,7684
|
|
5
|
+
svx/core/clipboard.py,sha256=IFtiN2SnYKQIu0WXx0hCK8syvDXanBpm1Jr2a8X7y9s,3692
|
|
6
|
+
svx/core/config.py,sha256=ApPzZ7AymAChjQ7s3G2nLsFe23tp6dEMAMZjKPgDQuU,14568
|
|
7
|
+
svx/core/pipeline.py,sha256=YdhkwwXC_50rUHaiwHJCrKGp4gwT93s1IAmvvYeCUfY,10243
|
|
8
|
+
svx/core/prompt.py,sha256=NodPOpkMTcnPH-qg8q5hQcxhoNpRJaqrQ00-nXKMQlQ,5153
|
|
9
|
+
svx/core/storage.py,sha256=5_xKYEpvDhaixRxmSTBlyX_jt8ssjHwHzX9VodcrtJw,3213
|
|
10
|
+
svx/providers/__init__.py,sha256=SzlSWpZSUIptbSrAnGfi0d0NX4hYTpT0ObWpYyskDdA,2634
|
|
11
|
+
svx/providers/base.py,sha256=YoiI8KWVRGISh7dx9XXPr1Q1a7ZDu8vfeJFlPbcKr20,2695
|
|
12
|
+
svx/providers/mistral.py,sha256=vrBatNZg0zGNkJ5Qfnfz6ZwP6QtBgIt9sT_w59zkSO0,6636
|
|
13
|
+
svx/ui/qt_app.py,sha256=_cEhISVJcudDAsZrq9J1AZf_xPFvnp59s7gPipTpj2A,16509
|
|
14
|
+
supervoxtral-0.1.0.dist-info/METADATA,sha256=fim_1ZOzN1J2q96AgyGVW4ptroiHuIT7sANTIrYFL5A,753
|
|
15
|
+
supervoxtral-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
16
|
+
supervoxtral-0.1.0.dist-info/entry_points.txt,sha256=phJhRy3VkYHC6AR_tUB5CypHzG0ePRR9sB13HWE1vEg,36
|
|
17
|
+
supervoxtral-0.1.0.dist-info/licenses/LICENSE,sha256=fCEBKmC4i-1WZAwoKjKWegfDd8qNsG8ECB7JyqoswyQ,1064
|
|
18
|
+
supervoxtral-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 vlebert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
svx/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SuperVoxtral package.
|
|
3
|
+
|
|
4
|
+
CLI/GUI tool to record audio and send it to transcription/chat providers
|
|
5
|
+
(e.g., Mistral Voxtral "chat with audio").
|
|
6
|
+
|
|
7
|
+
Expose package version via __version__.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # pragma: no cover - defensive only
    # importlib.metadata has been part of the standard library since Python 3.8 and
    # this package requires Python >= 3.11, so this branch is not expected to run.
    # Only an import failure is handled here; any other error should surface.
    PackageNotFoundError = Exception  # type: ignore

    def version(distribution_name: str) -> str:  # type: ignore
        """Fallback stub: report a placeholder version for any distribution."""
        return "0.0.0"


try:
    # Read the version from the installed distribution's metadata.
    __version__ = version("supervoxtral")
except PackageNotFoundError:
    # No installed distribution metadata was found (e.g. running from a source tree).
    __version__ = "0.0.0"

__all__ = ["__version__"]
|
svx/cli.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.panel import Panel
|
|
11
|
+
from rich.prompt import Prompt
|
|
12
|
+
|
|
13
|
+
import svx.core.config as config
|
|
14
|
+
from svx.core.config import (
|
|
15
|
+
Config,
|
|
16
|
+
ProviderConfig,
|
|
17
|
+
)
|
|
18
|
+
from svx.core.pipeline import RecordingPipeline
|
|
19
|
+
from svx.core.prompt import init_user_prompt_file
|
|
20
|
+
|
|
21
|
+
app = typer.Typer(help="SuperVoxtral CLI: record audio and send to transcription/chat providers.")
|
|
22
|
+
console = Console()
|
|
23
|
+
|
|
24
|
+
# Config subcommands (open/show user configuration)
|
|
25
|
+
config_app = typer.Typer(help="Config utilities (open/show user configuration)")
|
|
26
|
+
app.add_typer(config_app, name="config")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@config_app.command("open")
def config_open() -> None:
    """
    Open the user configuration directory in the platform's file manager.
    """
    path = config.USER_CONFIG_DIR
    if not path.exists():
        console.print(f"[yellow]User config directory does not exist:[/yellow] {path}")
        console.print("It will be created on demand when saving config or prompts.")
        # There is nothing to open yet; asking the system handler to open a
        # missing path would only fail (or report a misleading success).
        return

    try:
        # typer.launch returns the launcher's exit code; 0 indicates success.
        exit_code = typer.launch(str(path))
    except Exception as e:
        console.print(f"[red]Failed to open config directory with system handler:[/red] {e}")
        console.print(f"Please open it manually: {path}")
        return

    if exit_code:
        console.print(
            "[red]Failed to open config directory with system handler:[/red] "
            f"exit code {exit_code}"
        )
        console.print(f"Please open it manually: {path}")
    else:
        console.print(f"Opened config directory: {path}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@config_app.command("show")
def config_show() -> None:
    """
    Display the effective configuration and relevant paths.
    """
    # Ensure base environment and directories are available (but do not change user state)
    config.setup_environment(log_level="INFO")

    cfg = Config.load()

    def _mask_secret(val: str | None, keep: int = 4) -> str:
        """Return ``val`` with only its first and last ``keep`` characters visible.

        Empty/missing values are reported as "(not set)". Values too short to be
        meaningfully truncated are masked entirely so that a short secret is never
        printed in clear.
        """
        if not val:
            return "(not set)"
        v = str(val)
        # keep <= 0 must be special-cased: v[-0:] is the whole string.
        if keep <= 0 or len(v) <= keep * 2 + 3:
            return "*" * len(v)
        return v[:keep] + "..." + v[-keep:]

    # Fall back to an empty ProviderConfig when no [providers.mistral] entry exists.
    mistral_key = str(cfg.providers.get("mistral", ProviderConfig()).api_key or "")

    # Gather info
    user_config_file = cfg.user_config_file
    user_prompt_file = cfg.user_prompt_dir / "user.md"

    defaults_section = asdict(cfg.defaults)
    prompt_section = asdict(cfg.prompt)

    # Resolve prompt source (same logic as record command, but read-only)
    resolved_prompt = cfg.resolve_prompt(None, None)
    resolved_prompt_source = "resolved from config"

    # Short single-line excerpt, capped at 200 characters
    excerpt = resolved_prompt.replace("\n", " ")[:200]
    if len(resolved_prompt) > 200:
        excerpt += "..."

    # Print summary
    console.print("[bold underline]SuperVoxtral - Configuration (effective)[/bold underline]")
    console.print(
        f"[cyan]User config file:[/cyan] {user_config_file} (exists={user_config_file.exists()})"
    )
    console.print(
        f"[cyan]User prompt file:[/cyan] {user_prompt_file} (exists={user_prompt_file.exists()})"
    )

    console.print()
    console.print("[bold]Provider credentials (from config.toml)[/bold]")
    console.print(f"  providers.mistral.api_key: {_mask_secret(mistral_key)}")
    console.print()
    console.print("[bold]User config sections (loaded from config.toml)[/bold]")
    console.print(f"  defaults: {defaults_section or '(none)'}")
    console.print(f"  prompt: {prompt_section or '(none)'}")
    console.print()
    console.print(f"[bold]Resolved prompt source:[/bold] {resolved_prompt_source}")
    console.print(f"[bold]Prompt excerpt:[/bold] {excerpt}")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@config_app.command("init")
def config_init(
    force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
) -> None:
    """
    Initialize the user configuration directory with an active config.toml and a prompt/user.md.
    Does not overwrite existing files unless --force is specified.
    """
    # The prompt file comes first: its path is handed to the config initializer.
    prompt_path = init_user_prompt_file(force=force)
    cfg_path = config.init_user_config(force=force, prompt_file=prompt_path)

    # Report both locations, config first, then prompt.
    for label, location in (("config", cfg_path), ("prompt", prompt_path)):
        console.print(f"Ensured user {label}: {location}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@app.command()
def record(
    user_prompt: str | None = typer.Option(
        None,
        "--user-prompt",
        "--prompt",
        help="User prompt text (inline) to use for this run.",
    ),
    user_prompt_file: Path | None = typer.Option(
        None,
        "--user-prompt-file",
        "--prompt-file",
        help="Path to a text file containing the user prompt for this run.",
    ),
    transcribe: bool = typer.Option(
        False,
        "--transcribe",
        help="Use pure transcription mode (no prompt, dedicated endpoint).",
    ),
    outfile_prefix: str | None = typer.Option(
        None,
        "--outfile-prefix",
        help="Custom output file prefix (default uses timestamp).",
    ),
    gui: bool = typer.Option(
        False,
        "--gui/--no-gui",
        help="Launch the GUI frontend instead of the CLI recording flow.",
    ),
    save_all: bool = typer.Option(
        False,
        "--save-all",
        help="Override config to keep all files (audio, transcripts, logs) for this run.",
    ),
    log_level: str = typer.Option(
        "INFO",
        "--log-level",
        help="Logging level (DEBUG, INFO, WARNING, ERROR).",
    ),
) -> None:
    """
    Record audio from the microphone and send it to the selected provider.

    This CLI accepts only a small set of runtime flags. Most defaults (provider, format,
    model, language, sample rate, channels, device,
    file retention, copy-to-clipboard)
    must be configured in the user's `config.toml` under [defaults].

    Priority for option resolution:
    1) CLI explicit (only for --prompt/--prompt-file, --log-level,
    --outfile-prefix, --gui, --transcribe)
    2) defaults in user config (config.toml)
    3) coded CLI defaults (used when user config is absent)

    Flow:
    - Records WAV until you press Enter (CLI mode).
    - Optionally converts to MP3/Opus depending on config.
    - Sends the file per provider rules.
    - Prints and saves the result.

    Note: In --transcribe mode, prompts (--user-prompt or --user-prompt-file) are ignored,
    as it uses a dedicated transcription endpoint without prompting.
    """
    cfg = Config.load(log_level=log_level)

    # Prompts are meaningless in transcribe mode: warn once and drop them.
    if transcribe and (user_prompt or user_prompt_file):
        console.print("[yellow]Transcribe mode: prompt is ignored.[/yellow]")
        user_prompt = None
        user_prompt_file = None

    # If GUI requested, launch GUI with the resolved parameters and exit.
    if gui:
        # The GUI module is imported lazily; its Qt dependency ships only with the
        # optional 'gui' extra, so report a missing install instead of a traceback.
        try:
            from svx.ui.qt_app import run_gui
        except ImportError as e:
            typer.secho(
                f"Error: GUI support is unavailable ({e}). "
                "Install the optional 'gui' extra, e.g. pip install 'supervoxtral[gui]'.",
                fg=typer.colors.RED,
            )
            raise typer.Exit(code=1) from e

        # Pass config object to the GUI call
        run_gui(
            cfg=cfg,
            user_prompt=user_prompt,
            user_prompt_file=user_prompt_file,
            save_all=save_all,
            outfile_prefix=outfile_prefix,
            transcribe_mode=transcribe,
        )
        return

    try:

        def progress_cb(msg: str) -> None:
            """Echo pipeline progress messages to the console."""
            console.print(f"[bold cyan]{msg}[/bold cyan]")

        stop_event = threading.Event()
        console.print(Panel.fit("Recording... Press Enter to stop.", title="SuperVoxtral"))

        def _wait_for_enter() -> None:
            """Block on stdin until Enter (or Ctrl-C/EOF), then signal the stop event."""
            try:
                Prompt.ask("Press Enter to stop", default="", show_default=False)
            except (KeyboardInterrupt, EOFError):
                pass
            finally:
                # Always signal, so the pipeline is never left waiting on stdin.
                stop_event.set()

        # Daemon thread: a failure elsewhere must not keep the process alive on stdin.
        waiter = threading.Thread(target=_wait_for_enter, daemon=True)
        waiter.start()

        pipeline = RecordingPipeline(
            cfg=cfg,
            user_prompt=user_prompt,
            user_prompt_file=user_prompt_file,
            save_all=save_all,
            outfile_prefix=outfile_prefix,
            transcribe_mode=transcribe,
            progress_callback=progress_cb,
        )
        # NOTE(review): presumably run() returns once stop_event is set - confirm
        # in RecordingPipeline; otherwise the join below could block on stdin.
        result = pipeline.run(stop_event=stop_event)
        waiter.join()

        text = result["text"]
        duration = result["duration"]
        paths = result["paths"]

        console.print(f"Recording completed in {duration:.1f}s")
        if paths.get("wav"):
            console.print(f"Audio saved to {paths['wav']}")
        else:
            console.print("Audio file (temporary) deleted after processing.")

        console.print(Panel.fit(text, title=f"{cfg.defaults.provider.capitalize()} Response"))

        if paths.get("txt"):
            console.print(f"Saved transcript: {paths['txt']}")
        if paths.get("json"):
            console.print(f"Saved raw JSON: {paths['json']}")

    except Exception as e:
        # Top-level CLI boundary: log the traceback, show a short message, exit non-zero.
        logging.exception("Error in record command")
        typer.secho(f"Error: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
if __name__ == "__main__":
|
|
264
|
+
app()
|
svx/core/__init__.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core package for audio recording, encoding, configuration, and storage.
|
|
3
|
+
|
|
4
|
+
This module provides lightweight placeholders and shared types/constants that other
|
|
5
|
+
modules (e.g., recorder, encoder, config, storage) can import and extend later.
|
|
6
|
+
|
|
7
|
+
Planned submodules:
|
|
8
|
+
- recorder.py: microphone capture to WAV (streamed) with manual stop
|
|
9
|
+
- encoder.py: conversion to MP3/Opus via ffmpeg (optional)
|
|
10
|
+
- config.py: environment/config management, defaults, validation
|
|
11
|
+
- storage.py: file naming, directory management, transcript persistence
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Literal
|
|
19
|
+
|
|
20
|
+
# Defaults and shared constants
|
|
21
|
+
DEFAULT_SAMPLE_RATE: int = 16000
|
|
22
|
+
DEFAULT_CHANNELS: int = 1
|
|
23
|
+
SUPPORTED_FORMATS: tuple[str, ...] = ("wav", "mp3", "opus")
|
|
24
|
+
DEFAULT_WAV_SUBTYPE: str = "PCM_16"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
class RecordingSettings:
    """Capture parameters applied while recording the raw WAV file."""

    # Sample rate; defaults to the module-wide DEFAULT_SAMPLE_RATE.
    samplerate: int = DEFAULT_SAMPLE_RATE
    # Number of input channels; defaults to DEFAULT_CHANNELS.
    channels: int = DEFAULT_CHANNELS
    # Input device: depending on the platform this may be an int index or a
    # device name string. None leaves the choice unspecified.
    device: int | str | None = None
    # WAV subtype name, e.g. "PCM_16"; defaults to DEFAULT_WAV_SUBTYPE.
    subtype: str = DEFAULT_WAV_SUBTYPE
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(slots=True)
|
|
39
|
+
class EncodingSettings:
|
|
40
|
+
"""Parameters for post-recording encoding/export."""
|
|
41
|
+
|
|
42
|
+
# Output format to send to provider; recording is always WAV
|
|
43
|
+
output_format: Literal["wav", "mp3", "opus"] = "wav"
|
|
44
|
+
# For opus/mp3, recommended bitrates/quality settings (ffmpeg-specific)
|
|
45
|
+
opus_bitrate: str = "64k" # used if output_format == "opus"
|
|
46
|
+
mp3_quality: int = 3 # LAME VBR quality (lower is better; typical 0-9)
|
|
47
|
+
keep_wav: bool = True # keep raw WAV after conversion
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(slots=True)
|
|
51
|
+
class Paths:
|
|
52
|
+
"""Project paths used by the CLI and providers."""
|
|
53
|
+
|
|
54
|
+
root: Path = Path.cwd()
|
|
55
|
+
recordings_dir: Path = Path("recordings")
|
|
56
|
+
transcripts_dir: Path = Path("transcripts")
|
|
57
|
+
logs_dir: Path = Path("logs")
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def abs_root(self) -> Path:
|
|
61
|
+
return self.root.resolve()
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def abs_recordings(self) -> Path:
|
|
65
|
+
return (self.abs_root / self.recordings_dir).resolve()
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def abs_transcripts(self) -> Path:
|
|
69
|
+
return (self.abs_root / self.transcripts_dir).resolve()
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def abs_logs(self) -> Path:
|
|
73
|
+
return (self.abs_root / self.logs_dir).resolve()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def ensure_directories(paths: Paths) -> None:
    """Create the recordings, transcripts, and logs directories when missing.

    Parent directories are created as needed; directories that already exist
    are left untouched.
    """
    # Look each property up lazily so the paths are resolved and created one
    # after another, in this order.
    for attr in ("abs_recordings", "abs_transcripts", "abs_logs"):
        getattr(paths, attr).mkdir(parents=True, exist_ok=True)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
__all__ = [
|
|
84
|
+
"DEFAULT_SAMPLE_RATE",
|
|
85
|
+
"DEFAULT_CHANNELS",
|
|
86
|
+
"SUPPORTED_FORMATS",
|
|
87
|
+
"DEFAULT_WAV_SUBTYPE",
|
|
88
|
+
"RecordingSettings",
|
|
89
|
+
"EncodingSettings",
|
|
90
|
+
"Paths",
|
|
91
|
+
"ensure_directories",
|
|
92
|
+
]
|