voice-mode 3.34.3__tar.gz → 4.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voice_mode-3.34.3 → voice_mode-4.0.1}/.gitignore +6 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/CHANGELOG.md +43 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/PKG-INFO +5 -2
- {voice_mode-3.34.3 → voice_mode-4.0.1}/pyproject.toml +61 -3
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__version__.py +1 -1
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli.py +5 -0
- voice_mode-4.0.1/voice_mode/cli_commands/transcribe.py +141 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/config.py +139 -37
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/providers.py +7 -8
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/configuration.py +2 -2
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/configuration_management.py +106 -5
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/converse.py +98 -0
- voice_mode-4.0.1/voice_mode/tools/transcription/__init__.py +14 -0
- voice_mode-4.0.1/voice_mode/tools/transcription/backends.py +287 -0
- voice_mode-4.0.1/voice_mode/tools/transcription/core.py +136 -0
- voice_mode-4.0.1/voice_mode/tools/transcription/formats.py +144 -0
- voice_mode-4.0.1/voice_mode/tools/transcription/types.py +52 -0
- voice_mode-3.34.3/voice_mode/voice_preferences.py +0 -125
- {voice_mode-3.34.3 → voice_mode-4.0.1}/README.md +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/build_hooks.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__main__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli_commands/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli_commands/exchanges.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/conversation_logger.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/core.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/data/versions.json +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/conversations.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/filters.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/formatters.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/models.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/reader.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/stats.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/README.md +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/favicon.ico +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/globals.css +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/layout.tsx +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/page.tsx +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/next-env.d.ts +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/next.config.mjs +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/package-lock.json +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/package.json +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/pnpm-lock.yaml +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/postcss.config.mjs +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/tailwind.config.ts +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/tsconfig.json +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/README.md +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/converse.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/release_notes.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/services.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/provider_discovery.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/audio_files.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/changelog.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/statistics.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/version.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/whisper_models.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/server.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/shared.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/simple_failover.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/statistics.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/streaming.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/scripts/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/scripts/start-whisper-server.sh +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/dependencies.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/devices.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/diagnostics.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/providers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/service.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/kokoro/install.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/list_versions.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/frontend.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/install.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/production_server.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/uninstall.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/version_info.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/install.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/list_models.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_active.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_install.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_remove.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/models.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/uninstall.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/statistics.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/voice_registry.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/__init__.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/audio_diagnostics.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/event_logger.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/ffmpeg_check.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/format_migration.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/gpu_detection.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/migration_helpers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/common.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/coreml_setup.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/kokoro_helpers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/livekit_helpers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/whisper_helpers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/whisper_version.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/version_helpers.py +0 -0
- {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/version.py +0 -0
@@ -7,6 +7,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [4.0.1] - 2025-09-01
|
11
|
+
|
12
|
+
### Removed
|
13
|
+
- Removed `whisperx` optional dependency to fix PyPI upload compatibility
|
14
|
+
- The dependency was specified as a Git URL which is not allowed for PyPI packages
|
15
|
+
- WhisperX functionality was recently added and not essential for core features
|
16
|
+
|
17
|
+
## [4.0.0] - 2025-08-31
|
18
|
+
|
19
|
+
### BREAKING CHANGES
|
20
|
+
- **Unified voice configuration system**
|
21
|
+
- **BREAKING**: Replaced `.voices.txt` files with unified `.voicemode.env` configuration
|
22
|
+
- Changed environment variable from `VOICEMODE_TTS_VOICES` to `VOICEMODE_VOICES` for simplicity
|
23
|
+
- Implemented cascading configuration: env vars > project configs > global config
|
24
|
+
- Added directory tree walking for project-specific configuration discovery
|
25
|
+
- Supports runtime configuration reloading via MCP tools
|
26
|
+
- **Migration Required**: Users must migrate from `.voices.txt` to `.voicemode.env` with `VOICEMODE_VOICES=voice1,voice2` format
|
27
|
+
|
28
|
+
### Added
|
29
|
+
|
30
|
+
- **Comprehensive test coverage reporting system**
|
31
|
+
- Integration with pytest-cov for coverage measurement
|
32
|
+
- HTML coverage reports generated in htmlcov/ directory
|
33
|
+
- Coverage badges and metrics for monitoring code quality
|
34
|
+
- Automated coverage reporting in CI/CD pipeline
|
35
|
+
|
36
|
+
- **Word-level timestamps for transcription**
|
37
|
+
- Enhanced transcription output with word-level timing information
|
38
|
+
- Support for SubRip (SRT) format output with precise word timestamps
|
39
|
+
- New transcription CLI command for processing audio files
|
40
|
+
- Comprehensive transcription backend supporting multiple formats
|
41
|
+
- Word timing data available for improved accessibility and analysis
|
42
|
+
|
43
|
+
- **Enhanced voice selection guide**
|
44
|
+
- Comprehensive documentation for voice selection across different providers
|
45
|
+
- Clear migration instructions from old `.voices.txt` system
|
46
|
+
|
47
|
+
### Removed
|
48
|
+
- **Legacy voice preference system**
|
49
|
+
- Removed 578 lines of old `voice_preferences.py` system
|
50
|
+
- Eliminated unreliable `.voices.txt` file parsing
|
51
|
+
- Removed associated test files for deprecated voice preference system
|
52
|
+
|
10
53
|
## [3.34.3] - 2025-08-26
|
11
54
|
|
12
55
|
### Changed
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: voice-mode
|
3
|
-
Version: 3.34.3
|
3
|
+
Version: 4.0.1
|
4
4
|
Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
|
5
5
|
Project-URL: Homepage, https://github.com/mbailey/voicemode
|
6
6
|
Project-URL: Repository, https://github.com/mbailey/voicemode
|
@@ -66,9 +66,12 @@ Requires-Dist: pandas>=2.0.0; extra == 'notebooks'
|
|
66
66
|
Provides-Extra: scripts
|
67
67
|
Requires-Dist: flask>=3.0.0; extra == 'scripts'
|
68
68
|
Provides-Extra: test
|
69
|
+
Requires-Dist: coverage[toml]>=7.4.0; extra == 'test'
|
69
70
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
70
|
-
Requires-Dist: pytest-cov>=4.
|
71
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
|
71
72
|
Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
|
73
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == 'test'
|
74
|
+
Requires-Dist: pytest-xdist>=3.5.0; extra == 'test'
|
72
75
|
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
73
76
|
Description-Content-Type: text/markdown
|
74
77
|
|
@@ -67,8 +67,11 @@ dev = [
|
|
67
67
|
test = [
|
68
68
|
"pytest>=7.0.0",
|
69
69
|
"pytest-asyncio>=0.21.0",
|
70
|
-
"pytest-cov>=4.
|
70
|
+
"pytest-cov>=4.1.0",
|
71
71
|
"pytest-mock>=3.10.0",
|
72
|
+
"pytest-xdist>=3.5.0", # For parallel testing
|
73
|
+
"pytest-timeout>=2.2.0", # For test timeouts
|
74
|
+
"coverage[toml]>=7.4.0",
|
72
75
|
]
|
73
76
|
notebooks = [
|
74
77
|
"gradio>=4.0.0",
|
@@ -144,10 +147,65 @@ exclude = [
|
|
144
147
|
[tool.hatch.version]
|
145
148
|
path = "voice_mode/__version__.py"
|
146
149
|
|
150
|
+
[tool.hatch.metadata]
|
151
|
+
allow-direct-references = true
|
152
|
+
|
147
153
|
[tool.pytest.ini_options]
|
154
|
+
minversion = "7.0"
|
148
155
|
testpaths = ["tests"]
|
149
156
|
python_files = "test_*.py"
|
150
157
|
python_classes = "Test*"
|
151
158
|
python_functions = "test_*"
|
152
|
-
|
153
|
-
addopts =
|
159
|
+
asyncio_mode = "auto"
|
160
|
+
addopts = [
|
161
|
+
"-ra",
|
162
|
+
"--strict-markers",
|
163
|
+
"--strict-config",
|
164
|
+
"--ignore=tests/manual",
|
165
|
+
"--cov=voice_mode",
|
166
|
+
"--cov-branch",
|
167
|
+
"--cov-report=term-missing:skip-covered",
|
168
|
+
"--cov-report=html",
|
169
|
+
"--cov-report=xml",
|
170
|
+
]
|
171
|
+
markers = [
|
172
|
+
"unit: Unit tests (fast, isolated)",
|
173
|
+
"integration: Integration tests (may interact with services)",
|
174
|
+
"slow: Tests that take > 1s",
|
175
|
+
"manual: Manual tests requiring human interaction",
|
176
|
+
]
|
177
|
+
filterwarnings = [
|
178
|
+
"ignore::DeprecationWarning",
|
179
|
+
]
|
180
|
+
|
181
|
+
[tool.coverage.run]
|
182
|
+
source = ["voice_mode"]
|
183
|
+
branch = true
|
184
|
+
parallel = true
|
185
|
+
omit = [
|
186
|
+
"*/tests/*",
|
187
|
+
"*/test_*.py",
|
188
|
+
"*/__pycache__/*",
|
189
|
+
"*/site-packages/*",
|
190
|
+
"test-env/*",
|
191
|
+
]
|
192
|
+
|
193
|
+
[tool.coverage.report]
|
194
|
+
exclude_lines = [
|
195
|
+
"pragma: no cover",
|
196
|
+
"def __repr__",
|
197
|
+
"if TYPE_CHECKING:",
|
198
|
+
"raise NotImplementedError",
|
199
|
+
"if __name__ == .__main__.:",
|
200
|
+
"@abstractmethod",
|
201
|
+
"except ImportError:",
|
202
|
+
]
|
203
|
+
precision = 2
|
204
|
+
skip_covered = true
|
205
|
+
show_missing = true
|
206
|
+
|
207
|
+
[tool.coverage.html]
|
208
|
+
directory = "htmlcov"
|
209
|
+
|
210
|
+
[tool.coverage.xml]
|
211
|
+
output = "coverage.xml"
|
@@ -1359,13 +1359,18 @@ def cli():
|
|
1359
1359
|
|
1360
1360
|
# Import subcommand groups
|
1361
1361
|
from voice_mode.cli_commands import exchanges as exchanges_cmd
|
1362
|
+
from voice_mode.cli_commands import transcribe as transcribe_cmd
|
1362
1363
|
|
1363
1364
|
# Add subcommands to legacy CLI
|
1364
1365
|
cli.add_command(exchanges_cmd.exchanges)
|
1366
|
+
cli.add_command(transcribe_cmd.transcribe)
|
1365
1367
|
|
1366
1368
|
# Add exchanges to main CLI
|
1367
1369
|
voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
|
1368
1370
|
|
1371
|
+
# Add transcribe to main CLI
|
1372
|
+
voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
|
1373
|
+
|
1369
1374
|
|
1370
1375
|
# Converse command - direct voice conversation from CLI
|
1371
1376
|
@voice_mode_main_cli.command()
|
@@ -0,0 +1,141 @@
|
|
1
|
+
"""CLI command for audio transcription."""
|
2
|
+
|
3
|
+
import click
|
4
|
+
import json
|
5
|
+
import asyncio
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Optional
|
8
|
+
|
9
|
+
from voice_mode.tools.transcription import (
|
10
|
+
transcribe_audio,
|
11
|
+
TranscriptionBackend,
|
12
|
+
OutputFormat
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
@click.group()
|
17
|
+
def transcribe():
|
18
|
+
"""Audio transcription with word-level timestamps."""
|
19
|
+
pass
|
20
|
+
|
21
|
+
|
22
|
+
@transcribe.command("audio")
|
23
|
+
@click.argument('audio_file', type=click.Path(exists=True))
|
24
|
+
@click.option('--words', is_flag=True, help='Include word-level timestamps')
|
25
|
+
@click.option(
|
26
|
+
'--backend',
|
27
|
+
type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
|
28
|
+
default='openai',
|
29
|
+
help='Transcription backend to use'
|
30
|
+
)
|
31
|
+
@click.option(
|
32
|
+
'--format',
|
33
|
+
'output_format',
|
34
|
+
type=click.Choice(['json', 'srt', 'vtt', 'csv']),
|
35
|
+
default='json',
|
36
|
+
help='Output format for transcription'
|
37
|
+
)
|
38
|
+
@click.option('--output', '-o', type=click.Path(), help='Save transcription to file')
|
39
|
+
@click.option('--language', help='Language code (e.g., en, es, fr)')
|
40
|
+
@click.option('--model', default='whisper-1', help='Model to use (for OpenAI backend)')
|
41
|
+
def audio_command(
|
42
|
+
audio_file: str,
|
43
|
+
words: bool,
|
44
|
+
backend: str,
|
45
|
+
output_format: str,
|
46
|
+
output: Optional[str],
|
47
|
+
language: Optional[str],
|
48
|
+
model: str
|
49
|
+
):
|
50
|
+
"""
|
51
|
+
Transcribe audio with optional word-level timestamps.
|
52
|
+
|
53
|
+
Examples:
|
54
|
+
|
55
|
+
voice-mode transcribe audio recording.mp3
|
56
|
+
|
57
|
+
voice-mode transcribe audio interview.wav --words
|
58
|
+
|
59
|
+
voice-mode transcribe audio podcast.mp3 --words --format srt -o subtitles.srt
|
60
|
+
|
61
|
+
voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
|
62
|
+
"""
|
63
|
+
async def run():
|
64
|
+
# Perform transcription
|
65
|
+
result = await transcribe_audio(
|
66
|
+
audio_file=audio_file,
|
67
|
+
word_timestamps=words,
|
68
|
+
backend=TranscriptionBackend(backend),
|
69
|
+
output_format=OutputFormat(output_format),
|
70
|
+
language=language,
|
71
|
+
model=model
|
72
|
+
)
|
73
|
+
|
74
|
+
# Check for errors
|
75
|
+
if not result.get("success", False):
|
76
|
+
error_msg = result.get("error", "Unknown error occurred")
|
77
|
+
click.echo(f"Error: {error_msg}", err=True)
|
78
|
+
return
|
79
|
+
|
80
|
+
# Format output
|
81
|
+
if output_format == 'json':
|
82
|
+
# Remove internal fields for cleaner output
|
83
|
+
output_result = {k: v for k, v in result.items()
|
84
|
+
if k not in ['formatted_content']}
|
85
|
+
content = json.dumps(output_result, indent=2)
|
86
|
+
elif "formatted_content" in result:
|
87
|
+
content = result["formatted_content"]
|
88
|
+
else:
|
89
|
+
# Fallback to JSON if format conversion failed
|
90
|
+
content = json.dumps(result, indent=2)
|
91
|
+
|
92
|
+
# Write output
|
93
|
+
if output:
|
94
|
+
Path(output).write_text(content)
|
95
|
+
click.echo(f"Transcription saved to {output}")
|
96
|
+
else:
|
97
|
+
click.echo(content)
|
98
|
+
|
99
|
+
# Run async function
|
100
|
+
asyncio.run(run())
|
101
|
+
|
102
|
+
|
103
|
+
# For backward compatibility, also provide a direct command
|
104
|
+
@click.command('transcribe-audio')
|
105
|
+
@click.argument('audio_file', type=click.Path(exists=True))
|
106
|
+
@click.option('--words', is_flag=True, help='Include word-level timestamps')
|
107
|
+
@click.option(
|
108
|
+
'--backend',
|
109
|
+
type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
|
110
|
+
default='openai',
|
111
|
+
help='Transcription backend'
|
112
|
+
)
|
113
|
+
@click.option(
|
114
|
+
'--format',
|
115
|
+
'output_format',
|
116
|
+
type=click.Choice(['json', 'srt', 'vtt', 'csv']),
|
117
|
+
default='json',
|
118
|
+
help='Output format'
|
119
|
+
)
|
120
|
+
@click.option('--output', '-o', type=click.Path(), help='Save to file')
|
121
|
+
@click.option('--language', help='Language code')
|
122
|
+
@click.option('--model', default='whisper-1', help='Model to use')
|
123
|
+
def transcribe_audio_command(
|
124
|
+
audio_file: str,
|
125
|
+
words: bool,
|
126
|
+
backend: str,
|
127
|
+
output_format: str,
|
128
|
+
output: Optional[str],
|
129
|
+
language: Optional[str],
|
130
|
+
model: str
|
131
|
+
):
|
132
|
+
"""Direct transcription command for backward compatibility."""
|
133
|
+
audio_command.callback(
|
134
|
+
audio_file=audio_file,
|
135
|
+
words=words,
|
136
|
+
backend=backend,
|
137
|
+
output_format=output_format,
|
138
|
+
output=output,
|
139
|
+
language=language,
|
140
|
+
model=model
|
141
|
+
)
|
@@ -15,21 +15,66 @@ from datetime import datetime
|
|
15
15
|
|
16
16
|
# ==================== ENVIRONMENT CONFIGURATION ====================
|
17
17
|
|
18
|
+
def find_voicemode_env_files() -> list[Path]:
|
19
|
+
"""
|
20
|
+
Find .voicemode.env files by walking up the directory tree.
|
21
|
+
|
22
|
+
Looks for (in order of priority - closest to current directory wins):
|
23
|
+
1. .voicemode.env in current or parent directories
|
24
|
+
2. .voicemode/voicemode.env in current or parent directories
|
25
|
+
3. ~/.voicemode/voicemode.env in user home (global config)
|
26
|
+
|
27
|
+
Returns:
|
28
|
+
List of Path objects in loading order (global first, then project-specific)
|
29
|
+
"""
|
30
|
+
config_files = []
|
31
|
+
|
32
|
+
# First add global config (lowest priority - loaded first)
|
33
|
+
global_config = Path.home() / ".voicemode" / "voicemode.env"
|
34
|
+
|
35
|
+
# Backwards compatibility: check for old filename
|
36
|
+
if not global_config.exists():
|
37
|
+
old_global = Path.home() / ".voicemode" / ".voicemode.env"
|
38
|
+
if old_global.exists():
|
39
|
+
global_config = old_global
|
40
|
+
|
41
|
+
if global_config.exists():
|
42
|
+
config_files.append(global_config)
|
43
|
+
|
44
|
+
# Then walk up directory tree for project-specific configs (higher priority)
|
45
|
+
current_dir = Path.cwd()
|
46
|
+
project_configs = []
|
47
|
+
|
48
|
+
while current_dir != current_dir.parent:
|
49
|
+
# Check for standalone .voicemode.env first
|
50
|
+
standalone_file = current_dir / ".voicemode.env"
|
51
|
+
if standalone_file.exists():
|
52
|
+
project_configs.append(standalone_file)
|
53
|
+
break # Stop at first found (closest wins)
|
54
|
+
|
55
|
+
# Then check .voicemode/voicemode.env
|
56
|
+
dir_file = current_dir / ".voicemode" / "voicemode.env"
|
57
|
+
# Skip if this is the global config file (already added)
|
58
|
+
if dir_file.exists() and dir_file != global_config:
|
59
|
+
project_configs.append(dir_file)
|
60
|
+
break # Stop at first found (closest wins)
|
61
|
+
|
62
|
+
current_dir = current_dir.parent
|
63
|
+
|
64
|
+
# Add project configs (they were collected closest-first, so add as-is)
|
65
|
+
config_files.extend(project_configs)
|
66
|
+
|
67
|
+
return config_files
|
68
|
+
|
69
|
+
|
18
70
|
def load_voicemode_env():
|
19
|
-
"""Load configuration from voicemode.env
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
if old_path.exists():
|
27
|
-
config_path = old_path
|
28
|
-
print(f"Warning: Using deprecated .voicemode.env - please rename to voicemode.env")
|
29
|
-
|
30
|
-
if not config_path.exists():
|
31
|
-
# Create default template
|
32
|
-
config_path.parent.mkdir(parents=True, exist_ok=True)
|
71
|
+
"""Load configuration from voicemode.env files, with cascading from global to project-specific."""
|
72
|
+
config_files = find_voicemode_env_files()
|
73
|
+
|
74
|
+
# If no config files found, create default global config
|
75
|
+
if not config_files:
|
76
|
+
default_path = Path.home() / ".voicemode" / "voicemode.env"
|
77
|
+
default_path.parent.mkdir(parents=True, exist_ok=True)
|
33
78
|
default_config = '''# Voice Mode Configuration File
|
34
79
|
# This file is automatically generated and can be customized
|
35
80
|
# Environment variables always take precedence over this file
|
@@ -66,8 +111,8 @@ def load_voicemode_env():
|
|
66
111
|
# Comma-separated list of STT endpoints
|
67
112
|
# VOICEMODE_STT_BASE_URLS=http://127.0.0.1:2022/v1,https://api.openai.com/v1
|
68
113
|
|
69
|
-
# Comma-separated list of preferred voices
|
70
|
-
#
|
114
|
+
# Comma-separated list of preferred voices
|
115
|
+
# VOICEMODE_VOICES=af_sky,alloy
|
71
116
|
|
72
117
|
# Comma-separated list of preferred models
|
73
118
|
# VOICEMODE_TTS_MODELS=tts-1,tts-1-hd,gpt-4o-mini-tts
|
@@ -127,26 +172,28 @@ def load_voicemode_env():
|
|
127
172
|
# LIVEKIT_API_KEY=devkey
|
128
173
|
# LIVEKIT_API_SECRET=secret
|
129
174
|
'''
|
130
|
-
with open(
|
175
|
+
with open(default_path, 'w') as f:
|
131
176
|
f.write(default_config)
|
132
|
-
os.chmod(
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
177
|
+
os.chmod(default_path, 0o600) # Secure permissions
|
178
|
+
config_files = [default_path]
|
179
|
+
|
180
|
+
# Load configuration from all files in order (global first, project-specific last)
|
181
|
+
for config_path in config_files:
|
182
|
+
if config_path.exists():
|
183
|
+
with open(config_path, 'r') as f:
|
184
|
+
for line in f:
|
185
|
+
line = line.strip()
|
186
|
+
# Skip comments and empty lines
|
187
|
+
if not line or line.startswith('#'):
|
188
|
+
continue
|
189
|
+
# Parse KEY=VALUE format
|
190
|
+
if '=' in line:
|
191
|
+
key, value = line.split('=', 1)
|
192
|
+
key = key.strip()
|
193
|
+
value = value.strip()
|
194
|
+
# Only set if not already in environment (env vars take precedence)
|
195
|
+
if key and key not in os.environ:
|
196
|
+
os.environ[key] = value
|
150
197
|
|
151
198
|
# Load configuration file before other configuration
|
152
199
|
load_voicemode_env()
|
@@ -222,13 +269,68 @@ def parse_comma_list(env_var: str, fallback: str) -> list:
|
|
222
269
|
# New provider endpoint lists configuration
|
223
270
|
TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
|
224
271
|
STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
|
225
|
-
TTS_VOICES = parse_comma_list("
|
272
|
+
TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
|
226
273
|
TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
|
227
274
|
|
275
|
+
# Voice preferences cache
|
276
|
+
_cached_voice_preferences: Optional[list] = None
|
277
|
+
_voice_preferences_loaded = False
|
278
|
+
|
279
|
+
def get_voice_preferences() -> list[str]:
|
280
|
+
"""
|
281
|
+
Get voice preferences from configuration.
|
282
|
+
|
283
|
+
Uses the VOICEMODE_VOICES configuration which is loaded from:
|
284
|
+
1. Environment variables (highest priority)
|
285
|
+
2. Project-specific .voicemode.env files
|
286
|
+
3. Global ~/.voicemode/voicemode.env file
|
287
|
+
4. Built-in defaults
|
288
|
+
|
289
|
+
Returns:
|
290
|
+
List of voice names in preference order
|
291
|
+
"""
|
292
|
+
global _cached_voice_preferences, _voice_preferences_loaded
|
293
|
+
|
294
|
+
# Return cached preferences if already loaded
|
295
|
+
if _voice_preferences_loaded:
|
296
|
+
return _cached_voice_preferences or []
|
297
|
+
|
298
|
+
_voice_preferences_loaded = True
|
299
|
+
|
300
|
+
# Get voices from TTS_VOICES configuration
|
301
|
+
_cached_voice_preferences = TTS_VOICES.copy()
|
302
|
+
|
303
|
+
logger.info(f"Voice preferences loaded: {_cached_voice_preferences}")
|
304
|
+
return _cached_voice_preferences
|
305
|
+
|
306
|
+
def clear_voice_preferences_cache():
|
307
|
+
"""Clear the voice preferences cache, forcing a reload on next access."""
|
308
|
+
global _cached_voice_preferences, _voice_preferences_loaded
|
309
|
+
_cached_voice_preferences = None
|
310
|
+
_voice_preferences_loaded = False
|
311
|
+
logger.debug("Voice preferences cache cleared")
|
312
|
+
|
313
|
+
def reload_configuration():
|
314
|
+
"""Reload configuration from files and clear all caches."""
|
315
|
+
# Clear voice preferences cache
|
316
|
+
clear_voice_preferences_cache()
|
317
|
+
|
318
|
+
# Reload environment configuration
|
319
|
+
load_voicemode_env()
|
320
|
+
|
321
|
+
# Update global configuration variables
|
322
|
+
global TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, STT_BASE_URLS
|
323
|
+
TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
|
324
|
+
STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
|
325
|
+
TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
|
326
|
+
TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
|
327
|
+
|
328
|
+
logger.info("Configuration reloaded successfully")
|
329
|
+
|
228
330
|
# Legacy variables have been removed - use the new list-based configuration:
|
229
331
|
# - VOICEMODE_TTS_BASE_URLS (comma-separated list)
|
230
332
|
# - VOICEMODE_STT_BASE_URLS (comma-separated list)
|
231
|
-
# - VOICEMODE_TTS_VOICES (comma-separated list)
|
333
|
+
# - VOICEMODE_VOICES (comma-separated list)
|
232
334
|
# - VOICEMODE_TTS_MODELS (comma-separated list)
|
233
335
|
|
234
336
|
# LiveKit configuration
|
@@ -9,9 +9,8 @@ import logging
|
|
9
9
|
from typing import Dict, Optional, List, Any, Tuple
|
10
10
|
from openai import AsyncOpenAI
|
11
11
|
|
12
|
-
from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY
|
12
|
+
from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY, get_voice_preferences
|
13
13
|
from .provider_discovery import provider_registry, EndpointInfo
|
14
|
-
from .voice_preferences import get_preferred_voices
|
15
14
|
|
16
15
|
logger = logging.getLogger("voice-mode")
|
17
16
|
|
@@ -68,14 +67,14 @@ async def get_tts_client_and_voice(
|
|
68
67
|
return client, selected_voice, selected_model, endpoint_info
|
69
68
|
|
70
69
|
# Voice-first selection algorithm
|
71
|
-
# Get user preferences
|
72
|
-
|
73
|
-
combined_voice_list =
|
70
|
+
# Get user preferences from configuration
|
71
|
+
voice_preferences = get_voice_preferences()
|
72
|
+
combined_voice_list = voice_preferences
|
74
73
|
|
75
74
|
logger.info(f"TTS Provider Selection (voice-first)")
|
76
|
-
if
|
77
|
-
logger.info(f"
|
78
|
-
logger.info(f"
|
75
|
+
if voice_preferences:
|
76
|
+
logger.info(f" Voice preferences: {voice_preferences}")
|
77
|
+
logger.info(f" Voice list: {combined_voice_list}")
|
79
78
|
logger.info(f" Preferred models: {TTS_MODELS}")
|
80
79
|
logger.info(f" Available endpoints: {TTS_BASE_URLS}")
|
81
80
|
|
@@ -267,7 +267,7 @@ async def environment_variables() -> str:
|
|
267
267
|
("VOICEMODE_AUTO_START_KOKORO", "Auto-start Kokoro service (true/false)"),
|
268
268
|
("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
|
269
269
|
("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
|
270
|
-
("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
|
270
|
+
("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
|
271
271
|
("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
|
272
272
|
# Audio Settings
|
273
273
|
("VOICEMODE_AUDIO_FORMAT", "Audio format for recording (pcm/mp3/wav/flac/aac/opus)"),
|
@@ -358,7 +358,7 @@ async def environment_template() -> str:
|
|
358
358
|
f"export VOICEMODE_AUTO_START_KOKORO=\"{str(AUTO_START_KOKORO).lower()}\"",
|
359
359
|
f"export VOICEMODE_TTS_BASE_URLS=\"{','.join(TTS_BASE_URLS)}\"",
|
360
360
|
f"export VOICEMODE_STT_BASE_URLS=\"{','.join(STT_BASE_URLS)}\"",
|
361
|
-
f"export VOICEMODE_TTS_VOICES=\"{','.join(TTS_VOICES)}\"",
|
361
|
+
f"export VOICEMODE_VOICES=\"{','.join(TTS_VOICES)}\"",
|
362
362
|
f"export VOICEMODE_TTS_MODELS=\"{','.join(TTS_MODELS)}\"",
|
363
363
|
"",
|
364
364
|
"# Audio Settings",
|