voice-mode 3.34.3__tar.gz → 4.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {voice_mode-3.34.3 → voice_mode-4.0.1}/.gitignore +6 -0
  2. {voice_mode-3.34.3 → voice_mode-4.0.1}/CHANGELOG.md +43 -0
  3. {voice_mode-3.34.3 → voice_mode-4.0.1}/PKG-INFO +5 -2
  4. {voice_mode-3.34.3 → voice_mode-4.0.1}/pyproject.toml +61 -3
  5. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__version__.py +1 -1
  6. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli.py +5 -0
  7. voice_mode-4.0.1/voice_mode/cli_commands/transcribe.py +141 -0
  8. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/config.py +139 -37
  9. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/providers.py +7 -8
  10. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/configuration.py +2 -2
  11. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/configuration_management.py +106 -5
  12. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/converse.py +98 -0
  13. voice_mode-4.0.1/voice_mode/tools/transcription/__init__.py +14 -0
  14. voice_mode-4.0.1/voice_mode/tools/transcription/backends.py +287 -0
  15. voice_mode-4.0.1/voice_mode/tools/transcription/core.py +136 -0
  16. voice_mode-4.0.1/voice_mode/tools/transcription/formats.py +144 -0
  17. voice_mode-4.0.1/voice_mode/tools/transcription/types.py +52 -0
  18. voice_mode-3.34.3/voice_mode/voice_preferences.py +0 -125
  19. {voice_mode-3.34.3 → voice_mode-4.0.1}/README.md +0 -0
  20. {voice_mode-3.34.3 → voice_mode-4.0.1}/build_hooks.py +0 -0
  21. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__init__.py +0 -0
  22. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/__main__.py +0 -0
  23. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli_commands/__init__.py +0 -0
  24. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/cli_commands/exchanges.py +0 -0
  25. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/conversation_logger.py +0 -0
  26. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/core.py +0 -0
  27. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/data/versions.json +0 -0
  28. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/__init__.py +0 -0
  29. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/conversations.py +0 -0
  30. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/filters.py +0 -0
  31. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/formatters.py +0 -0
  32. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/models.py +0 -0
  33. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/reader.py +0 -0
  34. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/exchanges/stats.py +0 -0
  35. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/README.md +0 -0
  36. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  37. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/favicon.ico +0 -0
  38. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/globals.css +0 -0
  39. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/layout.tsx +0 -0
  40. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/app/page.tsx +0 -0
  41. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  42. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  43. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  44. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  45. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  46. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/next-env.d.ts +0 -0
  47. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/next.config.mjs +0 -0
  48. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/package-lock.json +0 -0
  49. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/package.json +0 -0
  50. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  51. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/postcss.config.mjs +0 -0
  52. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/tailwind.config.ts +0 -0
  53. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/frontend/tsconfig.json +0 -0
  54. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/README.md +0 -0
  55. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/__init__.py +0 -0
  56. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/converse.py +0 -0
  57. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/release_notes.py +0 -0
  58. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/prompts/services.py +0 -0
  59. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/provider_discovery.py +0 -0
  60. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/__init__.py +0 -0
  61. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/audio_files.py +0 -0
  62. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/changelog.py +0 -0
  63. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/statistics.py +0 -0
  64. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/version.py +0 -0
  65. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/resources/whisper_models.py +0 -0
  66. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/server.py +0 -0
  67. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/shared.py +0 -0
  68. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/simple_failover.py +0 -0
  69. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/statistics.py +0 -0
  70. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/streaming.py +0 -0
  71. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/__init__.py +0 -0
  72. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  73. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  74. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  75. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  76. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  77. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  78. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/scripts/__init__.py +0 -0
  79. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/scripts/start-whisper-server.sh +0 -0
  80. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  81. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  82. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  83. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  84. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/__init__.py +0 -0
  85. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/dependencies.py +0 -0
  86. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/devices.py +0 -0
  87. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/diagnostics.py +0 -0
  88. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/providers.py +0 -0
  89. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/service.py +0 -0
  90. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/kokoro/install.py +0 -0
  91. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  92. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/list_versions.py +0 -0
  93. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/__init__.py +0 -0
  94. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/frontend.py +0 -0
  95. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/install.py +0 -0
  96. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/production_server.py +0 -0
  97. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  98. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/version_info.py +0 -0
  99. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/__init__.py +0 -0
  100. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/install.py +0 -0
  101. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/list_models.py +0 -0
  102. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_active.py +0 -0
  103. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
  104. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_install.py +0 -0
  105. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/model_remove.py +0 -0
  106. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/models.py +0 -0
  107. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  108. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/statistics.py +0 -0
  109. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/tools/voice_registry.py +0 -0
  110. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/__init__.py +0 -0
  111. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/audio_diagnostics.py +0 -0
  112. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/event_logger.py +0 -0
  113. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/ffmpeg_check.py +0 -0
  114. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/format_migration.py +0 -0
  115. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/gpu_detection.py +0 -0
  116. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/migration_helpers.py +0 -0
  117. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/common.py +0 -0
  118. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/coreml_setup.py +0 -0
  119. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  120. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/livekit_helpers.py +0 -0
  121. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/whisper_helpers.py +0 -0
  122. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/services/whisper_version.py +0 -0
  123. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/utils/version_helpers.py +0 -0
  124. {voice_mode-3.34.3 → voice_mode-4.0.1}/voice_mode/version.py +0 -0
@@ -117,3 +117,9 @@ models/
117
117
  *.mlpackage/
118
118
  *.mlmodel
119
119
  *.mlmodelc/
120
+
121
+ # Coverage reports
122
+ htmlcov/
123
+ .coverage
124
+ .coverage.*
125
+ coverage.xml
@@ -7,6 +7,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [4.0.1] - 2025-09-01
11
+
12
+ ### Removed
13
+ - Removed `whisperx` optional dependency to fix PyPI upload compatibility
14
+ - The dependency was specified as a Git URL which is not allowed for PyPI packages
15
+ - WhisperX functionality was recently added and not essential for core features
16
+
17
+ ## [4.0.0] - 2025-08-31
18
+
19
+ ### BREAKING CHANGES
20
+ - **Unified voice configuration system**
21
+ - **BREAKING**: Replaced `.voices.txt` files with unified `.voicemode.env` configuration
22
+ - Changed environment variable from `VOICEMODE_TTS_VOICES` to `VOICEMODE_VOICES` for simplicity
23
+ - Implemented cascading configuration: env vars > project configs > global config
24
+ - Added directory tree walking for project-specific configuration discovery
25
+ - Supports runtime configuration reloading via MCP tools
26
+ - **Migration Required**: Users must migrate from `.voices.txt` to `.voicemode.env` with `VOICEMODE_VOICES=voice1,voice2` format
27
+
28
+ ### Added
29
+
30
+ - **Comprehensive test coverage reporting system**
31
+ - Integration with pytest-cov for coverage measurement
32
+ - HTML coverage reports generated in htmlcov/ directory
33
+ - Coverage badges and metrics for monitoring code quality
34
+ - Automated coverage reporting in CI/CD pipeline
35
+
36
+ - **Word-level timestamps for transcription**
37
+ - Enhanced transcription output with word-level timing information
38
+ - Support for SubRip (SRT) format output with precise word timestamps
39
+ - New transcription CLI command for processing audio files
40
+ - Comprehensive transcription backend supporting multiple formats
41
+ - Word timing data available for improved accessibility and analysis
42
+
43
+ - **Enhanced voice selection guide**
44
+ - Comprehensive documentation for voice selection across different providers
45
+ - Clear migration instructions from old `.voices.txt` system
46
+
47
+ ### Removed
48
+ - **Legacy voice preference system**
49
+ - Removed 578 lines of old `voice_preferences.py` system
50
+ - Eliminated unreliable `.voices.txt` file parsing
51
+ - Removed associated test files for deprecated voice preference system
52
+
10
53
  ## [3.34.3] - 2025-08-26
11
54
 
12
55
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 3.34.3
3
+ Version: 4.0.1
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -66,9 +66,12 @@ Requires-Dist: pandas>=2.0.0; extra == 'notebooks'
66
66
  Provides-Extra: scripts
67
67
  Requires-Dist: flask>=3.0.0; extra == 'scripts'
68
68
  Provides-Extra: test
69
+ Requires-Dist: coverage[toml]>=7.4.0; extra == 'test'
69
70
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
70
- Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
71
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
71
72
  Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
73
+ Requires-Dist: pytest-timeout>=2.2.0; extra == 'test'
74
+ Requires-Dist: pytest-xdist>=3.5.0; extra == 'test'
72
75
  Requires-Dist: pytest>=7.0.0; extra == 'test'
73
76
  Description-Content-Type: text/markdown
74
77
 
@@ -67,8 +67,11 @@ dev = [
67
67
  test = [
68
68
  "pytest>=7.0.0",
69
69
  "pytest-asyncio>=0.21.0",
70
- "pytest-cov>=4.0.0",
70
+ "pytest-cov>=4.1.0",
71
71
  "pytest-mock>=3.10.0",
72
+ "pytest-xdist>=3.5.0", # For parallel testing
73
+ "pytest-timeout>=2.2.0", # For test timeouts
74
+ "coverage[toml]>=7.4.0",
72
75
  ]
73
76
  notebooks = [
74
77
  "gradio>=4.0.0",
@@ -144,10 +147,65 @@ exclude = [
144
147
  [tool.hatch.version]
145
148
  path = "voice_mode/__version__.py"
146
149
 
150
+ [tool.hatch.metadata]
151
+ allow-direct-references = true
152
+
147
153
  [tool.pytest.ini_options]
154
+ minversion = "7.0"
148
155
  testpaths = ["tests"]
149
156
  python_files = "test_*.py"
150
157
  python_classes = "Test*"
151
158
  python_functions = "test_*"
152
- # Exclude manual test directory
153
- addopts = "--ignore=tests/manual"
159
+ asyncio_mode = "auto"
160
+ addopts = [
161
+ "-ra",
162
+ "--strict-markers",
163
+ "--strict-config",
164
+ "--ignore=tests/manual",
165
+ "--cov=voice_mode",
166
+ "--cov-branch",
167
+ "--cov-report=term-missing:skip-covered",
168
+ "--cov-report=html",
169
+ "--cov-report=xml",
170
+ ]
171
+ markers = [
172
+ "unit: Unit tests (fast, isolated)",
173
+ "integration: Integration tests (may interact with services)",
174
+ "slow: Tests that take > 1s",
175
+ "manual: Manual tests requiring human interaction",
176
+ ]
177
+ filterwarnings = [
178
+ "ignore::DeprecationWarning",
179
+ ]
180
+
181
+ [tool.coverage.run]
182
+ source = ["voice_mode"]
183
+ branch = true
184
+ parallel = true
185
+ omit = [
186
+ "*/tests/*",
187
+ "*/test_*.py",
188
+ "*/__pycache__/*",
189
+ "*/site-packages/*",
190
+ "test-env/*",
191
+ ]
192
+
193
+ [tool.coverage.report]
194
+ exclude_lines = [
195
+ "pragma: no cover",
196
+ "def __repr__",
197
+ "if TYPE_CHECKING:",
198
+ "raise NotImplementedError",
199
+ "if __name__ == .__main__.:",
200
+ "@abstractmethod",
201
+ "except ImportError:",
202
+ ]
203
+ precision = 2
204
+ skip_covered = true
205
+ show_missing = true
206
+
207
+ [tool.coverage.html]
208
+ directory = "htmlcov"
209
+
210
+ [tool.coverage.xml]
211
+ output = "coverage.xml"
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "3.34.3"
3
+ __version__ = "4.0.1"
@@ -1359,13 +1359,18 @@ def cli():
1359
1359
 
1360
1360
  # Import subcommand groups
1361
1361
  from voice_mode.cli_commands import exchanges as exchanges_cmd
1362
+ from voice_mode.cli_commands import transcribe as transcribe_cmd
1362
1363
 
1363
1364
  # Add subcommands to legacy CLI
1364
1365
  cli.add_command(exchanges_cmd.exchanges)
1366
+ cli.add_command(transcribe_cmd.transcribe)
1365
1367
 
1366
1368
  # Add exchanges to main CLI
1367
1369
  voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
1368
1370
 
1371
+ # Add transcribe to main CLI
1372
+ voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
1373
+
1369
1374
 
1370
1375
  # Converse command - direct voice conversation from CLI
1371
1376
  @voice_mode_main_cli.command()
@@ -0,0 +1,141 @@
1
+ """CLI command for audio transcription."""
2
+
3
+ import click
4
+ import json
5
+ import asyncio
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from voice_mode.tools.transcription import (
10
+ transcribe_audio,
11
+ TranscriptionBackend,
12
+ OutputFormat
13
+ )
14
+
15
+
16
+ @click.group()
17
+ def transcribe():
18
+ """Audio transcription with word-level timestamps."""
19
+ pass
20
+
21
+
22
+ @transcribe.command("audio")
23
+ @click.argument('audio_file', type=click.Path(exists=True))
24
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
25
+ @click.option(
26
+ '--backend',
27
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
28
+ default='openai',
29
+ help='Transcription backend to use'
30
+ )
31
+ @click.option(
32
+ '--format',
33
+ 'output_format',
34
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
35
+ default='json',
36
+ help='Output format for transcription'
37
+ )
38
+ @click.option('--output', '-o', type=click.Path(), help='Save transcription to file')
39
+ @click.option('--language', help='Language code (e.g., en, es, fr)')
40
+ @click.option('--model', default='whisper-1', help='Model to use (for OpenAI backend)')
41
+ def audio_command(
42
+ audio_file: str,
43
+ words: bool,
44
+ backend: str,
45
+ output_format: str,
46
+ output: Optional[str],
47
+ language: Optional[str],
48
+ model: str
49
+ ):
50
+ """
51
+ Transcribe audio with optional word-level timestamps.
52
+
53
+ Examples:
54
+
55
+ voice-mode transcribe audio recording.mp3
56
+
57
+ voice-mode transcribe audio interview.wav --words
58
+
59
+ voice-mode transcribe audio podcast.mp3 --words --format srt -o subtitles.srt
60
+
61
+ voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
62
+ """
63
+ async def run():
64
+ # Perform transcription
65
+ result = await transcribe_audio(
66
+ audio_file=audio_file,
67
+ word_timestamps=words,
68
+ backend=TranscriptionBackend(backend),
69
+ output_format=OutputFormat(output_format),
70
+ language=language,
71
+ model=model
72
+ )
73
+
74
+ # Check for errors
75
+ if not result.get("success", False):
76
+ error_msg = result.get("error", "Unknown error occurred")
77
+ click.echo(f"Error: {error_msg}", err=True)
78
+ return
79
+
80
+ # Format output
81
+ if output_format == 'json':
82
+ # Remove internal fields for cleaner output
83
+ output_result = {k: v for k, v in result.items()
84
+ if k not in ['formatted_content']}
85
+ content = json.dumps(output_result, indent=2)
86
+ elif "formatted_content" in result:
87
+ content = result["formatted_content"]
88
+ else:
89
+ # Fallback to JSON if format conversion failed
90
+ content = json.dumps(result, indent=2)
91
+
92
+ # Write output
93
+ if output:
94
+ Path(output).write_text(content)
95
+ click.echo(f"Transcription saved to {output}")
96
+ else:
97
+ click.echo(content)
98
+
99
+ # Run async function
100
+ asyncio.run(run())
101
+
102
+
103
+ # For backward compatibility, also provide a direct command
104
+ @click.command('transcribe-audio')
105
+ @click.argument('audio_file', type=click.Path(exists=True))
106
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
107
+ @click.option(
108
+ '--backend',
109
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
110
+ default='openai',
111
+ help='Transcription backend'
112
+ )
113
+ @click.option(
114
+ '--format',
115
+ 'output_format',
116
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
117
+ default='json',
118
+ help='Output format'
119
+ )
120
+ @click.option('--output', '-o', type=click.Path(), help='Save to file')
121
+ @click.option('--language', help='Language code')
122
+ @click.option('--model', default='whisper-1', help='Model to use')
123
+ def transcribe_audio_command(
124
+ audio_file: str,
125
+ words: bool,
126
+ backend: str,
127
+ output_format: str,
128
+ output: Optional[str],
129
+ language: Optional[str],
130
+ model: str
131
+ ):
132
+ """Direct transcription command for backward compatibility."""
133
+ audio_command.callback(
134
+ audio_file=audio_file,
135
+ words=words,
136
+ backend=backend,
137
+ output_format=output_format,
138
+ output=output,
139
+ language=language,
140
+ model=model
141
+ )
@@ -15,21 +15,66 @@ from datetime import datetime
15
15
 
16
16
  # ==================== ENVIRONMENT CONFIGURATION ====================
17
17
 
18
+ def find_voicemode_env_files() -> list[Path]:
19
+ """
20
+ Find .voicemode.env files by walking up the directory tree.
21
+
22
+ Looks for (in order of priority - closest to current directory wins):
23
+ 1. .voicemode.env in current or parent directories
24
+ 2. .voicemode/voicemode.env in current or parent directories
25
+ 3. ~/.voicemode/voicemode.env in user home (global config)
26
+
27
+ Returns:
28
+ List of Path objects in loading order (global first, then project-specific)
29
+ """
30
+ config_files = []
31
+
32
+ # First add global config (lowest priority - loaded first)
33
+ global_config = Path.home() / ".voicemode" / "voicemode.env"
34
+
35
+ # Backwards compatibility: check for old filename
36
+ if not global_config.exists():
37
+ old_global = Path.home() / ".voicemode" / ".voicemode.env"
38
+ if old_global.exists():
39
+ global_config = old_global
40
+
41
+ if global_config.exists():
42
+ config_files.append(global_config)
43
+
44
+ # Then walk up directory tree for project-specific configs (higher priority)
45
+ current_dir = Path.cwd()
46
+ project_configs = []
47
+
48
+ while current_dir != current_dir.parent:
49
+ # Check for standalone .voicemode.env first
50
+ standalone_file = current_dir / ".voicemode.env"
51
+ if standalone_file.exists():
52
+ project_configs.append(standalone_file)
53
+ break # Stop at first found (closest wins)
54
+
55
+ # Then check .voicemode/voicemode.env
56
+ dir_file = current_dir / ".voicemode" / "voicemode.env"
57
+ # Skip if this is the global config file (already added)
58
+ if dir_file.exists() and dir_file != global_config:
59
+ project_configs.append(dir_file)
60
+ break # Stop at first found (closest wins)
61
+
62
+ current_dir = current_dir.parent
63
+
64
+ # Add project configs (they were collected closest-first, so add as-is)
65
+ config_files.extend(project_configs)
66
+
67
+ return config_files
68
+
69
+
18
70
  def load_voicemode_env():
19
- """Load configuration from voicemode.env file if it exists, creating a default if not."""
20
- # Try new filename first
21
- config_path = Path.home() / ".voicemode" / "voicemode.env"
22
-
23
- # Backwards compatibility: check for old filename if new doesn't exist
24
- if not config_path.exists():
25
- old_path = Path.home() / ".voicemode" / ".voicemode.env"
26
- if old_path.exists():
27
- config_path = old_path
28
- print(f"Warning: Using deprecated .voicemode.env - please rename to voicemode.env")
29
-
30
- if not config_path.exists():
31
- # Create default template
32
- config_path.parent.mkdir(parents=True, exist_ok=True)
71
+ """Load configuration from voicemode.env files, with cascading from global to project-specific."""
72
+ config_files = find_voicemode_env_files()
73
+
74
+ # If no config files found, create default global config
75
+ if not config_files:
76
+ default_path = Path.home() / ".voicemode" / "voicemode.env"
77
+ default_path.parent.mkdir(parents=True, exist_ok=True)
33
78
  default_config = '''# Voice Mode Configuration File
34
79
  # This file is automatically generated and can be customized
35
80
  # Environment variables always take precedence over this file
@@ -66,8 +111,8 @@ def load_voicemode_env():
66
111
  # Comma-separated list of STT endpoints
67
112
  # VOICEMODE_STT_BASE_URLS=http://127.0.0.1:2022/v1,https://api.openai.com/v1
68
113
 
69
- # Comma-separated list of preferred voices
70
- # VOICEMODE_TTS_VOICES=af_sky,alloy
114
+ # Comma-separated list of preferred voices
115
+ # VOICEMODE_VOICES=af_sky,alloy
71
116
 
72
117
  # Comma-separated list of preferred models
73
118
  # VOICEMODE_TTS_MODELS=tts-1,tts-1-hd,gpt-4o-mini-tts
@@ -127,26 +172,28 @@ def load_voicemode_env():
127
172
  # LIVEKIT_API_KEY=devkey
128
173
  # LIVEKIT_API_SECRET=secret
129
174
  '''
130
- with open(config_path, 'w') as f:
175
+ with open(default_path, 'w') as f:
131
176
  f.write(default_config)
132
- os.chmod(config_path, 0o600) # Secure permissions
133
-
134
- # Load configuration from file
135
- if config_path.exists():
136
- with open(config_path, 'r') as f:
137
- for line in f:
138
- line = line.strip()
139
- # Skip comments and empty lines
140
- if not line or line.startswith('#'):
141
- continue
142
- # Parse KEY=VALUE format
143
- if '=' in line:
144
- key, value = line.split('=', 1)
145
- key = key.strip()
146
- value = value.strip()
147
- # Only set if not already in environment
148
- if key and key not in os.environ:
149
- os.environ[key] = value
177
+ os.chmod(default_path, 0o600) # Secure permissions
178
+ config_files = [default_path]
179
+
180
+ # Load configuration from all files in order (global first, project-specific last)
181
+ for config_path in config_files:
182
+ if config_path.exists():
183
+ with open(config_path, 'r') as f:
184
+ for line in f:
185
+ line = line.strip()
186
+ # Skip comments and empty lines
187
+ if not line or line.startswith('#'):
188
+ continue
189
+ # Parse KEY=VALUE format
190
+ if '=' in line:
191
+ key, value = line.split('=', 1)
192
+ key = key.strip()
193
+ value = value.strip()
194
+ # Only set if not already in environment (env vars take precedence)
195
+ if key and key not in os.environ:
196
+ os.environ[key] = value
150
197
 
151
198
  # Load configuration file before other configuration
152
199
  load_voicemode_env()
@@ -222,13 +269,68 @@ def parse_comma_list(env_var: str, fallback: str) -> list:
222
269
  # New provider endpoint lists configuration
223
270
  TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
224
271
  STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
225
- TTS_VOICES = parse_comma_list("VOICEMODE_TTS_VOICES", "af_sky,alloy")
272
+ TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
226
273
  TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
227
274
 
275
+ # Voice preferences cache
276
+ _cached_voice_preferences: Optional[list] = None
277
+ _voice_preferences_loaded = False
278
+
279
+ def get_voice_preferences() -> list[str]:
280
+ """
281
+ Get voice preferences from configuration.
282
+
283
+ Uses the VOICEMODE_VOICES configuration which is loaded from:
284
+ 1. Environment variables (highest priority)
285
+ 2. Project-specific .voicemode.env files
286
+ 3. Global ~/.voicemode/voicemode.env file
287
+ 4. Built-in defaults
288
+
289
+ Returns:
290
+ List of voice names in preference order
291
+ """
292
+ global _cached_voice_preferences, _voice_preferences_loaded
293
+
294
+ # Return cached preferences if already loaded
295
+ if _voice_preferences_loaded:
296
+ return _cached_voice_preferences or []
297
+
298
+ _voice_preferences_loaded = True
299
+
300
+ # Get voices from TTS_VOICES configuration
301
+ _cached_voice_preferences = TTS_VOICES.copy()
302
+
303
+ logger.info(f"Voice preferences loaded: {_cached_voice_preferences}")
304
+ return _cached_voice_preferences
305
+
306
+ def clear_voice_preferences_cache():
307
+ """Clear the voice preferences cache, forcing a reload on next access."""
308
+ global _cached_voice_preferences, _voice_preferences_loaded
309
+ _cached_voice_preferences = None
310
+ _voice_preferences_loaded = False
311
+ logger.debug("Voice preferences cache cleared")
312
+
313
+ def reload_configuration():
314
+ """Reload configuration from files and clear all caches."""
315
+ # Clear voice preferences cache
316
+ clear_voice_preferences_cache()
317
+
318
+ # Reload environment configuration
319
+ load_voicemode_env()
320
+
321
+ # Update global configuration variables
322
+ global TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, STT_BASE_URLS
323
+ TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
324
+ STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
325
+ TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
326
+ TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
327
+
328
+ logger.info("Configuration reloaded successfully")
329
+
228
330
  # Legacy variables have been removed - use the new list-based configuration:
229
331
  # - VOICEMODE_TTS_BASE_URLS (comma-separated list)
230
332
  # - VOICEMODE_STT_BASE_URLS (comma-separated list)
231
- # - VOICEMODE_TTS_VOICES (comma-separated list)
333
+ # - VOICEMODE_VOICES (comma-separated list)
232
334
  # - VOICEMODE_TTS_MODELS (comma-separated list)
233
335
 
234
336
  # LiveKit configuration
@@ -9,9 +9,8 @@ import logging
9
9
  from typing import Dict, Optional, List, Any, Tuple
10
10
  from openai import AsyncOpenAI
11
11
 
12
- from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY
12
+ from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY, get_voice_preferences
13
13
  from .provider_discovery import provider_registry, EndpointInfo
14
- from .voice_preferences import get_preferred_voices
15
14
 
16
15
  logger = logging.getLogger("voice-mode")
17
16
 
@@ -68,14 +67,14 @@ async def get_tts_client_and_voice(
68
67
  return client, selected_voice, selected_model, endpoint_info
69
68
 
70
69
  # Voice-first selection algorithm
71
- # Get user preferences and prepend to system defaults
72
- user_preferences = get_preferred_voices()
73
- combined_voice_list = user_preferences + [v for v in TTS_VOICES if v not in user_preferences]
70
+ # Get user preferences from configuration
71
+ voice_preferences = get_voice_preferences()
72
+ combined_voice_list = voice_preferences
74
73
 
75
74
  logger.info(f"TTS Provider Selection (voice-first)")
76
- if user_preferences:
77
- logger.info(f" User voice preferences: {user_preferences}")
78
- logger.info(f" Combined voice list: {combined_voice_list}")
75
+ if voice_preferences:
76
+ logger.info(f" Voice preferences: {voice_preferences}")
77
+ logger.info(f" Voice list: {combined_voice_list}")
79
78
  logger.info(f" Preferred models: {TTS_MODELS}")
80
79
  logger.info(f" Available endpoints: {TTS_BASE_URLS}")
81
80
 
@@ -267,7 +267,7 @@ async def environment_variables() -> str:
267
267
  ("VOICEMODE_AUTO_START_KOKORO", "Auto-start Kokoro service (true/false)"),
268
268
  ("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
269
269
  ("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
270
- ("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
270
+ ("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
271
271
  ("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
272
272
  # Audio Settings
273
273
  ("VOICEMODE_AUDIO_FORMAT", "Audio format for recording (pcm/mp3/wav/flac/aac/opus)"),
@@ -358,7 +358,7 @@ async def environment_template() -> str:
358
358
  f"export VOICEMODE_AUTO_START_KOKORO=\"{str(AUTO_START_KOKORO).lower()}\"",
359
359
  f"export VOICEMODE_TTS_BASE_URLS=\"{','.join(TTS_BASE_URLS)}\"",
360
360
  f"export VOICEMODE_STT_BASE_URLS=\"{','.join(STT_BASE_URLS)}\"",
361
- f"export VOICEMODE_TTS_VOICES=\"{','.join(TTS_VOICES)}\"",
361
+ f"export VOICEMODE_VOICES=\"{','.join(TTS_VOICES)}\"",
362
362
  f"export VOICEMODE_TTS_MODELS=\"{','.join(TTS_MODELS)}\"",
363
363
  "",
364
364
  "# Audio Settings",