voice-mode 3.34.3__tar.gz → 4.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. {voice_mode-3.34.3 → voice_mode-4.1.0}/.gitignore +6 -0
  2. {voice_mode-3.34.3 → voice_mode-4.1.0}/CHANGELOG.md +57 -0
  3. {voice_mode-3.34.3 → voice_mode-4.1.0}/PKG-INFO +5 -2
  4. {voice_mode-3.34.3 → voice_mode-4.1.0}/pyproject.toml +61 -3
  5. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/__version__.py +1 -1
  6. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/cli.py +8 -0
  7. voice_mode-4.1.0/voice_mode/cli_commands/pronounce_commands.py +223 -0
  8. voice_mode-4.1.0/voice_mode/cli_commands/transcribe.py +141 -0
  9. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/config.py +139 -37
  10. voice_mode-4.1.0/voice_mode/data/default_pronunciation.yaml +268 -0
  11. voice_mode-4.1.0/voice_mode/pronounce.py +397 -0
  12. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/providers.py +7 -8
  13. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/configuration.py +2 -2
  14. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/configuration_management.py +106 -5
  15. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/converse.py +109 -0
  16. voice_mode-4.1.0/voice_mode/tools/pronounce.py +245 -0
  17. voice_mode-4.1.0/voice_mode/tools/transcription/__init__.py +14 -0
  18. voice_mode-4.1.0/voice_mode/tools/transcription/backends.py +287 -0
  19. voice_mode-4.1.0/voice_mode/tools/transcription/core.py +136 -0
  20. voice_mode-4.1.0/voice_mode/tools/transcription/formats.py +144 -0
  21. voice_mode-4.1.0/voice_mode/tools/transcription/types.py +52 -0
  22. voice_mode-3.34.3/voice_mode/voice_preferences.py +0 -125
  23. {voice_mode-3.34.3 → voice_mode-4.1.0}/README.md +0 -0
  24. {voice_mode-3.34.3 → voice_mode-4.1.0}/build_hooks.py +0 -0
  25. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/__init__.py +0 -0
  26. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/__main__.py +0 -0
  27. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/cli_commands/__init__.py +0 -0
  28. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/cli_commands/exchanges.py +0 -0
  29. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/conversation_logger.py +0 -0
  30. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/core.py +0 -0
  31. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/data/versions.json +0 -0
  32. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/__init__.py +0 -0
  33. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/conversations.py +0 -0
  34. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/filters.py +0 -0
  35. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/formatters.py +0 -0
  36. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/models.py +0 -0
  37. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/reader.py +0 -0
  38. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/exchanges/stats.py +0 -0
  39. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/README.md +0 -0
  40. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  41. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/app/favicon.ico +0 -0
  42. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/app/globals.css +0 -0
  43. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/app/layout.tsx +0 -0
  44. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/app/page.tsx +0 -0
  45. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  46. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  47. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  48. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  49. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  50. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/next-env.d.ts +0 -0
  51. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/next.config.mjs +0 -0
  52. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/package-lock.json +0 -0
  53. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/package.json +0 -0
  54. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  55. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/postcss.config.mjs +0 -0
  56. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/tailwind.config.ts +0 -0
  57. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/frontend/tsconfig.json +0 -0
  58. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/prompts/README.md +0 -0
  59. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/prompts/__init__.py +0 -0
  60. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/prompts/converse.py +0 -0
  61. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/prompts/release_notes.py +0 -0
  62. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/prompts/services.py +0 -0
  63. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/provider_discovery.py +0 -0
  64. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/__init__.py +0 -0
  65. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/audio_files.py +0 -0
  66. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/changelog.py +0 -0
  67. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/statistics.py +0 -0
  68. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/version.py +0 -0
  69. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/resources/whisper_models.py +0 -0
  70. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/server.py +0 -0
  71. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/shared.py +0 -0
  72. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/simple_failover.py +0 -0
  73. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/statistics.py +0 -0
  74. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/streaming.py +0 -0
  75. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/__init__.py +0 -0
  76. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  77. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  78. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  79. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  80. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  81. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  82. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/scripts/__init__.py +0 -0
  83. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/scripts/start-whisper-server.sh +0 -0
  84. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  85. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  86. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  87. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  88. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/__init__.py +0 -0
  89. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/dependencies.py +0 -0
  90. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/devices.py +0 -0
  91. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/diagnostics.py +0 -0
  92. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/providers.py +0 -0
  93. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/service.py +0 -0
  94. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/kokoro/install.py +0 -0
  95. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  96. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/list_versions.py +0 -0
  97. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
  98. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
  99. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/livekit/install.py +0 -0
  100. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
  101. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  102. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/version_info.py +0 -0
  103. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/__init__.py +0 -0
  104. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/install.py +0 -0
  105. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/list_models.py +0 -0
  106. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/model_active.py +0 -0
  107. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
  108. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/model_install.py +0 -0
  109. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/model_remove.py +0 -0
  110. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/models.py +0 -0
  111. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  112. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/statistics.py +0 -0
  113. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/tools/voice_registry.py +0 -0
  114. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/__init__.py +0 -0
  115. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/audio_diagnostics.py +0 -0
  116. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/event_logger.py +0 -0
  117. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/ffmpeg_check.py +0 -0
  118. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/format_migration.py +0 -0
  119. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/gpu_detection.py +0 -0
  120. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/migration_helpers.py +0 -0
  121. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/common.py +0 -0
  122. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/coreml_setup.py +0 -0
  123. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  124. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
  125. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/whisper_helpers.py +0 -0
  126. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/services/whisper_version.py +0 -0
  127. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/utils/version_helpers.py +0 -0
  128. {voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/version.py +0 -0
@@ -117,3 +117,9 @@ models/
117
117
  *.mlpackage/
118
118
  *.mlmodel
119
119
  *.mlmodelc/
120
+
121
+ # Coverage reports
122
+ htmlcov/
123
+ .coverage
124
+ .coverage.*
125
+ coverage.xml
@@ -7,6 +7,63 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [4.1.0] - 2025-09-01
11
+
12
+ ### Added
13
+ - **Pronunciation middleware for TTS/STT text processing**
14
+ - Configurable pronunciation rules system that processes text before TTS and after STT
15
+ - Regex-based text substitution rules with YAML configuration
16
+ - Separate TTS and STT rule sets for bidirectional corrections
17
+ - Privacy support - rules can be marked private to hide from LLM tool listings
18
+ - Default rules for common patterns (3M, PoE, GbE, etc.)
19
+ - Full CLI interface for managing pronunciation rules
20
+ - MCP tool for LLM-based rule management with `pronounce` tool
21
+ - Integrated into converse tool for automatic text processing
22
+ - New configuration file: `voice_mode/data/default_pronunciation.yaml`
23
+
24
+ ## [4.0.1] - 2025-09-01
25
+
26
+ ### Removed
27
+ - Removed `whisperx` optional dependency to fix PyPI upload compatibility
28
+ - The dependency was specified as a Git URL which is not allowed for PyPI packages
29
+ - WhisperX functionality was recently added and not essential for core features
30
+
31
+ ## [4.0.0] - 2025-08-31
32
+
33
+ ### BREAKING CHANGES
34
+ - **Unified voice configuration system**
35
+ - **BREAKING**: Replaced `.voices.txt` files with unified `.voicemode.env` configuration
36
+ - Changed environment variable from `VOICEMODE_TTS_VOICES` to `VOICEMODE_VOICES` for simplicity
37
+ - Implemented cascading configuration: env vars > project configs > global config
38
+ - Added directory tree walking for project-specific configuration discovery
39
+ - Supports runtime configuration reloading via MCP tools
40
+ - **Migration Required**: Users must migrate from `.voices.txt` to `.voicemode.env` with `VOICEMODE_VOICES=voice1,voice2` format
41
+
42
+ ### Added
43
+
44
+ - **Comprehensive test coverage reporting system**
45
+ - Integration with pytest-cov for coverage measurement
46
+ - HTML coverage reports generated in htmlcov/ directory
47
+ - Coverage badges and metrics for monitoring code quality
48
+ - Automated coverage reporting in CI/CD pipeline
49
+
50
+ - **Word-level timestamps for transcription**
51
+ - Enhanced transcription output with word-level timing information
52
+ - Support for SubRip (SRT) format output with precise word timestamps
53
+ - New transcription CLI command for processing audio files
54
+ - Comprehensive transcription backend supporting multiple formats
55
+ - Word timing data available for improved accessibility and analysis
56
+
57
+ - **Enhanced voice selection guide**
58
+ - Comprehensive documentation for voice selection across different providers
59
+ - Clear migration instructions from old `.voices.txt` system
60
+
61
+ ### Removed
62
+ - **Legacy voice preference system**
63
+ - Removed 578 lines of old `voice_preferences.py` system
64
+ - Eliminated unreliable `.voices.txt` file parsing
65
+ - Removed associated test files for deprecated voice preference system
66
+
10
67
  ## [3.34.3] - 2025-08-26
11
68
 
12
69
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 3.34.3
3
+ Version: 4.1.0
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -66,9 +66,12 @@ Requires-Dist: pandas>=2.0.0; extra == 'notebooks'
66
66
  Provides-Extra: scripts
67
67
  Requires-Dist: flask>=3.0.0; extra == 'scripts'
68
68
  Provides-Extra: test
69
+ Requires-Dist: coverage[toml]>=7.4.0; extra == 'test'
69
70
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
70
- Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
71
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
71
72
  Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
73
+ Requires-Dist: pytest-timeout>=2.2.0; extra == 'test'
74
+ Requires-Dist: pytest-xdist>=3.5.0; extra == 'test'
72
75
  Requires-Dist: pytest>=7.0.0; extra == 'test'
73
76
  Description-Content-Type: text/markdown
74
77
 
@@ -67,8 +67,11 @@ dev = [
67
67
  test = [
68
68
  "pytest>=7.0.0",
69
69
  "pytest-asyncio>=0.21.0",
70
- "pytest-cov>=4.0.0",
70
+ "pytest-cov>=4.1.0",
71
71
  "pytest-mock>=3.10.0",
72
+ "pytest-xdist>=3.5.0", # For parallel testing
73
+ "pytest-timeout>=2.2.0", # For test timeouts
74
+ "coverage[toml]>=7.4.0",
72
75
  ]
73
76
  notebooks = [
74
77
  "gradio>=4.0.0",
@@ -144,10 +147,65 @@ exclude = [
144
147
  [tool.hatch.version]
145
148
  path = "voice_mode/__version__.py"
146
149
 
150
+ [tool.hatch.metadata]
151
+ allow-direct-references = true
152
+
147
153
  [tool.pytest.ini_options]
154
+ minversion = "7.0"
148
155
  testpaths = ["tests"]
149
156
  python_files = "test_*.py"
150
157
  python_classes = "Test*"
151
158
  python_functions = "test_*"
152
- # Exclude manual test directory
153
- addopts = "--ignore=tests/manual"
159
+ asyncio_mode = "auto"
160
+ addopts = [
161
+ "-ra",
162
+ "--strict-markers",
163
+ "--strict-config",
164
+ "--ignore=tests/manual",
165
+ "--cov=voice_mode",
166
+ "--cov-branch",
167
+ "--cov-report=term-missing:skip-covered",
168
+ "--cov-report=html",
169
+ "--cov-report=xml",
170
+ ]
171
+ markers = [
172
+ "unit: Unit tests (fast, isolated)",
173
+ "integration: Integration tests (may interact with services)",
174
+ "slow: Tests that take > 1s",
175
+ "manual: Manual tests requiring human interaction",
176
+ ]
177
+ filterwarnings = [
178
+ "ignore::DeprecationWarning",
179
+ ]
180
+
181
+ [tool.coverage.run]
182
+ source = ["voice_mode"]
183
+ branch = true
184
+ parallel = true
185
+ omit = [
186
+ "*/tests/*",
187
+ "*/test_*.py",
188
+ "*/__pycache__/*",
189
+ "*/site-packages/*",
190
+ "test-env/*",
191
+ ]
192
+
193
+ [tool.coverage.report]
194
+ exclude_lines = [
195
+ "pragma: no cover",
196
+ "def __repr__",
197
+ "if TYPE_CHECKING:",
198
+ "raise NotImplementedError",
199
+ "if __name__ == .__main__.:",
200
+ "@abstractmethod",
201
+ "except ImportError:",
202
+ ]
203
+ precision = 2
204
+ skip_covered = true
205
+ show_missing = true
206
+
207
+ [tool.coverage.html]
208
+ directory = "htmlcov"
209
+
210
+ [tool.coverage.xml]
211
+ output = "coverage.xml"
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "3.34.3"
3
+ __version__ = "4.1.0"
@@ -1359,12 +1359,20 @@ def cli():
1359
1359
 
1360
1360
  # Import subcommand groups
1361
1361
  from voice_mode.cli_commands import exchanges as exchanges_cmd
1362
+ from voice_mode.cli_commands import transcribe as transcribe_cmd
1363
+ from voice_mode.cli_commands import pronounce_commands
1362
1364
 
1363
1365
  # Add subcommands to legacy CLI
1364
1366
  cli.add_command(exchanges_cmd.exchanges)
1367
+ cli.add_command(transcribe_cmd.transcribe)
1368
+ cli.add_command(pronounce_commands.pronounce_group)
1365
1369
 
1366
1370
  # Add exchanges to main CLI
1367
1371
  voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
1372
+ voice_mode_main_cli.add_command(pronounce_commands.pronounce_group)
1373
+
1374
+ # Add transcribe to main CLI
1375
+ voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
1368
1376
 
1369
1377
 
1370
1378
  # Converse command - direct voice conversation from CLI
@@ -0,0 +1,223 @@
1
+ """CLI commands for managing pronunciation rules."""
2
+
3
+ import click
4
+ import yaml
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from voice_mode.pronounce import get_manager
10
+
11
+
12
+ @click.group(name='pronounce')
13
+ def pronounce_group():
14
+ """Manage pronunciation rules for TTS and STT."""
15
+ pass
16
+
17
+
18
+ @pronounce_group.command(name='list')
19
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt', 'all']), default='all',
20
+ help='Filter by direction (tts/stt/all)')
21
+ @click.option('--enabled-only', '-e', is_flag=True, help='Show only enabled rules')
22
+ @click.option('--show-private', '-p', is_flag=True, help='Include private rules')
23
+ @click.option('--format', '-f', type=click.Choice(['table', 'yaml', 'json']), default='table',
24
+ help='Output format')
25
+ def list_rules(direction: str, enabled_only: bool, show_private: bool, format: str):
26
+ """List pronunciation rules."""
27
+ manager = get_manager()
28
+
29
+ # Get rules
30
+ if direction == 'all':
31
+ rules = manager.list_rules(include_private=show_private)
32
+ else:
33
+ rules = manager.list_rules(direction=direction, include_private=show_private)
34
+
35
+ # Filter if needed
36
+ if enabled_only:
37
+ rules = [r for r in rules if r['enabled']]
38
+
39
+ # Format output
40
+ if format == 'table':
41
+ if not rules:
42
+ click.echo("No rules found.")
43
+ return
44
+
45
+ # Count private rules that were hidden
46
+ all_rules = manager.list_rules(include_private=True)
47
+ private_count = len(all_rules) - len(rules)
48
+
49
+ # Simple table format without tabulate
50
+ click.echo("\nPronunciation Rules:")
51
+ click.echo("=" * 80)
52
+
53
+ for rule in rules:
54
+ status = '✓' if rule['enabled'] else '✗'
55
+ click.echo(f"\n{status} [{rule['direction'].upper()}] {rule['name']} (order: {rule['order']})")
56
+ click.echo(f" Pattern: {rule['pattern']}")
57
+ click.echo(f" Replace: {rule['replacement']}")
58
+ if rule['description']:
59
+ click.echo(f" Desc: {rule['description']}")
60
+
61
+ if private_count > 0 and not show_private:
62
+ click.echo(f"\n({private_count} private rules hidden. Use --show-private to display)")
63
+
64
+ elif format == 'yaml':
65
+ import yaml
66
+ click.echo(yaml.dump(rules, default_flow_style=False))
67
+
68
+ elif format == 'json':
69
+ import json
70
+ click.echo(json.dumps(rules, indent=2))
71
+
72
+
73
+ @pronounce_group.command(name='test')
74
+ @click.argument('text')
75
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), default='tts',
76
+ help='Test direction (tts/stt)')
77
+ def test_rule(text: str, direction: str):
78
+ """Test pronunciation rules on text."""
79
+ manager = get_manager()
80
+ result = manager.test_rule(text, direction)
81
+
82
+ if text != result:
83
+ click.echo(f"Original: {text}")
84
+ click.echo(f"Modified: {result}")
85
+
86
+ # Show which rules were applied if logging is enabled
87
+ import os
88
+ if os.environ.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true':
89
+ click.echo("\n(Check logs for applied rules)")
90
+ else:
91
+ click.echo(f"No changes: {text}")
92
+
93
+
94
+ @pronounce_group.command(name='add')
95
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
96
+ help='Rule direction (tts/stt)')
97
+ @click.option('--pattern', '-p', required=True, help='Regex pattern to match')
98
+ @click.option('--replacement', '-r', required=True, help='Replacement text')
99
+ @click.option('--name', '-n', help='Rule name (auto-generated if not provided)')
100
+ @click.option('--description', help='Rule description')
101
+ @click.option('--order', type=int, default=100, help='Processing order (lower = earlier)')
102
+ @click.option('--disabled', is_flag=True, help='Create rule as disabled')
103
+ def add_rule(direction: str, pattern: str, replacement: str, name: Optional[str],
104
+ description: str, order: int, disabled: bool):
105
+ """Add a new pronunciation rule."""
106
+ manager = get_manager()
107
+
108
+ success = manager.add_rule(
109
+ direction=direction,
110
+ pattern=pattern,
111
+ replacement=replacement,
112
+ name=name,
113
+ description=description or "",
114
+ enabled=not disabled,
115
+ order=order,
116
+ private=False # CLI-created rules are not private
117
+ )
118
+
119
+ if success:
120
+ click.echo(f"✓ Rule added successfully")
121
+ else:
122
+ click.echo("✗ Failed to add rule (check pattern validity)", err=True)
123
+
124
+
125
+ @pronounce_group.command(name='remove')
126
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
127
+ help='Rule direction (tts/stt)')
128
+ @click.argument('name')
129
+ def remove_rule(direction: str, name: str):
130
+ """Remove a pronunciation rule by name."""
131
+ manager = get_manager()
132
+
133
+ success = manager.remove_rule(direction, name)
134
+
135
+ if success:
136
+ click.echo(f"✓ Rule '{name}' removed")
137
+ else:
138
+ click.echo(f"✗ Rule '{name}' not found", err=True)
139
+
140
+
141
+ @pronounce_group.command(name='enable')
142
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
143
+ help='Rule direction (tts/stt)')
144
+ @click.argument('name')
145
+ def enable_rule(direction: str, name: str):
146
+ """Enable a pronunciation rule."""
147
+ manager = get_manager()
148
+
149
+ success = manager.enable_rule(direction, name)
150
+
151
+ if success:
152
+ click.echo(f"✓ Rule '{name}' enabled")
153
+ else:
154
+ click.echo(f"✗ Failed to enable rule '{name}' (not found or private)", err=True)
155
+
156
+
157
+ @pronounce_group.command(name='disable')
158
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
159
+ help='Rule direction (tts/stt)')
160
+ @click.argument('name')
161
+ def disable_rule(direction: str, name: str):
162
+ """Disable a pronunciation rule."""
163
+ manager = get_manager()
164
+
165
+ success = manager.disable_rule(direction, name)
166
+
167
+ if success:
168
+ click.echo(f"✓ Rule '{name}' disabled")
169
+ else:
170
+ click.echo(f"✗ Failed to disable rule '{name}' (not found or private)", err=True)
171
+
172
+
173
+ @pronounce_group.command(name='reload')
174
+ def reload_rules():
175
+ """Reload pronunciation rules from configuration files."""
176
+ manager = get_manager()
177
+ manager.reload_rules()
178
+ click.echo("✓ Pronunciation rules reloaded")
179
+
180
+
181
+ @pronounce_group.command(name='edit')
182
+ @click.option('--system', is_flag=True, help='Edit system default rules (requires sudo)')
183
+ def edit_config(system: bool):
184
+ """Open pronunciation config in editor."""
185
+ import os
186
+ import subprocess
187
+
188
+ if system:
189
+ # Edit system defaults
190
+ config_path = Path(__file__).parent.parent / 'data' / 'default_pronunciation.yaml'
191
+ if not config_path.exists():
192
+ click.echo(f"System config not found: {config_path}", err=True)
193
+ return
194
+ # Might need sudo
195
+ editor = os.environ.get('EDITOR', 'nano')
196
+ subprocess.run(['sudo', editor, str(config_path)])
197
+ else:
198
+ # Edit user config
199
+ config_path = Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'
200
+ if not config_path.exists():
201
+ # Create default config
202
+ config_path.parent.mkdir(parents=True, exist_ok=True)
203
+ default_config = {
204
+ 'version': 1,
205
+ 'tts_rules': [],
206
+ 'stt_rules': []
207
+ }
208
+ with open(config_path, 'w') as f:
209
+ yaml.dump(default_config, f, default_flow_style=False)
210
+
211
+ editor = os.environ.get('EDITOR', 'nano')
212
+ subprocess.run([editor, str(config_path)])
213
+
214
+ # Reload after editing
215
+ manager = get_manager()
216
+ manager.reload_rules()
217
+ click.echo("✓ Configuration edited and reloaded")
218
+
219
+
220
+ # Register the command group
221
+ def register_commands(cli):
222
+ """Register pronunciation commands with the main CLI."""
223
+ cli.add_command(pronounce_group)
@@ -0,0 +1,141 @@
1
+ """CLI command for audio transcription."""
2
+
3
+ import click
4
+ import json
5
+ import asyncio
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from voice_mode.tools.transcription import (
10
+ transcribe_audio,
11
+ TranscriptionBackend,
12
+ OutputFormat
13
+ )
14
+
15
+
16
+ @click.group()
17
+ def transcribe():
18
+ """Audio transcription with word-level timestamps."""
19
+ pass
20
+
21
+
22
+ @transcribe.command("audio")
23
+ @click.argument('audio_file', type=click.Path(exists=True))
24
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
25
+ @click.option(
26
+ '--backend',
27
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
28
+ default='openai',
29
+ help='Transcription backend to use'
30
+ )
31
+ @click.option(
32
+ '--format',
33
+ 'output_format',
34
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
35
+ default='json',
36
+ help='Output format for transcription'
37
+ )
38
+ @click.option('--output', '-o', type=click.Path(), help='Save transcription to file')
39
+ @click.option('--language', help='Language code (e.g., en, es, fr)')
40
+ @click.option('--model', default='whisper-1', help='Model to use (for OpenAI backend)')
41
+ def audio_command(
42
+ audio_file: str,
43
+ words: bool,
44
+ backend: str,
45
+ output_format: str,
46
+ output: Optional[str],
47
+ language: Optional[str],
48
+ model: str
49
+ ):
50
+ """
51
+ Transcribe audio with optional word-level timestamps.
52
+
53
+ Examples:
54
+
55
+ voice-mode transcribe audio recording.mp3
56
+
57
+ voice-mode transcribe audio interview.wav --words
58
+
59
+ voice-mode transcribe audio podcast.mp3 --words --format srt -o subtitles.srt
60
+
61
+ voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
62
+ """
63
+ async def run():
64
+ # Perform transcription
65
+ result = await transcribe_audio(
66
+ audio_file=audio_file,
67
+ word_timestamps=words,
68
+ backend=TranscriptionBackend(backend),
69
+ output_format=OutputFormat(output_format),
70
+ language=language,
71
+ model=model
72
+ )
73
+
74
+ # Check for errors
75
+ if not result.get("success", False):
76
+ error_msg = result.get("error", "Unknown error occurred")
77
+ click.echo(f"Error: {error_msg}", err=True)
78
+ return
79
+
80
+ # Format output
81
+ if output_format == 'json':
82
+ # Remove internal fields for cleaner output
83
+ output_result = {k: v for k, v in result.items()
84
+ if k not in ['formatted_content']}
85
+ content = json.dumps(output_result, indent=2)
86
+ elif "formatted_content" in result:
87
+ content = result["formatted_content"]
88
+ else:
89
+ # Fallback to JSON if format conversion failed
90
+ content = json.dumps(result, indent=2)
91
+
92
+ # Write output
93
+ if output:
94
+ Path(output).write_text(content)
95
+ click.echo(f"Transcription saved to {output}")
96
+ else:
97
+ click.echo(content)
98
+
99
+ # Run async function
100
+ asyncio.run(run())
101
+
102
+
103
+ # For backward compatibility, also provide a direct command
104
+ @click.command('transcribe-audio')
105
+ @click.argument('audio_file', type=click.Path(exists=True))
106
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
107
+ @click.option(
108
+ '--backend',
109
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
110
+ default='openai',
111
+ help='Transcription backend'
112
+ )
113
+ @click.option(
114
+ '--format',
115
+ 'output_format',
116
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
117
+ default='json',
118
+ help='Output format'
119
+ )
120
+ @click.option('--output', '-o', type=click.Path(), help='Save to file')
121
+ @click.option('--language', help='Language code')
122
+ @click.option('--model', default='whisper-1', help='Model to use')
123
+ def transcribe_audio_command(
124
+ audio_file: str,
125
+ words: bool,
126
+ backend: str,
127
+ output_format: str,
128
+ output: Optional[str],
129
+ language: Optional[str],
130
+ model: str
131
+ ):
132
+ """Direct transcription command for backward compatibility."""
133
+ audio_command.callback(
134
+ audio_file=audio_file,
135
+ words=words,
136
+ backend=backend,
137
+ output_format=output_format,
138
+ output=output,
139
+ language=language,
140
+ model=model
141
+ )