voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +8 -0
  3. voice_mode/cli_commands/pronounce_commands.py +223 -0
  4. voice_mode/cli_commands/transcribe.py +141 -0
  5. voice_mode/config.py +139 -37
  6. voice_mode/data/default_pronunciation.yaml +268 -0
  7. voice_mode/frontend/.next/BUILD_ID +1 -0
  8. voice_mode/frontend/.next/app-build-manifest.json +28 -0
  9. voice_mode/frontend/.next/app-path-routes-manifest.json +1 -0
  10. voice_mode/frontend/.next/build-manifest.json +32 -0
  11. voice_mode/frontend/.next/export-marker.json +1 -0
  12. voice_mode/frontend/.next/images-manifest.json +1 -0
  13. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -0
  14. voice_mode/frontend/.next/next-server.js.nft.json +1 -0
  15. voice_mode/frontend/.next/package.json +1 -0
  16. voice_mode/frontend/.next/prerender-manifest.json +1 -0
  17. voice_mode/frontend/.next/react-loadable-manifest.json +1 -0
  18. voice_mode/frontend/.next/required-server-files.json +1 -0
  19. voice_mode/frontend/.next/routes-manifest.json +1 -0
  20. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -0
  21. voice_mode/frontend/.next/server/app/_not-found/page.js.nft.json +1 -0
  22. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
  23. voice_mode/frontend/.next/server/app/_not-found.html +1 -0
  24. voice_mode/frontend/.next/server/app/_not-found.meta +6 -0
  25. voice_mode/frontend/.next/server/app/_not-found.rsc +9 -0
  26. voice_mode/frontend/.next/server/app/api/connection-details/route.js +12 -0
  27. voice_mode/frontend/.next/server/app/api/connection-details/route.js.nft.json +1 -0
  28. voice_mode/frontend/.next/server/app/favicon.ico/route.js +12 -0
  29. voice_mode/frontend/.next/server/app/favicon.ico/route.js.nft.json +1 -0
  30. voice_mode/frontend/.next/server/app/favicon.ico.body +0 -0
  31. voice_mode/frontend/.next/server/app/favicon.ico.meta +1 -0
  32. voice_mode/frontend/.next/server/app/index.html +1 -0
  33. voice_mode/frontend/.next/server/app/index.meta +5 -0
  34. voice_mode/frontend/.next/server/app/index.rsc +7 -0
  35. voice_mode/frontend/.next/server/app/page.js +11 -0
  36. voice_mode/frontend/.next/server/app/page.js.nft.json +1 -0
  37. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -0
  38. voice_mode/frontend/.next/server/app-paths-manifest.json +6 -0
  39. voice_mode/frontend/.next/server/chunks/463.js +1 -0
  40. voice_mode/frontend/.next/server/chunks/682.js +6 -0
  41. voice_mode/frontend/.next/server/chunks/948.js +2 -0
  42. voice_mode/frontend/.next/server/chunks/994.js +2 -0
  43. voice_mode/frontend/.next/server/chunks/font-manifest.json +1 -0
  44. voice_mode/frontend/.next/server/font-manifest.json +1 -0
  45. voice_mode/frontend/.next/server/functions-config-manifest.json +1 -0
  46. voice_mode/frontend/.next/server/interception-route-rewrite-manifest.js +1 -0
  47. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -0
  48. voice_mode/frontend/.next/server/middleware-manifest.json +6 -0
  49. voice_mode/frontend/.next/server/middleware-react-loadable-manifest.js +1 -0
  50. voice_mode/frontend/.next/server/next-font-manifest.js +1 -0
  51. voice_mode/frontend/.next/server/next-font-manifest.json +1 -0
  52. voice_mode/frontend/.next/server/pages/404.html +1 -0
  53. voice_mode/frontend/.next/server/pages/500.html +1 -0
  54. voice_mode/frontend/.next/server/pages/_app.js +1 -0
  55. voice_mode/frontend/.next/server/pages/_app.js.nft.json +1 -0
  56. voice_mode/frontend/.next/server/pages/_document.js +1 -0
  57. voice_mode/frontend/.next/server/pages/_document.js.nft.json +1 -0
  58. voice_mode/frontend/.next/server/pages/_error.js +1 -0
  59. voice_mode/frontend/.next/server/pages/_error.js.nft.json +1 -0
  60. voice_mode/frontend/.next/server/pages-manifest.json +1 -0
  61. voice_mode/frontend/.next/server/server-reference-manifest.js +1 -0
  62. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -0
  63. voice_mode/frontend/.next/server/webpack-runtime.js +1 -0
  64. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -0
  65. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +28 -0
  66. voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -0
  67. voice_mode/frontend/.next/standalone/.next/build-manifest.json +32 -0
  68. voice_mode/frontend/.next/standalone/.next/package.json +1 -0
  69. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -0
  70. voice_mode/frontend/.next/standalone/.next/react-loadable-manifest.json +1 -0
  71. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -0
  72. voice_mode/frontend/.next/standalone/.next/routes-manifest.json +1 -0
  73. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -0
  74. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js.nft.json +1 -0
  75. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
  76. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -0
  77. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.meta +6 -0
  78. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +9 -0
  79. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +12 -0
  80. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js.nft.json +1 -0
  81. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +12 -0
  82. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js.nft.json +1 -0
  83. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.body +0 -0
  84. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.meta +1 -0
  85. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -0
  86. voice_mode/frontend/.next/standalone/.next/server/app/index.meta +5 -0
  87. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +7 -0
  88. voice_mode/frontend/.next/standalone/.next/server/app/page.js +11 -0
  89. voice_mode/frontend/.next/standalone/.next/server/app/page.js.nft.json +1 -0
  90. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -0
  91. voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +6 -0
  92. voice_mode/frontend/.next/standalone/.next/server/chunks/463.js +1 -0
  93. voice_mode/frontend/.next/standalone/.next/server/chunks/682.js +6 -0
  94. voice_mode/frontend/.next/standalone/.next/server/chunks/948.js +2 -0
  95. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +2 -0
  96. voice_mode/frontend/.next/standalone/.next/server/font-manifest.json +1 -0
  97. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -0
  98. voice_mode/frontend/.next/standalone/.next/server/middleware-manifest.json +6 -0
  99. voice_mode/frontend/.next/standalone/.next/server/middleware-react-loadable-manifest.js +1 -0
  100. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -0
  101. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -0
  102. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -0
  103. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -0
  104. voice_mode/frontend/.next/standalone/.next/server/pages/_app.js +1 -0
  105. voice_mode/frontend/.next/standalone/.next/server/pages/_app.js.nft.json +1 -0
  106. voice_mode/frontend/.next/standalone/.next/server/pages/_document.js +1 -0
  107. voice_mode/frontend/.next/standalone/.next/server/pages/_document.js.nft.json +1 -0
  108. voice_mode/frontend/.next/standalone/.next/server/pages/_error.js +1 -0
  109. voice_mode/frontend/.next/standalone/.next/server/pages/_error.js.nft.json +1 -0
  110. voice_mode/frontend/.next/standalone/.next/server/pages-manifest.json +1 -0
  111. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.js +1 -0
  112. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -0
  113. voice_mode/frontend/.next/standalone/.next/server/webpack-runtime.js +1 -0
  114. voice_mode/frontend/.next/standalone/package.json +40 -0
  115. voice_mode/frontend/.next/standalone/server.js +38 -0
  116. voice_mode/frontend/.next/static/chunks/117-40bc79a2b97edb21.js +2 -0
  117. voice_mode/frontend/.next/static/chunks/144d3bae-2d5f122b82426d88.js +1 -0
  118. voice_mode/frontend/.next/static/chunks/471-bd4b96a33883dfa2.js +3 -0
  119. voice_mode/frontend/.next/static/chunks/app/_not-found/page-5011050e402ab9c8.js +1 -0
  120. voice_mode/frontend/.next/static/chunks/app/layout-fcb9b9ba5b72c7fc.js +1 -0
  121. voice_mode/frontend/.next/static/chunks/app/page-7c7ec2ad413ace39.js +1 -0
  122. voice_mode/frontend/.next/static/chunks/fd9d1056-af324d327b243cf1.js +1 -0
  123. voice_mode/frontend/.next/static/chunks/framework-f66176bb897dc684.js +1 -0
  124. voice_mode/frontend/.next/static/chunks/main-3163eca598b76a9f.js +1 -0
  125. voice_mode/frontend/.next/static/chunks/main-app-d02bd38ac01adb8a.js +1 -0
  126. voice_mode/frontend/.next/static/chunks/pages/_app-72b849fbd24ac258.js +1 -0
  127. voice_mode/frontend/.next/static/chunks/pages/_error-7ba65e1336b92748.js +1 -0
  128. voice_mode/frontend/.next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  129. voice_mode/frontend/.next/static/chunks/webpack-0ea9b80f19935b70.js +1 -0
  130. voice_mode/frontend/.next/static/css/a2f49a47752b5010.css +3 -0
  131. voice_mode/frontend/.next/static/media/01099be941da1820-s.woff2 +0 -0
  132. voice_mode/frontend/.next/static/media/39883d31a7792467-s.p.woff2 +0 -0
  133. voice_mode/frontend/.next/static/media/6368404d2e8d66fe-s.woff2 +0 -0
  134. voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_buildManifest.js +1 -0
  135. voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_ssgManifest.js +1 -0
  136. voice_mode/frontend/.next/trace +43 -0
  137. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +343 -0
  138. voice_mode/frontend/.next/types/app/layout.ts +79 -0
  139. voice_mode/frontend/.next/types/app/page.ts +79 -0
  140. voice_mode/frontend/.next/types/package.json +1 -0
  141. voice_mode/frontend/package-lock.json +154 -1
  142. voice_mode/pronounce.py +397 -0
  143. voice_mode/providers.py +7 -8
  144. voice_mode/resources/configuration.py +2 -2
  145. voice_mode/tools/configuration_management.py +106 -5
  146. voice_mode/tools/converse.py +109 -0
  147. voice_mode/tools/pronounce.py +245 -0
  148. voice_mode/tools/transcription/__init__.py +14 -0
  149. voice_mode/tools/transcription/backends.py +287 -0
  150. voice_mode/tools/transcription/core.py +136 -0
  151. voice_mode/tools/transcription/formats.py +144 -0
  152. voice_mode/tools/transcription/types.py +52 -0
  153. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/METADATA +5 -2
  154. voice_mode-4.1.0.dist-info/RECORD +259 -0
  155. voice_mode/voice_preferences.py +0 -125
  156. voice_mode-3.34.3.dist-info/RECORD +0 -116
  157. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/WHEEL +0 -0
  158. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/entry_points.txt +0 -0
voice_mode/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "3.34.3"
3
+ __version__ = "4.1.0"
voice_mode/cli.py CHANGED
@@ -1359,12 +1359,20 @@ def cli():
1359
1359
 
1360
1360
  # Import subcommand groups
1361
1361
  from voice_mode.cli_commands import exchanges as exchanges_cmd
1362
+ from voice_mode.cli_commands import transcribe as transcribe_cmd
1363
+ from voice_mode.cli_commands import pronounce_commands
1362
1364
 
1363
1365
  # Add subcommands to legacy CLI
1364
1366
  cli.add_command(exchanges_cmd.exchanges)
1367
+ cli.add_command(transcribe_cmd.transcribe)
1368
+ cli.add_command(pronounce_commands.pronounce_group)
1365
1369
 
1366
1370
  # Add exchanges to main CLI
1367
1371
  voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
1372
+ voice_mode_main_cli.add_command(pronounce_commands.pronounce_group)
1373
+
1374
+ # Add transcribe to main CLI
1375
+ voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
1368
1376
 
1369
1377
 
1370
1378
  # Converse command - direct voice conversation from CLI
@@ -0,0 +1,223 @@
1
+ """CLI commands for managing pronunciation rules."""
2
+
3
+ import click
4
+ import yaml
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from voice_mode.pronounce import get_manager
10
+
11
+
12
+ @click.group(name='pronounce')
13
+ def pronounce_group():
14
+ """Manage pronunciation rules for TTS and STT."""
15
+ pass
16
+
17
+
18
+ @pronounce_group.command(name='list')
19
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt', 'all']), default='all',
20
+ help='Filter by direction (tts/stt/all)')
21
+ @click.option('--enabled-only', '-e', is_flag=True, help='Show only enabled rules')
22
+ @click.option('--show-private', '-p', is_flag=True, help='Include private rules')
23
+ @click.option('--format', '-f', type=click.Choice(['table', 'yaml', 'json']), default='table',
24
+ help='Output format')
25
+ def list_rules(direction: str, enabled_only: bool, show_private: bool, format: str):
26
+ """List pronunciation rules."""
27
+ manager = get_manager()
28
+
29
+ # Get rules
30
+ if direction == 'all':
31
+ rules = manager.list_rules(include_private=show_private)
32
+ else:
33
+ rules = manager.list_rules(direction=direction, include_private=show_private)
34
+
35
+ # Filter if needed
36
+ if enabled_only:
37
+ rules = [r for r in rules if r['enabled']]
38
+
39
+ # Format output
40
+ if format == 'table':
41
+ if not rules:
42
+ click.echo("No rules found.")
43
+ return
44
+
45
+ # Count private rules that were hidden
46
+ all_rules = manager.list_rules(include_private=True)
47
+ private_count = len(all_rules) - len(rules)
48
+
49
+ # Simple table format without tabulate
50
+ click.echo("\nPronunciation Rules:")
51
+ click.echo("=" * 80)
52
+
53
+ for rule in rules:
54
+ status = '✓' if rule['enabled'] else '✗'
55
+ click.echo(f"\n{status} [{rule['direction'].upper()}] {rule['name']} (order: {rule['order']})")
56
+ click.echo(f" Pattern: {rule['pattern']}")
57
+ click.echo(f" Replace: {rule['replacement']}")
58
+ if rule['description']:
59
+ click.echo(f" Desc: {rule['description']}")
60
+
61
+ if private_count > 0 and not show_private:
62
+ click.echo(f"\n({private_count} private rules hidden. Use --show-private to display)")
63
+
64
+ elif format == 'yaml':
65
+ import yaml
66
+ click.echo(yaml.dump(rules, default_flow_style=False))
67
+
68
+ elif format == 'json':
69
+ import json
70
+ click.echo(json.dumps(rules, indent=2))
71
+
72
+
73
+ @pronounce_group.command(name='test')
74
+ @click.argument('text')
75
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), default='tts',
76
+ help='Test direction (tts/stt)')
77
+ def test_rule(text: str, direction: str):
78
+ """Test pronunciation rules on text."""
79
+ manager = get_manager()
80
+ result = manager.test_rule(text, direction)
81
+
82
+ if text != result:
83
+ click.echo(f"Original: {text}")
84
+ click.echo(f"Modified: {result}")
85
+
86
+ # Show which rules were applied if logging is enabled
87
+ import os
88
+ if os.environ.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true':
89
+ click.echo("\n(Check logs for applied rules)")
90
+ else:
91
+ click.echo(f"No changes: {text}")
92
+
93
+
94
+ @pronounce_group.command(name='add')
95
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
96
+ help='Rule direction (tts/stt)')
97
+ @click.option('--pattern', '-p', required=True, help='Regex pattern to match')
98
+ @click.option('--replacement', '-r', required=True, help='Replacement text')
99
+ @click.option('--name', '-n', help='Rule name (auto-generated if not provided)')
100
+ @click.option('--description', help='Rule description')
101
+ @click.option('--order', type=int, default=100, help='Processing order (lower = earlier)')
102
+ @click.option('--disabled', is_flag=True, help='Create rule as disabled')
103
+ def add_rule(direction: str, pattern: str, replacement: str, name: Optional[str],
104
+ description: str, order: int, disabled: bool):
105
+ """Add a new pronunciation rule."""
106
+ manager = get_manager()
107
+
108
+ success = manager.add_rule(
109
+ direction=direction,
110
+ pattern=pattern,
111
+ replacement=replacement,
112
+ name=name,
113
+ description=description or "",
114
+ enabled=not disabled,
115
+ order=order,
116
+ private=False # CLI-created rules are not private
117
+ )
118
+
119
+ if success:
120
+ click.echo(f"✓ Rule added successfully")
121
+ else:
122
+ click.echo("✗ Failed to add rule (check pattern validity)", err=True)
123
+
124
+
125
+ @pronounce_group.command(name='remove')
126
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
127
+ help='Rule direction (tts/stt)')
128
+ @click.argument('name')
129
+ def remove_rule(direction: str, name: str):
130
+ """Remove a pronunciation rule by name."""
131
+ manager = get_manager()
132
+
133
+ success = manager.remove_rule(direction, name)
134
+
135
+ if success:
136
+ click.echo(f"✓ Rule '{name}' removed")
137
+ else:
138
+ click.echo(f"✗ Rule '{name}' not found", err=True)
139
+
140
+
141
+ @pronounce_group.command(name='enable')
142
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
143
+ help='Rule direction (tts/stt)')
144
+ @click.argument('name')
145
+ def enable_rule(direction: str, name: str):
146
+ """Enable a pronunciation rule."""
147
+ manager = get_manager()
148
+
149
+ success = manager.enable_rule(direction, name)
150
+
151
+ if success:
152
+ click.echo(f"✓ Rule '{name}' enabled")
153
+ else:
154
+ click.echo(f"✗ Failed to enable rule '{name}' (not found or private)", err=True)
155
+
156
+
157
+ @pronounce_group.command(name='disable')
158
+ @click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
159
+ help='Rule direction (tts/stt)')
160
+ @click.argument('name')
161
+ def disable_rule(direction: str, name: str):
162
+ """Disable a pronunciation rule."""
163
+ manager = get_manager()
164
+
165
+ success = manager.disable_rule(direction, name)
166
+
167
+ if success:
168
+ click.echo(f"✓ Rule '{name}' disabled")
169
+ else:
170
+ click.echo(f"✗ Failed to disable rule '{name}' (not found or private)", err=True)
171
+
172
+
173
+ @pronounce_group.command(name='reload')
174
+ def reload_rules():
175
+ """Reload pronunciation rules from configuration files."""
176
+ manager = get_manager()
177
+ manager.reload_rules()
178
+ click.echo("✓ Pronunciation rules reloaded")
179
+
180
+
181
+ @pronounce_group.command(name='edit')
182
+ @click.option('--system', is_flag=True, help='Edit system default rules (requires sudo)')
183
+ def edit_config(system: bool):
184
+ """Open pronunciation config in editor."""
185
+ import os
186
+ import subprocess
187
+
188
+ if system:
189
+ # Edit system defaults
190
+ config_path = Path(__file__).parent.parent / 'data' / 'default_pronunciation.yaml'
191
+ if not config_path.exists():
192
+ click.echo(f"System config not found: {config_path}", err=True)
193
+ return
194
+ # Might need sudo
195
+ editor = os.environ.get('EDITOR', 'nano')
196
+ subprocess.run(['sudo', editor, str(config_path)])
197
+ else:
198
+ # Edit user config
199
+ config_path = Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'
200
+ if not config_path.exists():
201
+ # Create default config
202
+ config_path.parent.mkdir(parents=True, exist_ok=True)
203
+ default_config = {
204
+ 'version': 1,
205
+ 'tts_rules': [],
206
+ 'stt_rules': []
207
+ }
208
+ with open(config_path, 'w') as f:
209
+ yaml.dump(default_config, f, default_flow_style=False)
210
+
211
+ editor = os.environ.get('EDITOR', 'nano')
212
+ subprocess.run([editor, str(config_path)])
213
+
214
+ # Reload after editing
215
+ manager = get_manager()
216
+ manager.reload_rules()
217
+ click.echo("✓ Configuration edited and reloaded")
218
+
219
+
220
+ # Register the command group
221
+ def register_commands(cli):
222
+ """Register pronunciation commands with the main CLI."""
223
+ cli.add_command(pronounce_group)
@@ -0,0 +1,141 @@
1
+ """CLI command for audio transcription."""
2
+
3
+ import click
4
+ import json
5
+ import asyncio
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from voice_mode.tools.transcription import (
10
+ transcribe_audio,
11
+ TranscriptionBackend,
12
+ OutputFormat
13
+ )
14
+
15
+
16
+ @click.group()
17
+ def transcribe():
18
+ """Audio transcription with word-level timestamps."""
19
+ pass
20
+
21
+
22
+ @transcribe.command("audio")
23
+ @click.argument('audio_file', type=click.Path(exists=True))
24
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
25
+ @click.option(
26
+ '--backend',
27
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
28
+ default='openai',
29
+ help='Transcription backend to use'
30
+ )
31
+ @click.option(
32
+ '--format',
33
+ 'output_format',
34
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
35
+ default='json',
36
+ help='Output format for transcription'
37
+ )
38
+ @click.option('--output', '-o', type=click.Path(), help='Save transcription to file')
39
+ @click.option('--language', help='Language code (e.g., en, es, fr)')
40
+ @click.option('--model', default='whisper-1', help='Model to use (for OpenAI backend)')
41
+ def audio_command(
42
+ audio_file: str,
43
+ words: bool,
44
+ backend: str,
45
+ output_format: str,
46
+ output: Optional[str],
47
+ language: Optional[str],
48
+ model: str
49
+ ):
50
+ """
51
+ Transcribe audio with optional word-level timestamps.
52
+
53
+ Examples:
54
+
55
+ voice-mode transcribe audio recording.mp3
56
+
57
+ voice-mode transcribe audio interview.wav --words
58
+
59
+ voice-mode transcribe audio podcast.mp3 --words --format srt -o subtitles.srt
60
+
61
+ voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
62
+ """
63
+ async def run():
64
+ # Perform transcription
65
+ result = await transcribe_audio(
66
+ audio_file=audio_file,
67
+ word_timestamps=words,
68
+ backend=TranscriptionBackend(backend),
69
+ output_format=OutputFormat(output_format),
70
+ language=language,
71
+ model=model
72
+ )
73
+
74
+ # Check for errors
75
+ if not result.get("success", False):
76
+ error_msg = result.get("error", "Unknown error occurred")
77
+ click.echo(f"Error: {error_msg}", err=True)
78
+ return
79
+
80
+ # Format output
81
+ if output_format == 'json':
82
+ # Remove internal fields for cleaner output
83
+ output_result = {k: v for k, v in result.items()
84
+ if k not in ['formatted_content']}
85
+ content = json.dumps(output_result, indent=2)
86
+ elif "formatted_content" in result:
87
+ content = result["formatted_content"]
88
+ else:
89
+ # Fallback to JSON if format conversion failed
90
+ content = json.dumps(result, indent=2)
91
+
92
+ # Write output
93
+ if output:
94
+ Path(output).write_text(content)
95
+ click.echo(f"Transcription saved to {output}")
96
+ else:
97
+ click.echo(content)
98
+
99
+ # Run async function
100
+ asyncio.run(run())
101
+
102
+
103
+ # For backward compatibility, also provide a direct command
104
+ @click.command('transcribe-audio')
105
+ @click.argument('audio_file', type=click.Path(exists=True))
106
+ @click.option('--words', is_flag=True, help='Include word-level timestamps')
107
+ @click.option(
108
+ '--backend',
109
+ type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
110
+ default='openai',
111
+ help='Transcription backend'
112
+ )
113
+ @click.option(
114
+ '--format',
115
+ 'output_format',
116
+ type=click.Choice(['json', 'srt', 'vtt', 'csv']),
117
+ default='json',
118
+ help='Output format'
119
+ )
120
+ @click.option('--output', '-o', type=click.Path(), help='Save to file')
121
+ @click.option('--language', help='Language code')
122
+ @click.option('--model', default='whisper-1', help='Model to use')
123
+ def transcribe_audio_command(
124
+ audio_file: str,
125
+ words: bool,
126
+ backend: str,
127
+ output_format: str,
128
+ output: Optional[str],
129
+ language: Optional[str],
130
+ model: str
131
+ ):
132
+ """Direct transcription command for backward compatibility."""
133
+ audio_command.callback(
134
+ audio_file=audio_file,
135
+ words=words,
136
+ backend=backend,
137
+ output_format=output_format,
138
+ output=output,
139
+ language=language,
140
+ model=model
141
+ )
voice_mode/config.py CHANGED
@@ -15,21 +15,66 @@ from datetime import datetime
15
15
 
16
16
  # ==================== ENVIRONMENT CONFIGURATION ====================
17
17
 
18
+ def find_voicemode_env_files() -> list[Path]:
19
+ """
20
+ Find .voicemode.env files by walking up the directory tree.
21
+
22
+ Looks for (in order of priority - closest to current directory wins):
23
+ 1. .voicemode.env in current or parent directories
24
+ 2. .voicemode/voicemode.env in current or parent directories
25
+ 3. ~/.voicemode/voicemode.env in user home (global config)
26
+
27
+ Returns:
28
+ List of Path objects in loading order (global first, then project-specific)
29
+ """
30
+ config_files = []
31
+
32
+ # First add global config (lowest priority - loaded first)
33
+ global_config = Path.home() / ".voicemode" / "voicemode.env"
34
+
35
+ # Backwards compatibility: check for old filename
36
+ if not global_config.exists():
37
+ old_global = Path.home() / ".voicemode" / ".voicemode.env"
38
+ if old_global.exists():
39
+ global_config = old_global
40
+
41
+ if global_config.exists():
42
+ config_files.append(global_config)
43
+
44
+ # Then walk up directory tree for project-specific configs (higher priority)
45
+ current_dir = Path.cwd()
46
+ project_configs = []
47
+
48
+ while current_dir != current_dir.parent:
49
+ # Check for standalone .voicemode.env first
50
+ standalone_file = current_dir / ".voicemode.env"
51
+ if standalone_file.exists():
52
+ project_configs.append(standalone_file)
53
+ break # Stop at first found (closest wins)
54
+
55
+ # Then check .voicemode/voicemode.env
56
+ dir_file = current_dir / ".voicemode" / "voicemode.env"
57
+ # Skip if this is the global config file (already added)
58
+ if dir_file.exists() and dir_file != global_config:
59
+ project_configs.append(dir_file)
60
+ break # Stop at first found (closest wins)
61
+
62
+ current_dir = current_dir.parent
63
+
64
+ # Add project configs (they were collected closest-first, so add as-is)
65
+ config_files.extend(project_configs)
66
+
67
+ return config_files
68
+
69
+
18
70
  def load_voicemode_env():
19
- """Load configuration from voicemode.env file if it exists, creating a default if not."""
20
- # Try new filename first
21
- config_path = Path.home() / ".voicemode" / "voicemode.env"
22
-
23
- # Backwards compatibility: check for old filename if new doesn't exist
24
- if not config_path.exists():
25
- old_path = Path.home() / ".voicemode" / ".voicemode.env"
26
- if old_path.exists():
27
- config_path = old_path
28
- print(f"Warning: Using deprecated .voicemode.env - please rename to voicemode.env")
29
-
30
- if not config_path.exists():
31
- # Create default template
32
- config_path.parent.mkdir(parents=True, exist_ok=True)
71
+ """Load configuration from voicemode.env files, with cascading from global to project-specific."""
72
+ config_files = find_voicemode_env_files()
73
+
74
+ # If no config files found, create default global config
75
+ if not config_files:
76
+ default_path = Path.home() / ".voicemode" / "voicemode.env"
77
+ default_path.parent.mkdir(parents=True, exist_ok=True)
33
78
  default_config = '''# Voice Mode Configuration File
34
79
  # This file is automatically generated and can be customized
35
80
  # Environment variables always take precedence over this file
@@ -66,8 +111,8 @@ def load_voicemode_env():
66
111
  # Comma-separated list of STT endpoints
67
112
  # VOICEMODE_STT_BASE_URLS=http://127.0.0.1:2022/v1,https://api.openai.com/v1
68
113
 
69
- # Comma-separated list of preferred voices
70
- # VOICEMODE_TTS_VOICES=af_sky,alloy
114
+ # Comma-separated list of preferred voices
115
+ # VOICEMODE_VOICES=af_sky,alloy
71
116
 
72
117
  # Comma-separated list of preferred models
73
118
  # VOICEMODE_TTS_MODELS=tts-1,tts-1-hd,gpt-4o-mini-tts
@@ -127,26 +172,28 @@ def load_voicemode_env():
127
172
  # LIVEKIT_API_KEY=devkey
128
173
  # LIVEKIT_API_SECRET=secret
129
174
  '''
130
- with open(config_path, 'w') as f:
175
+ with open(default_path, 'w') as f:
131
176
  f.write(default_config)
132
- os.chmod(config_path, 0o600) # Secure permissions
133
-
134
- # Load configuration from file
135
- if config_path.exists():
136
- with open(config_path, 'r') as f:
137
- for line in f:
138
- line = line.strip()
139
- # Skip comments and empty lines
140
- if not line or line.startswith('#'):
141
- continue
142
- # Parse KEY=VALUE format
143
- if '=' in line:
144
- key, value = line.split('=', 1)
145
- key = key.strip()
146
- value = value.strip()
147
- # Only set if not already in environment
148
- if key and key not in os.environ:
149
- os.environ[key] = value
177
+ os.chmod(default_path, 0o600) # Secure permissions
178
+ config_files = [default_path]
179
+
180
+ # Load configuration from all files in order (global first, project-specific last)
181
+ for config_path in config_files:
182
+ if config_path.exists():
183
+ with open(config_path, 'r') as f:
184
+ for line in f:
185
+ line = line.strip()
186
+ # Skip comments and empty lines
187
+ if not line or line.startswith('#'):
188
+ continue
189
+ # Parse KEY=VALUE format
190
+ if '=' in line:
191
+ key, value = line.split('=', 1)
192
+ key = key.strip()
193
+ value = value.strip()
194
+ # Only set if not already in environment (env vars take precedence)
195
+ if key and key not in os.environ:
196
+ os.environ[key] = value
150
197
 
151
198
  # Load configuration file before other configuration
152
199
  load_voicemode_env()
@@ -222,13 +269,68 @@ def parse_comma_list(env_var: str, fallback: str) -> list:
222
269
  # New provider endpoint lists configuration
223
270
  TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
224
271
  STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
225
- TTS_VOICES = parse_comma_list("VOICEMODE_TTS_VOICES", "af_sky,alloy")
272
+ TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
226
273
  TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
227
274
 
275
+ # Voice preferences cache
276
+ _cached_voice_preferences: Optional[list] = None
277
+ _voice_preferences_loaded = False
278
+
279
+ def get_voice_preferences() -> list[str]:
280
+ """
281
+ Get voice preferences from configuration.
282
+
283
+ Uses the VOICEMODE_VOICES configuration which is loaded from:
284
+ 1. Environment variables (highest priority)
285
+ 2. Project-specific .voicemode.env files
286
+ 3. Global ~/.voicemode/voicemode.env file
287
+ 4. Built-in defaults
288
+
289
+ Returns:
290
+ List of voice names in preference order
291
+ """
292
+ global _cached_voice_preferences, _voice_preferences_loaded
293
+
294
+ # Return cached preferences if already loaded
295
+ if _voice_preferences_loaded:
296
+ return _cached_voice_preferences or []
297
+
298
+ _voice_preferences_loaded = True
299
+
300
+ # Get voices from TTS_VOICES configuration
301
+ _cached_voice_preferences = TTS_VOICES.copy()
302
+
303
+ logger.info(f"Voice preferences loaded: {_cached_voice_preferences}")
304
+ return _cached_voice_preferences
305
+
306
+ def clear_voice_preferences_cache():
307
+ """Clear the voice preferences cache, forcing a reload on next access."""
308
+ global _cached_voice_preferences, _voice_preferences_loaded
309
+ _cached_voice_preferences = None
310
+ _voice_preferences_loaded = False
311
+ logger.debug("Voice preferences cache cleared")
312
+
313
+ def reload_configuration():
314
+ """Reload configuration from files and clear all caches."""
315
+ # Clear voice preferences cache
316
+ clear_voice_preferences_cache()
317
+
318
+ # Reload environment configuration
319
+ load_voicemode_env()
320
+
321
+ # Update global configuration variables
322
+ global TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, STT_BASE_URLS
323
+ TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1")
324
+ STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1")
325
+ TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy")
326
+ TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts")
327
+
328
+ logger.info("Configuration reloaded successfully")
329
+
228
330
  # Legacy variables have been removed - use the new list-based configuration:
229
331
  # - VOICEMODE_TTS_BASE_URLS (comma-separated list)
230
332
  # - VOICEMODE_STT_BASE_URLS (comma-separated list)
231
- # - VOICEMODE_TTS_VOICES (comma-separated list)
333
+ # - VOICEMODE_VOICES (comma-separated list)
232
334
  # - VOICEMODE_TTS_MODELS (comma-separated list)
233
335
 
234
336
  # LiveKit configuration