voice-mode 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +73 -0
  3. voice_mode/cli_commands/claude.py +208 -0
  4. voice_mode/cli_commands/hook.py +197 -0
  5. voice_mode/cli_commands/pronounce_commands.py +223 -0
  6. voice_mode/config.py +25 -0
  7. voice_mode/data/default_pronunciation.yaml +268 -0
  8. voice_mode/frontend/.next/BUILD_ID +1 -1
  9. voice_mode/frontend/.next/app-build-manifest.json +5 -5
  10. voice_mode/frontend/.next/build-manifest.json +3 -3
  11. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
  12. voice_mode/frontend/.next/next-server.js.nft.json +1 -1
  13. voice_mode/frontend/.next/prerender-manifest.json +1 -1
  14. voice_mode/frontend/.next/required-server-files.json +1 -1
  15. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
  16. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  17. voice_mode/frontend/.next/server/app/_not-found.html +1 -1
  18. voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
  19. voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
  20. voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
  21. voice_mode/frontend/.next/server/app/index.html +1 -1
  22. voice_mode/frontend/.next/server/app/index.rsc +2 -2
  23. voice_mode/frontend/.next/server/app/page.js +2 -2
  24. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  25. voice_mode/frontend/.next/server/chunks/994.js +1 -1
  26. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
  27. voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
  28. voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
  29. voice_mode/frontend/.next/server/pages/404.html +1 -1
  30. voice_mode/frontend/.next/server/pages/500.html +1 -1
  31. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
  32. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
  33. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
  34. voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
  35. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
  36. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
  37. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
  38. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  39. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
  40. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  41. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
  42. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
  43. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
  44. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
  45. voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
  46. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  47. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
  48. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
  49. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
  50. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
  51. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
  52. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
  53. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  54. voice_mode/frontend/.next/standalone/server.js +1 -1
  55. voice_mode/frontend/.next/static/chunks/app/layout-df4aba5e7309efec.js +1 -0
  56. voice_mode/frontend/.next/static/chunks/app/page-a87d04099ce6aeb2.js +1 -0
  57. voice_mode/frontend/.next/static/chunks/{main-app-233f6c633f73ae84.js → main-app-ca62791c49278d6d.js} +1 -1
  58. voice_mode/frontend/.next/trace +43 -43
  59. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
  60. voice_mode/frontend/.next/types/app/layout.ts +1 -1
  61. voice_mode/frontend/.next/types/app/page.ts +1 -1
  62. voice_mode/frontend/package-lock.json +3 -3
  63. voice_mode/pronounce.py +397 -0
  64. voice_mode/tools/claude_thinking.py +285 -0
  65. voice_mode/tools/converse.py +11 -0
  66. voice_mode/tools/pronounce.py +245 -0
  67. voice_mode/tools/sound_fonts/__init__.py +1 -0
  68. voice_mode/tools/sound_fonts/audio_player.py +87 -0
  69. voice_mode/tools/sound_fonts/hook_handler.py +127 -0
  70. voice_mode/tools/sound_fonts/player.py +180 -0
  71. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/METADATA +1 -1
  72. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/RECORD +76 -65
  73. voice_mode/frontend/.next/static/chunks/app/layout-0074dd8ab91cdbe0.js +0 -1
  74. voice_mode/frontend/.next/static/chunks/app/page-ae5f3aa9d9ba5993.js +0 -1
  75. /voice_mode/frontend/.next/static/{c5TIe90lGzrESrqJkkXQa → FTYM9NyY_2zq92GYxPDhS}/_buildManifest.js +0 -0
  76. /voice_mode/frontend/.next/static/{c5TIe90lGzrESrqJkkXQa → FTYM9NyY_2zq92GYxPDhS}/_ssgManifest.js +0 -0
  77. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/WHEEL +0 -0
  78. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-au03bmmj/voice_mode-4.0.1/voice_mode/frontend/app/api/connection-details/route.ts
1
+ // File: /tmp/build-via-sdist-grc13230/voice_mode-4.2.0/voice_mode/frontend/app/api/connection-details/route.ts
2
2
  import * as entry from '../../../../../app/api/connection-details/route.js'
3
3
  import type { NextRequest } from 'next/server.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-au03bmmj/voice_mode-4.0.1/voice_mode/frontend/app/layout.tsx
1
+ // File: /tmp/build-via-sdist-grc13230/voice_mode-4.2.0/voice_mode/frontend/app/layout.tsx
2
2
  import * as entry from '../../../app/layout.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-au03bmmj/voice_mode-4.0.1/voice_mode/frontend/app/page.tsx
1
+ // File: /tmp/build-via-sdist-grc13230/voice_mode-4.2.0/voice_mode/frontend/app/page.tsx
2
2
  import * as entry from '../../../app/page.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1774,9 +1774,9 @@
1774
1774
  "license": "MIT"
1775
1775
  },
1776
1776
  "node_modules/electron-to-chromium": {
1777
- "version": "1.5.211",
1778
- "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.211.tgz",
1779
- "integrity": "sha512-IGBvimJkotaLzFnwIVgW9/UD/AOJ2tByUmeOrtqBfACSbAw5b1G0XpvdaieKyc7ULmbwXVx+4e4Be8pOPBrYkw==",
1777
+ "version": "1.5.212",
1778
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.212.tgz",
1779
+ "integrity": "sha512-gE7ErIzSW+d8jALWMcOIgf+IB6lpfsg6NwOhPVwKzDtN2qcBix47vlin4yzSregYDxTCXOUqAZjVY/Z3naS7ww==",
1780
1780
  "dev": true,
1781
1781
  "license": "ISC"
1782
1782
  },
@@ -0,0 +1,397 @@
1
+ """
2
+ Pronunciation middleware for TTS and STT text processing.
3
+
4
+ This module provides regex-based text substitutions to improve TTS pronunciation
5
+ and correct STT transcription errors.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Tuple
12
+ import yaml
13
+ from dataclasses import dataclass, field
14
+ import os
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class PronounceRule:
    """A single regex-based pronunciation substitution rule.

    The pattern is compiled once when the rule is constructed. A rule whose
    pattern cannot be compiled is kept, but it is inert: ``apply`` returns the
    input text unchanged.

    Attributes:
        name: Identifier of the rule (also used in log messages).
        pattern: Regular expression source text.
        replacement: Replacement template passed to ``re.Pattern.sub``.
        order: Numeric ordering key (defaults to 100).
        enabled: When False, ``apply`` leaves the text untouched.
        description: Free-form human-readable description.
        private: Privacy flag; defaults to True.
    """
    name: str
    pattern: str
    replacement: str
    order: int = 100
    enabled: bool = True
    description: str = ""
    private: bool = True  # Default to private for security
    _compiled: Optional[re.Pattern] = field(default=None, init=False, repr=False)

    def __post_init__(self):
        """Compile the regex pattern after initialization.

        On failure the error is logged and ``_compiled`` stays None, which
        disables the rule instead of raising from the constructor.
        """
        try:
            self._compiled = re.compile(self.pattern)
        except (re.error, TypeError) as e:
            # re.error: malformed expression. TypeError: the pattern is not a
            # string at all (for example an unquoted number in a YAML file).
            logger.error(f"Invalid regex pattern in rule '{self.name}': {e}")
            self._compiled = None

    def apply(self, text: str) -> Tuple[str, bool]:
        """Apply this rule to ``text``.

        Args:
            text: The text to transform.

        Returns:
            A ``(modified_text, was_applied)`` tuple. ``was_applied`` is True
            only when the substitution actually changed the text. Disabled or
            uncompiled rules return ``(text, False)``.
        """
        if not self.enabled or self._compiled is None:
            return text, False

        original = text
        try:
            text = self._compiled.sub(self.replacement, text)
        except Exception as e:
            # Deliberately broad: a bad replacement template (e.g. an invalid
            # group reference) must not break the whole text pipeline.
            logger.error(f"Error applying rule '{self.name}': {e}")
            return original, False
        return text, text != original
51
+
52
+
53
class PronounceManager:
    """Manages pronunciation rules for TTS and STT corrections.

    Rules live in ``self.rules``, a dict with the keys ``'tts'`` and
    ``'stt'``; each value is a list of ``PronounceRule`` kept sorted by
    ``order``. Config files are loaded in sequence and a rule loaded later
    replaces an earlier rule with the same name in the same direction.
    """

    # Valid rule directions; these are also the keys of ``self.rules``.
    _DIRECTIONS = ('tts', 'stt')

    def __init__(self, config_paths: Optional[List[Path]] = None):
        """
        Initialize the pronunciation rule manager and load all rules.

        Args:
            config_paths: List of config file paths. If None (or empty),
                the default locations are used.
        """
        self.rules: Dict[str, List[PronounceRule]] = {
            'tts': [],
            'stt': []
        }
        self.config_paths = config_paths or self._get_default_config_paths()
        self._load_all_rules()

    def _get_default_config_paths(self) -> List[Path]:
        """Get default configuration file paths.

        Returns:
            Existing paths, in load order: packaged defaults, user config,
            project config in the current working directory, then every entry
            of the ``VOICEMODE_PRONUNCIATION_CONFIG`` environment variable.
        """
        paths = []

        # System defaults
        default_path = Path(__file__).parent / 'data' / 'default_pronunciation.yaml'
        if default_path.exists():
            paths.append(default_path)

        # User config
        user_config = Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'
        if user_config.exists():
            paths.append(user_config)

        # Project config (like Claude Code hooks)
        project_config = Path.cwd() / '.pronunciation.yaml'
        if project_config.exists():
            paths.append(project_config)

        # Environment variable paths. os.pathsep is ':' on POSIX and ';' on
        # Windows, so drive letters such as 'C:\\...' are not split apart.
        env_paths = os.environ.get('VOICEMODE_PRONUNCIATION_CONFIG', '')
        for path_str in env_paths.split(os.pathsep):
            if not path_str:
                # An empty segment would become Path('.'), which exists and
                # would be queued as if it were a config file.
                continue
            path = Path(path_str).expanduser()
            if path.exists():
                paths.append(path)

        return paths

    def _load_all_rules(self):
        """Load rules from all configured paths.

        Existing rules are discarded first. A file that fails to load is
        logged and skipped so the remaining files are still processed.
        """
        self.rules = {'tts': [], 'stt': []}

        for config_path in self.config_paths:
            try:
                self._load_rules_from_file(config_path)
                logger.info(f"Loaded pronunciation rules from {config_path}")
            except Exception as e:
                logger.error(f"Failed to load rules from {config_path}: {e}")

    def _load_rules_from_file(self, config_path: Path):
        """Load rules from a single YAML file and merge them into ``self.rules``.

        Args:
            config_path: Path of the YAML file to read.

        Raises:
            ValueError: If the document's top level is not a mapping.
            OSError: If the file cannot be opened.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        if not config:
            return
        if not isinstance(config, dict):
            raise ValueError(
                f"expected a mapping at the top level, got {type(config).__name__}"
            )

        for direction in self._DIRECTIONS:
            # 'or []' also covers a key that is present but has no value
            # (parsed as None), which would otherwise abort the whole file.
            for rule_dict in config.get(f'{direction}_rules') or []:
                rule = self._dict_to_rule(rule_dict)
                if rule is None:
                    continue
                # Check for duplicate names and override
                self.rules[direction] = [
                    r for r in self.rules[direction] if r.name != rule.name
                ]
                self.rules[direction].append(rule)

            # Sort rules by order
            self.rules[direction].sort(key=lambda r: r.order)

    def _dict_to_rule(self, rule_dict: dict) -> Optional[PronounceRule]:
        """Convert a dictionary to a PronounceRule.

        Args:
            rule_dict: Mapping with the required keys ``name``, ``pattern``
                and ``replacement`` and the optional keys ``order``,
                ``enabled``, ``description`` and ``private``.

        Returns:
            The rule, or None (after logging) when a required key is missing
            or the entry has the wrong type.
        """
        try:
            return PronounceRule(
                name=rule_dict['name'],
                pattern=rule_dict['pattern'],
                replacement=rule_dict['replacement'],
                order=rule_dict.get('order', 100),
                enabled=rule_dict.get('enabled', True),
                description=rule_dict.get('description', ''),
                private=rule_dict.get('private', True)  # Default to private
            )
        except (KeyError, TypeError) as e:
            logger.error(f"Invalid rule configuration: {e}")
            return None

    def _apply_rules(self, direction: str, text: str) -> str:
        """Run every rule of ``direction`` over ``text`` in list order.

        Substitutions are logged only when the environment variable
        ``VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS`` equals ``true``
        (case-insensitive).
        """
        log_substitutions = os.environ.get(
            'VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', ''
        ).lower() == 'true'
        label = direction.upper()

        for rule in self.rules[direction]:
            original = text
            text, applied = rule.apply(text)
            if applied and log_substitutions:
                logger.info(f"Pronunciation {label}: Applied rule '{rule.name}': \"{original}\" → \"{text}\"")

        return text

    def process_tts(self, text: str) -> str:
        """
        Apply TTS substitutions before speech generation.

        Args:
            text: Text to be spoken by TTS

        Returns:
            Modified text with pronunciation improvements
        """
        return self._apply_rules('tts', text)

    def process_stt(self, text: str) -> str:
        """
        Apply STT corrections after transcription.

        Args:
            text: Text transcribed from speech

        Returns:
            Corrected text
        """
        return self._apply_rules('stt', text)

    def _generate_rule_name(self, direction: str) -> str:
        """Return an unused ``<direction>_rule_<n>`` name.

        Numbering starts at the current rule count and advances past names
        already taken, so a name freed by an earlier removal cannot make the
        generated name collide with an existing rule.
        """
        taken = {r.name for r in self.rules[direction]}
        index = len(self.rules[direction])
        while f"{direction}_rule_{index}" in taken:
            index += 1
        return f"{direction}_rule_{index}"

    # CRUD Operations
    def add_rule(self, direction: str, pattern: str, replacement: str,
                 name: Optional[str] = None, description: str = "",
                 enabled: bool = True, order: int = 100,
                 private: bool = False) -> bool:
        """
        Add a new pronunciation rule and persist the rule set.

        Args:
            direction: 'tts' or 'stt'
            pattern: Regex pattern to match
            replacement: Replacement text
            name: Rule name (auto-generated if not provided)
            description: Human-readable description
            enabled: Whether rule is active
            order: Processing order
            private: Whether rule is hidden from LLM

        Returns:
            True if rule was added successfully; False for an invalid
            direction, a duplicate name or an invalid pattern.
        """
        if direction not in self._DIRECTIONS:
            logger.error(f"Invalid direction: {direction}")
            return False

        # Auto-generate name if not provided
        if not name:
            name = self._generate_rule_name(direction)

        # Check for duplicate names
        if any(r.name == name for r in self.rules[direction]):
            logger.error(f"Rule with name '{name}' already exists")
            return False

        try:
            rule = PronounceRule(
                name=name,
                pattern=pattern,
                replacement=replacement,
                order=order,
                enabled=enabled,
                description=description,
                private=private
            )
        except TypeError as e:
            # A pattern of the wrong type is reported the same way
            # _dict_to_rule reports it, instead of raising to the caller.
            logger.error(f"Invalid rule configuration: {e}")
            return False

        # The rule logs and leaves _compiled unset when the regex is invalid.
        if rule._compiled is None:
            return False

        self.rules[direction].append(rule)
        self.rules[direction].sort(key=lambda r: r.order)

        # Save to user config
        self._save_user_rules()
        return True

    def remove_rule(self, direction: str, name: str) -> bool:
        """Remove a pronunciation rule by name.

        Returns:
            True if a rule was removed (the rule set is then saved), False
            for an invalid direction or an unknown name.
        """
        # NOTE(review): unlike enable_rule/disable_rule this does not refuse
        # private rules - confirm that is intended.
        if direction not in self._DIRECTIONS:
            return False

        original_count = len(self.rules[direction])
        self.rules[direction] = [r for r in self.rules[direction] if r.name != name]

        if len(self.rules[direction]) < original_count:
            self._save_user_rules()
            return True
        return False

    def list_rules(self, direction: Optional[str] = None,
                   include_private: bool = False) -> List[dict]:
        """
        List all rules or rules for specific direction.

        Args:
            direction: 'tts', 'stt', or None for all
            include_private: Whether to include private rules (for CLI, not MCP)

        Returns:
            List of rule dictionaries; an unknown direction yields an
            empty list.
        """
        rules = []

        directions = [direction] if direction else list(self._DIRECTIONS)

        for rule_direction in directions:
            if rule_direction not in self.rules:
                continue

            for rule in self.rules[rule_direction]:
                # Skip private rules unless explicitly requested
                if rule.private and not include_private:
                    continue

                rules.append({
                    'direction': rule_direction,
                    'name': rule.name,
                    'pattern': rule.pattern,
                    'replacement': rule.replacement,
                    'order': rule.order,
                    'enabled': rule.enabled,
                    'description': rule.description,
                    'private': rule.private
                })

        return rules

    def _set_rule_enabled(self, direction: str, name: str, enabled: bool) -> bool:
        """Set the ``enabled`` flag of the first rule called ``name``.

        Private rules are never modified. Returns True when the flag was set
        and the rule set saved, otherwise False.
        """
        if direction not in self._DIRECTIONS:
            return False

        action = 'enable' if enabled else 'disable'
        for rule in self.rules[direction]:
            if rule.name == name:
                if rule.private:
                    logger.warning(f"Cannot {action} private rule '{name}' via API")
                    return False
                rule.enabled = enabled
                self._save_user_rules()
                return True
        return False

    def enable_rule(self, direction: str, name: str) -> bool:
        """Enable a specific rule. Returns False for private or unknown rules."""
        return self._set_rule_enabled(direction, name, True)

    def disable_rule(self, direction: str, name: str) -> bool:
        """Disable a specific rule. Returns False for private or unknown rules."""
        return self._set_rule_enabled(direction, name, False)

    def test_rule(self, text: str, direction: str = "tts") -> str:
        """Test what a text would become after applying rules.

        An unknown direction returns the text unchanged.
        """
        if direction == 'tts':
            return self.process_tts(text)
        elif direction == 'stt':
            return self.process_stt(text)
        else:
            return text

    def reload_rules(self):
        """Reload all rules from the already-resolved ``self.config_paths``."""
        self._load_all_rules()
        logger.info("Reloaded pronunciation rules")

    @staticmethod
    def _rule_to_config(rule: PronounceRule) -> dict:
        """Serialise a rule using the key order of the config file."""
        return {
            'name': rule.name,
            'order': rule.order,
            'pattern': rule.pattern,
            'replacement': rule.replacement,
            'enabled': rule.enabled,
            'description': rule.description,
            'private': rule.private
        }

    def _save_user_rules(self):
        """Save current rules to user config file.

        Writes ``~/.voicemode/config/pronunciation.yaml``, creating the
        parent directory when needed.
        """
        user_config = Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'
        user_config.parent.mkdir(parents=True, exist_ok=True)

        # Every rule currently in memory is written, whichever file it was
        # loaded from.
        # NOTE(review): this copies packaged-default and project rules into
        # the user file; the original comment claimed only non-default rules
        # were saved. Confirm which behaviour is intended.
        config = {
            'version': 1,
            'tts_rules': [self._rule_to_config(rule) for rule in self.rules['tts']],
            'stt_rules': [self._rule_to_config(rule) for rule in self.rules['stt']]
        }

        with open(user_config, 'w', encoding='utf-8') as f:
            yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Saved pronunciation rules to {user_config}")
381
+
382
+
383
# Module-wide manager; stays None until get_manager() is first called.
_manager: Optional[PronounceManager] = None


def get_manager() -> PronounceManager:
    """Return the shared pronunciation manager, building it on first use."""
    global _manager
    if _manager is not None:
        return _manager
    _manager = PronounceManager()
    return _manager
393
+
394
+
395
def is_enabled() -> bool:
    """Check if pronunciation middleware is enabled.

    Reads the ``VOICEMODE_PRONUNCIATION_ENABLED`` environment variable. The
    middleware is enabled when the variable is unset, or when it is set to
    ``true``, ``1``, ``yes`` or ``on`` (case-insensitive, surrounding
    whitespace ignored). Any other value disables it.

    Returns:
        True when the middleware should run, otherwise False.
    """
    value = os.environ.get('VOICEMODE_PRONUNCIATION_ENABLED', 'true')
    return value.strip().lower() in ('true', '1', 'yes', 'on')