voice-mode 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +73 -0
  3. voice_mode/cli_commands/claude.py +208 -0
  4. voice_mode/cli_commands/hook.py +197 -0
  5. voice_mode/cli_commands/pronounce_commands.py +223 -0
  6. voice_mode/config.py +25 -0
  7. voice_mode/data/default_pronunciation.yaml +268 -0
  8. voice_mode/frontend/.next/BUILD_ID +1 -1
  9. voice_mode/frontend/.next/app-build-manifest.json +5 -5
  10. voice_mode/frontend/.next/build-manifest.json +3 -3
  11. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
  12. voice_mode/frontend/.next/next-server.js.nft.json +1 -1
  13. voice_mode/frontend/.next/prerender-manifest.json +1 -1
  14. voice_mode/frontend/.next/required-server-files.json +1 -1
  15. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
  16. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  17. voice_mode/frontend/.next/server/app/_not-found.html +1 -1
  18. voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
  19. voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
  20. voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
  21. voice_mode/frontend/.next/server/app/index.html +1 -1
  22. voice_mode/frontend/.next/server/app/index.rsc +2 -2
  23. voice_mode/frontend/.next/server/app/page.js +2 -2
  24. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  25. voice_mode/frontend/.next/server/chunks/994.js +1 -1
  26. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
  27. voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
  28. voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
  29. voice_mode/frontend/.next/server/pages/404.html +1 -1
  30. voice_mode/frontend/.next/server/pages/500.html +1 -1
  31. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
  32. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
  33. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
  34. voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
  35. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
  36. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
  37. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
  38. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  39. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
  40. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  41. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
  42. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
  43. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
  44. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
  45. voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
  46. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  47. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
  48. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
  49. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
  50. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
  51. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
  52. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
  53. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  54. voice_mode/frontend/.next/standalone/server.js +1 -1
  55. voice_mode/frontend/.next/static/chunks/app/layout-df4aba5e7309efec.js +1 -0
  56. voice_mode/frontend/.next/static/chunks/app/page-a87d04099ce6aeb2.js +1 -0
  57. voice_mode/frontend/.next/static/chunks/{main-app-233f6c633f73ae84.js → main-app-ca62791c49278d6d.js} +1 -1
  58. voice_mode/frontend/.next/trace +43 -43
  59. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
  60. voice_mode/frontend/.next/types/app/layout.ts +1 -1
  61. voice_mode/frontend/.next/types/app/page.ts +1 -1
  62. voice_mode/frontend/package-lock.json +3 -3
  63. voice_mode/pronounce.py +397 -0
  64. voice_mode/tools/claude_thinking.py +285 -0
  65. voice_mode/tools/converse.py +11 -0
  66. voice_mode/tools/pronounce.py +245 -0
  67. voice_mode/tools/sound_fonts/__init__.py +1 -0
  68. voice_mode/tools/sound_fonts/audio_player.py +87 -0
  69. voice_mode/tools/sound_fonts/hook_handler.py +127 -0
  70. voice_mode/tools/sound_fonts/player.py +180 -0
  71. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/METADATA +1 -1
  72. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/RECORD +76 -65
  73. voice_mode/frontend/.next/static/chunks/app/layout-0074dd8ab91cdbe0.js +0 -1
  74. voice_mode/frontend/.next/static/chunks/app/page-ae5f3aa9d9ba5993.js +0 -1
  75. /voice_mode/frontend/.next/static/{c5TIe90lGzrESrqJkkXQa → FTYM9NyY_2zq92GYxPDhS}/_buildManifest.js +0 -0
  76. /voice_mode/frontend/.next/static/{c5TIe90lGzrESrqJkkXQa → FTYM9NyY_2zq92GYxPDhS}/_ssgManifest.js +0 -0
  77. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/WHEEL +0 -0
  78. {voice_mode-4.0.1.dist-info → voice_mode-4.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,285 @@
1
+ """Claude Code message extraction tools for Think Out Loud mode and conversation analysis."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Optional, List, Dict, Any
8
+ from datetime import datetime
9
+
10
+ from voice_mode.server import mcp
11
+ from voice_mode.config import THINK_OUT_LOUD_ENABLED
12
+
13
+ logger = logging.getLogger("voice-mode")
14
+
15
+
16
def find_claude_log_file(working_dir: Optional[str] = None) -> Optional[Path]:
    """Find the current Claude Code conversation log file.

    The working directory is mapped to a folder name under
    ``~/.claude/projects`` by replacing every ``/`` and ``.`` with ``-``;
    the most recently modified ``*.jsonl`` file in that folder is returned.

    Args:
        working_dir: The working directory (defaults to CWD)

    Returns:
        Path to the most recent JSONL log file, or None if the log
        directory does not exist or contains no readable ``.jsonl`` files
    """
    if working_dir is None:
        working_dir = os.getcwd()

    logger.debug(f"Looking for Claude logs in working_dir: {working_dir}")

    # Transform path: /Users/admin/Code/github.com/project → -Users-admin-Code-github-com-project
    # Note: Both slashes and dots are replaced with hyphens
    project_dir = working_dir.replace('/', '-').replace('.', '-')
    logger.debug(f"Transformed project dir: {project_dir}")

    # Build path to Claude logs
    log_dir = Path.home() / '.claude' / 'projects' / project_dir
    logger.debug(f"Claude log directory: {log_dir}")

    if not log_dir.exists():
        logger.warning(f"Claude log directory does not exist: {log_dir}")
        return None

    # Stat each candidate exactly once. A log file can be removed between
    # the glob and the stat call; skip such files instead of crashing.
    candidates = []
    for path in log_dir.glob('*.jsonl'):
        try:
            candidates.append((path.stat().st_mtime, path))
        except OSError as e:
            logger.debug(f"Skipping unreadable log file {path}: {e}")

    if not candidates:
        logger.warning(f"No .jsonl files found in {log_dir}")
        return None

    # max() keeps the first entry among equal mtimes, matching the order a
    # stable descending sort would produce.
    newest = max(candidates, key=lambda pair: pair[0])[1]
    logger.info(f"Found {len(candidates)} Claude log files, using most recent: {newest.name}")
    return newest
57
+
58
+
59
def extract_messages_from_log(log_file: Path, last_n: int = 2, message_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """Extract messages from Claude Code JSONL log.

    The file is scanned from its last line backwards, so the returned list
    is ordered most recent first. Only entries whose ``type`` is ``user``
    or ``assistant`` are returned; blank lines, invalid JSON and JSON
    values that are not objects are skipped.

    Args:
        log_file: Path to the JSONL log file
        last_n: Number of most recent messages to return (default: 2).
            Values <= 0 yield an empty list.
        message_types: Optional list of message types to filter ('user', 'assistant', 'system')
            If None, returns all message types

    Returns:
        List of messages with metadata (keys: type, role, content,
        timestamp, uuid, model, and usage for assistant messages that
        carry it). Empty if the file cannot be read.
    """
    logger.debug(f"Extracting {last_n} messages from {log_file}, types={message_types}")
    messages: List[Dict[str, Any]] = []

    # Without this guard the first matching entry would be appended before
    # the length check, so last_n=0 would still return one message.
    if last_n <= 0:
        logger.info(f"Extracted {len(messages)} messages (requested {last_n})")
        return messages

    try:
        # JSONL logs are UTF-8; do not depend on the platform default, and
        # do not let one undecodable byte abort the whole read.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()

        total_lines = len(lines)
        logger.debug(f"Read {total_lines} lines from log file")

        # Process lines in reverse to get most recent first
        for i, line in enumerate(reversed(lines)):
            if not line.strip():
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                logger.debug(f"Skipping invalid JSON at line {total_lines - i}: {e}")
                continue

            # A valid JSON line may still be a list/number/string; only
            # objects can be log entries.
            if not isinstance(entry, dict):
                continue

            entry_type = entry.get('type')

            # Filter by message type if specified
            if message_types and entry_type not in message_types:
                continue

            # Extract user or assistant messages
            if entry_type not in ('user', 'assistant'):
                continue

            logger.debug(f"Found {entry_type} message at line {total_lines - i}")
            message = entry.get('message', {})
            if not isinstance(message, dict):
                message = {}

            # Build message info
            message_info = {
                'type': entry_type,
                'role': message.get('role'),
                'content': message.get('content', []),
                'timestamp': entry.get('timestamp'),
                'uuid': entry.get('uuid'),
                'model': message.get('model') if entry_type == 'assistant' else None
            }

            # Add usage info for assistant messages
            if entry_type == 'assistant' and 'usage' in message:
                message_info['usage'] = message['usage']

            messages.append(message_info)

            if len(messages) >= last_n:
                logger.info(f"Extracted {len(messages)} messages successfully")
                return messages

    except Exception as e:
        # Best effort: report the problem and return whatever was collected.
        logger.error(f"Error reading log file {log_file}: {e}")

    logger.info(f"Extracted {len(messages)} messages (requested {last_n})")
    return messages
127
+
128
+
129
def extract_thinking_from_messages(messages: List[Dict[str, Any]]) -> List[str]:
    """Extract thinking content from a list of messages.

    Only messages whose ``type`` is ``assistant`` are inspected. Within
    them, every content block of type ``thinking`` contributes its text,
    read from the block's ``thinking`` key (the field the Anthropic
    Messages API uses) with a fallback to ``text``.

    Args:
        messages: List of message dictionaries

    Returns:
        List of non-empty, whitespace-stripped thinking text strings, in
        the order they were encountered
    """
    thinking_texts: List[str] = []

    for message in messages:
        if message.get('type') != 'assistant':
            continue

        content = message.get('content')
        # Content may be missing, None, or a plain string; only a list of
        # blocks can contain thinking entries.
        if not isinstance(content, list):
            continue

        for item in content:
            if not isinstance(item, dict) or item.get('type') != 'thinking':
                continue
            raw = item.get('thinking') or item.get('text') or ''
            text = raw.strip() if isinstance(raw, str) else ''
            if text:
                thinking_texts.append(text)

    return thinking_texts
150
+
151
+
152
def _content_blocks(content: Any) -> List[Any]:
    """Normalize message content (string, list or missing) to a list of blocks."""
    if isinstance(content, str):
        # A plain-string message body is equivalent to a single text block.
        return [{'type': 'text', 'text': content}] if content else []
    if isinstance(content, list):
        return content
    return []


def _thinking_text(item: Dict[str, Any]) -> str:
    """Return a thinking block's text from its 'thinking' key, else 'text'."""
    return item.get('thinking') or item.get('text') or ''


@mcp.tool
def get_claude_messages(
    last_n: int = 2,
    working_dir: Optional[str] = None,
    message_types: Optional[List[str]] = None,
    format: str = "full"
) -> str:
    """Extract messages from Claude Code conversation logs.

    This tool reads Claude Code's conversation logs to extract recent messages
    for Think Out Loud mode and conversation analysis. Messages are listed
    most recent first.

    Args:
        last_n: Number of most recent messages to return (default: 2)
        working_dir: Working directory to find logs for (defaults to CWD)
        message_types: Optional list to filter by type ('user', 'assistant').
            If None, returns all types.
        format: Output format - 'full' (complete message), 'text' (just text content),
            'thinking' (just thinking content). Any other value is treated as 'full'.

    Returns:
        The extracted messages in the requested format, or an explanatory
        message when Think Out Loud mode is disabled, no log file is found,
        or no messages match.
    """
    logger.debug(f"get_claude_messages called: last_n={last_n}, working_dir={working_dir}, types={message_types}, format={format}")

    # Check if Think Out Loud mode is enabled
    if not THINK_OUT_LOUD_ENABLED:
        logger.warning("Think Out Loud mode is not enabled")
        return "Think Out Loud mode is not enabled. Set VOICEMODE_THINK_OUT_LOUD=true to enable."

    # Find the log file
    log_file = find_claude_log_file(working_dir)
    if not log_file:
        return f"Could not find Claude Code logs for directory: {working_dir or os.getcwd()}"

    # Extract messages
    messages = extract_messages_from_log(log_file, last_n, message_types)

    if not messages:
        return "No messages found in recent Claude Code logs."

    # Format output based on requested format
    if format == "thinking":
        # Extract only thinking content
        thinking_texts = extract_thinking_from_messages(messages)
        if not thinking_texts:
            return "No thinking content found in recent messages."
        return "\n\n=== Next Thinking ===\n\n".join(thinking_texts)

    if format == "text":
        # Extract just the text content
        result = []
        for msg in messages:
            content_text = []
            for item in _content_blocks(msg.get('content')):
                if not isinstance(item, dict):
                    continue
                if item.get('type') == 'text':
                    content_text.append(item.get('text', ''))
                elif item.get('type') == 'thinking':
                    content_text.append(f"[Thinking: {_thinking_text(item)}]")
            if content_text:
                result.append(f"{msg['type'].title()}: {' '.join(content_text)}")
        return "\n\n".join(result)

    # format == "full" (also the fallback for unrecognized values)
    # Return complete message structure
    result = []
    for i, msg in enumerate(messages, 1):
        result.append(f"=== Message {i} ===")
        result.append(f"Type: {msg['type']}")
        result.append(f"Timestamp: {msg.get('timestamp', 'Unknown')}")
        if msg.get('model'):
            result.append(f"Model: {msg['model']}")

        # Format content
        content = _content_blocks(msg.get('content'))
        if content:
            result.append("Content:")
            for item in content:
                if not isinstance(item, dict):
                    continue
                item_type = item.get('type', 'unknown')
                if item_type == 'text':
                    result.append(f" [Text]: {item.get('text', '')}")
                elif item_type == 'thinking':
                    result.append(f" [Thinking]: {_thinking_text(item)}")
                elif item_type == 'tool_use':
                    result.append(f" [Tool Use]: {item.get('name', '')}")
                elif item_type == 'tool_result':
                    # Tool results may be a string, a list of blocks or
                    # None; stringify before truncating the preview.
                    preview = str(item.get('content', ''))[:100]
                    result.append(f" [Tool Result]: {preview}...")
        result.append("")

    return "\n".join(result).strip()
246
+
247
+
248
@mcp.tool
def check_claude_context() -> str:
    """Check if running in Claude Code context.

    Returns information about the Claude Code environment including:
    - Whether Claude Code logs are accessible
    - Current working directory
    - Log file location, size and age if found
    - The expected log directory when no log file was found
    """
    working_dir = os.getcwd()
    log_file = find_claude_log_file(working_dir)

    result = [
        f"Working Directory: {working_dir}",
        f"Claude Logs Found: {'Yes' if log_file else 'No'}",
    ]

    if log_file:
        # Stat once so size and mtime come from the same snapshot.
        file_stat = log_file.stat()
        result.append(f"Log File: {log_file}")
        result.append(f"Log Size: {file_stat.st_size:,} bytes")

        # Check for recent activity; clamp at zero in case the file's
        # mtime is slightly ahead of the local clock.
        age = datetime.now() - datetime.fromtimestamp(file_stat.st_mtime)
        age_seconds = max(age.total_seconds(), 0.0)

        if age_seconds < 60:
            result.append(f"Last Updated: {int(age_seconds)} seconds ago")
        elif age_seconds < 3600:
            result.append(f"Last Updated: {int(age_seconds / 60)} minutes ago")
        else:
            result.append(f"Last Updated: {int(age_seconds / 3600)} hours ago")
    else:
        # Use the same transformation as find_claude_log_file: both
        # slashes and dots become hyphens. Replacing only slashes would
        # show a wrong location for paths such as .../github.com/...
        project_dir = working_dir.replace('/', '-').replace('.', '-')
        expected_path = Path.home() / '.claude' / 'projects' / project_dir
        result.append(f"Expected Log Location: {expected_path}")
        result.append("Note: Logs are only created when using Claude Code (claude.ai/code)")

    return "\n".join(result)
@@ -85,6 +85,7 @@ from voice_mode.utils import (
85
85
  log_tool_request_start,
86
86
  log_tool_request_end
87
87
  )
88
+ from voice_mode.pronounce import get_manager as get_pronounce_manager, is_enabled as pronounce_enabled
88
89
 
89
90
  logger = logging.getLogger("voice-mode")
90
91
 
@@ -255,6 +256,11 @@ async def text_to_speech_with_failover(
255
256
  """
256
257
  from voice_mode.config import SIMPLE_FAILOVER
257
258
 
259
+ # Apply pronunciation rules if enabled
260
+ if pronounce_enabled():
261
+ pronounce_mgr = get_pronounce_manager()
262
+ message = pronounce_mgr.process_tts(message)
263
+
258
264
  # Use simple failover if enabled
259
265
  if SIMPLE_FAILOVER:
260
266
  from voice_mode.simple_failover import simple_tts_failover
@@ -695,6 +701,11 @@ async def _speech_to_text_internal(
695
701
  logger.debug(f"STT API response type: {type(transcription)}")
696
702
  text = transcription.strip() if isinstance(transcription, str) else transcription.text.strip()
697
703
 
704
+ # Apply pronunciation rules if enabled
705
+ if text and pronounce_enabled():
706
+ pronounce_mgr = get_pronounce_manager()
707
+ text = pronounce_mgr.process_stt(text)
708
+
698
709
  if text:
699
710
  logger.info(f"✓ STT result: '{text}'")
700
711
 
@@ -0,0 +1,245 @@
1
+ """MCP tools for managing pronunciation rules."""
2
+
3
+ import json
4
+ import yaml
5
+ from typing import Optional, Literal, List, Dict
6
+
7
+ from voice_mode.server import mcp
8
+ from voice_mode.pronounce import get_manager, is_enabled
9
+
10
+
11
@mcp.tool()
async def pronounce(
    action: Literal["list", "add", "remove", "enable", "disable", "test", "reload"],
    pattern: Optional[str] = None,
    replacement: Optional[str] = None,
    rule_type: Literal["tts", "stt"] = "tts",
    description: Optional[str] = None,
    name: Optional[str] = None,
    test_text: Optional[str] = None
) -> str:
    r"""
    Manage pronunciation rules for TTS/STT text processing.

    This tool allows managing pronunciation rules that improve TTS pronunciation
    and correct STT transcription errors. Rules are applied automatically when
    text is processed.

    Actions:
    - list: Show all non-private rules (returns count of private rules)
    - add: Add a new rule (requires pattern, replacement, rule_type)
    - remove: Remove a rule by name (requires name, rule_type)
    - enable: Enable a disabled rule (requires name, rule_type)
    - disable: Disable an enabled rule (requires name, rule_type)
    - test: Test rules on text (requires test_text, rule_type)
    - reload: Reload rules from configuration files

    Examples:
    - List all TTS rules:
      pronounce(action="list", rule_type="tts")

    - Add a rule to pronounce "3M" correctly:
      pronounce(
          action="add",
          pattern=r"\b3M\b",
          replacement="three em",
          rule_type="tts",
          description="Pronounce 3M company name"
      )

    - Test how text would be pronounced:
      pronounce(
          action="test",
          test_text="I work at 3M",
          rule_type="tts"
      )

    - Correct common Whisper mishearing:
      pronounce(
          action="add",
          pattern="me tool",
          replacement="metool",
          rule_type="stt",
          description="Correct 'me tool' to 'metool'"
      )

    Args:
        action: The action to perform
        pattern: Regex pattern for add action
        replacement: Replacement text for add action
        rule_type: Type of rule (tts for text-to-speech, stt for speech-to-text)
        description: Human-readable description for add action
        name: Rule name for remove/enable/disable actions
        test_text: Text to test for test action

    Returns:
        Result of the action as a formatted string
    """
    # NOTE: the docstring is a raw string so the regex example keeps its
    # literal backslashes instead of turning "\b" into a backspace character.
    manager = get_manager()

    if action == "list":
        # List rules (excluding private ones)
        all_rules = manager.list_rules(include_private=True)
        public_rules = manager.list_rules(include_private=False)

        # Filter by type if specified
        if rule_type:
            public_rules = [r for r in public_rules if r['direction'] == rule_type]
            all_rules = [r for r in all_rules if r['direction'] == rule_type]

        # Format the response
        if not public_rules:
            # No public rules, so every remaining rule is private.
            private_count = len(all_rules)
            if private_count > 0:
                return f"No public {rule_type} rules found. ({private_count} private rules hidden)"
            return f"No {rule_type} rules found."

        # Build response
        parts = [f"Pronunciation Rules ({rule_type.upper()}):\n\n"]

        for rule in public_rules:
            status = "✓" if rule['enabled'] else "✗"
            parts.append(f"{status} {rule['name']}: \n")
            parts.append(f" Pattern: {rule['pattern']}\n")
            parts.append(f" Replace: {rule['replacement']}\n")
            if rule['description']:
                parts.append(f" Desc: {rule['description']}\n")
            parts.append("\n")

        # Add private rule count if any
        private_count = len(all_rules) - len(public_rules)
        if private_count > 0:
            parts.append(f"({private_count} private rules hidden from view)\n")

        return "".join(parts)

    if action == "add":
        if not pattern or not replacement:
            return "Error: 'add' action requires pattern and replacement"

        success = manager.add_rule(
            direction=rule_type,
            pattern=pattern,
            replacement=replacement,
            name=name,
            description=description or "",
            enabled=True,
            private=False  # MCP-created rules are public
        )

        if success:
            return f"✓ Rule added successfully for {rule_type.upper()}"
        return "✗ Failed to add rule. Check if the regex pattern is valid."

    if action == "remove":
        if not name:
            return "Error: 'remove' action requires rule name"

        if manager.remove_rule(rule_type, name):
            return f"✓ Rule '{name}' removed from {rule_type.upper()}"
        return f"✗ Rule '{name}' not found in {rule_type.upper()} rules (may be private)"

    if action == "enable":
        if not name:
            return "Error: 'enable' action requires rule name"

        if manager.enable_rule(rule_type, name):
            return f"✓ Rule '{name}' enabled in {rule_type.upper()}"
        return f"✗ Failed to enable rule '{name}' (not found or private)"

    if action == "disable":
        if not name:
            return "Error: 'disable' action requires rule name"

        if manager.disable_rule(rule_type, name):
            return f"✓ Rule '{name}' disabled in {rule_type.upper()}"
        return f"✗ Failed to disable rule '{name}' (not found or private)"

    if action == "test":
        if not test_text:
            return "Error: 'test' action requires test_text"

        result = manager.test_rule(test_text, rule_type)

        if test_text != result:
            return f"Original: {test_text}\nModified: {result}\n\nRules were applied to transform the text."
        return f"No changes: {test_text}\n\nNo rules matched or all rules are disabled."

    if action == "reload":
        manager.reload_rules()

        # Get counts
        all_rules = manager.list_rules(include_private=True)
        tts_count = sum(1 for r in all_rules if r['direction'] == 'tts')
        stt_count = sum(1 for r in all_rules if r['direction'] == 'stt')

        return f"✓ Pronunciation rules reloaded\nLoaded {tts_count} TTS rules and {stt_count} STT rules"

    return f"Error: Unknown action '{action}'. Use: list, add, remove, enable, disable, test, reload"
192
+
193
+
194
@mcp.tool()
async def pronounce_status() -> str:
    """
    Report the current state of the pronunciation middleware.

    The report states whether pronunciation processing is enabled, gives
    total/public/private/enabled rule counts for TTS and STT, shows the
    logging and private-mode environment flags, and lists the manager's
    configuration file paths.

    Returns:
        Status information as a formatted string
    """
    import os

    enabled = is_enabled()
    manager = get_manager()

    # Fetch both views once; every count below is derived from them.
    every_rule = manager.list_rules(include_private=True)
    visible_rules = manager.list_rules(include_private=False)

    def tally(rules, direction, only_enabled=False):
        # Count the rules for one direction, optionally only enabled ones.
        if only_enabled:
            return sum(1 for r in rules if r['direction'] == direction and r['enabled'])
        return sum(1 for r in rules if r['direction'] == direction)

    tts_all = tally(every_rule, 'tts')
    tts_public = tally(visible_rules, 'tts')
    tts_enabled = tally(every_rule, 'tts', only_enabled=True)

    stt_all = tally(every_rule, 'stt')
    stt_public = tally(visible_rules, 'stt')
    stt_enabled = tally(every_rule, 'stt', only_enabled=True)

    log_enabled = os.environ.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true'
    private_mode = os.environ.get('VOICEMODE_PRONUNCIATION_PRIVATE_MODE', '').lower() == 'true'

    pieces = [
        "Pronunciation Middleware Status:\n",
        f"{'=' * 40}\n",
        f"Enabled: {'✓ Yes' if enabled else '✗ No'}\n\n",
        "TTS Rules:\n",
        f" Total: {tts_all} ({tts_public} public, {tts_all - tts_public} private)\n",
        f" Enabled: {tts_enabled}\n\n",
        "STT Rules:\n",
        f" Total: {stt_all} ({stt_public} public, {stt_all - stt_public} private)\n",
        f" Enabled: {stt_enabled}\n\n",
        "Configuration:\n",
        f" Logging: {'✓ Enabled' if log_enabled else '✗ Disabled'}\n",
        f" Private Mode: {'✓ All rules private' if private_mode else '✗ Normal'}\n",
        "\nConfiguration Files:\n",
    ]

    # One bullet line per configuration file path
    pieces.extend(f" - {path}\n" for path in manager.config_paths)

    return "".join(pieces)
@@ -0,0 +1 @@
1
+ """Sound Fonts module for Voice Mode."""
@@ -0,0 +1,87 @@
1
+ """
2
+ Simple audio player for sound fonts.
3
+
4
+ Handles audio playback with features like volume control, start/end times,
5
+ and potential future support for URLs and looping.
6
+ """
7
+
8
+ import subprocess
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ import sys
12
+
13
+
14
class Player:
    """Simple audio player using ffplay."""

    def play(
        self,
        file_path: str,
        start: float = 0.0,
        end: Optional[float] = None,
        volume: float = 1.0
    ) -> bool:
        """
        Play an audio file or slice of it.

        Playback is launched as a detached ``ffplay`` process; this method
        does not wait for it to finish. Error messages are printed to
        stderr only when stderr is a terminal.

        Args:
            file_path: Path to audio file (local path, future: URLs)
            start: Start time in seconds (values <= 0 mean the beginning)
            end: End time in seconds (None for end of file). Ignored when
                it does not lie after the effective start.
            volume: Volume multiplier, clamped to 0.0-2.0 (1.0 = unchanged)

        Returns:
            True if playback started successfully, False otherwise
        """
        # Check if file exists (skip for URLs in future)
        if not file_path.startswith(('http://', 'https://')):
            if not Path(file_path).exists():
                if sys.stderr.isatty():
                    print(f"Error: Audio file not found: {file_path}", file=sys.stderr)
                return False

        # Build ffplay command for non-blocking audio playback
        cmd = [
            "ffplay",
            "-nodisp",  # No video display
            "-autoexit",  # Exit when playback ends
            "-loglevel", "quiet",  # Suppress output
        ]

        # A negative start is not passed to ffplay, so it must not count
        # toward the duration either: playback effectively begins at 0.
        effective_start = max(start, 0.0)

        # Add start time if specified
        if effective_start > 0:
            cmd.extend(["-ss", str(start)])

        # Add duration if end time specified
        if end is not None:
            duration = end - effective_start
            if duration > 0:
                cmd.extend(["-t", str(duration)])

        # Add volume filter if not 1.0
        if volume != 1.0:
            # Clamp volume between 0 and 2 (200%)
            volume = max(0.0, min(2.0, volume))
            cmd.extend(["-af", f"volume={volume}"])

        # Add the file path
        cmd.append(file_path)

        try:
            # Run in background (non-blocking)
            subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            return True
        except FileNotFoundError:
            # ffplay not installed
            if sys.stderr.isatty():
                print("Error: ffplay not found. Please install ffmpeg.", file=sys.stderr)
            return False
        except Exception as e:
            if sys.stderr.isatty():
                print(f"Error playing audio: {e}", file=sys.stderr)
            return False