PyPI - voice-mode - Versions diffs - 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl - Mend

voice-mode 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

voice_mode/tools/claude_thinking.py ADDED Viewed

@@ -0,0 +1,285 @@
+"""Claude Code message extraction tools for Think Out Loud mode and conversation analysis."""
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+from datetime import datetime
+from voice_mode.server import mcp
+from voice_mode.config import THINK_OUT_LOUD_ENABLED
+logger = logging.getLogger("voice-mode")
+def find_claude_log_file(working_dir: Optional[str] = None) -> Optional[Path]:
+    """Locate the newest Claude Code conversation log for a directory.
+    The log directory is ``~/.claude/projects/<name>``, where ``<name>`` is the
+    working directory with every ``/`` and ``.`` replaced by ``-``
+    (e.g. /Users/admin/Code/github.com/project → -Users-admin-Code-github-com-project).
+    Args:
+        working_dir: Directory whose logs are wanted (defaults to CWD)
+    Returns:
+        Path to the most recently modified ``*.jsonl`` file, or None when the
+        log directory is missing or contains no such file
+    """
+    base_dir = os.getcwd() if working_dir is None else working_dir
+    logger.debug(f"Looking for Claude logs in working_dir: {base_dir}")
+    # One pass maps both path separators and dots onto hyphens
+    project_dir = base_dir.translate(str.maketrans('/.', '--'))
+    logger.debug(f"Transformed project dir: {project_dir}")
+    log_dir = Path.home() / '.claude' / 'projects' / project_dir
+    logger.debug(f"Claude log directory: {log_dir}")
+    if not log_dir.exists():
+        logger.warning(f"Claude log directory does not exist: {log_dir}")
+        return None
+    candidates = list(log_dir.glob('*.jsonl'))
+    if not candidates:
+        logger.warning(f"No .jsonl files found in {log_dir}")
+        return None
+    # Newest by modification time; on a tie the first one in glob order wins
+    newest = max(candidates, key=lambda entry: entry.stat().st_mtime)
+    logger.info(f"Found {len(candidates)} Claude log files, using most recent: {newest.name}")
+    return newest
+def extract_messages_from_log(log_file: Path, last_n: int = 2, message_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+    """Extract the most recent user/assistant messages from a Claude Code JSONL log.
+    The file is scanned from its last line backwards, so the returned list is
+    ordered newest first. Only entries whose ``type`` is 'user' or 'assistant'
+    are ever returned; blank lines, invalid JSON and JSON values that are not
+    objects are skipped.
+    Args:
+        log_file: Path to the JSONL log file
+        last_n: Maximum number of messages to return (default: 2).
+                Values <= 0 yield an empty list
+        message_types: Optional list of entry types to keep ('user', 'assistant').
+                      If None or empty, both types are returned
+    Returns:
+        List of dicts with keys 'type', 'role', 'content', 'timestamp', 'uuid',
+        'model' (None for user messages) and, when the assistant message carries
+        it, 'usage'. Read errors are logged and whatever was collected so far
+        is returned; this function does not raise.
+    """
+    logger.debug(f"Extracting {last_n} messages from {log_file}, types={message_types}")
+    messages: List[Dict[str, Any]] = []
+    # The limit is only checked after an append, so a non-positive limit must
+    # be rejected up front or one message would still be returned.
+    if last_n <= 0:
+        logger.info(f"Extracted {len(messages)} messages (requested {last_n})")
+        return messages
+    try:
+        # JSONL is UTF-8; do not depend on the platform default encoding, and
+        # do not let one undecodable byte discard the whole log.
+        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
+            lines = f.readlines()
+        total = len(lines)
+        logger.debug(f"Read {total} lines from log file")
+        # Process lines in reverse to get most recent first
+        for offset, line in enumerate(reversed(lines)):
+            if not line.strip():
+                continue
+            line_no = total - offset
+            try:
+                entry = json.loads(line)
+            except json.JSONDecodeError as e:
+                logger.debug(f"Skipping invalid JSON at line {line_no}: {e}")
+                continue
+            # A valid JSON line may still be a list/string/number; skip it
+            # instead of letting .get() abort the whole scan.
+            if not isinstance(entry, dict):
+                logger.debug(f"Skipping non-object JSON at line {line_no}")
+                continue
+            entry_type = entry.get('type')
+            # Filter by message type if specified
+            if message_types and entry_type not in message_types:
+                continue
+            if entry_type not in ('user', 'assistant'):
+                continue
+            logger.debug(f"Found {entry_type} message at line {line_no}")
+            message = entry.get('message')
+            if not isinstance(message, dict):
+                # Missing or null payload: keep the entry with empty fields
+                message = {}
+            is_assistant = entry_type == 'assistant'
+            message_info = {
+                'type': entry_type,
+                'role': message.get('role'),
+                'content': message.get('content', []),
+                'timestamp': entry.get('timestamp'),
+                'uuid': entry.get('uuid'),
+                'model': message.get('model') if is_assistant else None
+            }
+            # Add usage info for assistant messages
+            if is_assistant and 'usage' in message:
+                message_info['usage'] = message['usage']
+            messages.append(message_info)
+            if len(messages) >= last_n:
+                logger.info(f"Extracted {len(messages)} messages successfully")
+                return messages
+    except Exception as e:
+        # Deliberate best-effort boundary: callers expect a list, never an exception
+        logger.error(f"Error reading log file {log_file}: {e}")
+    logger.info(f"Extracted {len(messages)} messages (requested {last_n})")
+    return messages
+def extract_thinking_from_messages(messages: List[Dict[str, Any]]) -> List[str]:
+    """Extract thinking content from a list of messages.
+    Only messages whose 'type' is 'assistant' are inspected. Within their
+    content, every dict block of type 'thinking' contributes its stripped text.
+    Anthropic thinking blocks carry that text under the 'thinking' key; the
+    'text' key is still honoured as a fallback.
+    Args:
+        messages: List of message dictionaries (as built by extract_messages_from_log)
+    Returns:
+        List of non-empty thinking text strings, in input order
+    """
+    thinking_texts = []
+    for message in messages:
+        if message.get('type') != 'assistant':
+            continue
+        content = message.get('content')
+        # Content may be null or a plain string; only a list holds typed blocks
+        if not isinstance(content, list):
+            continue
+        for item in content:
+            if not isinstance(item, dict) or item.get('type') != 'thinking':
+                continue
+            raw = item.get('thinking') or item.get('text') or ''
+            text = raw.strip() if isinstance(raw, str) else ''
+            if text:
+                thinking_texts.append(text)
+    return thinking_texts
+def _content_blocks(content: Any) -> List[Any]:
+    """Return message content as a list of blocks; a bare string becomes one text block."""
+    if isinstance(content, str):
+        return [{'type': 'text', 'text': content}] if content else []
+    if isinstance(content, list):
+        return content
+    return []
+def _thinking_text(item: Dict[str, Any]) -> str:
+    """Return the text of a thinking block ('thinking' key, falling back to 'text')."""
+    return item.get('thinking') or item.get('text', '')
+@mcp.tool
+def get_claude_messages(
+    last_n: int = 2,
+    working_dir: Optional[str] = None,
+    message_types: Optional[List[str]] = None,
+    format: str = "full"
+) -> str:
+    """Extract messages from Claude Code conversation logs.
+    This tool reads Claude Code's conversation logs to extract recent messages
+    for Think Out Loud mode and conversation analysis. Messages are reported
+    newest first.
+    Args:
+        last_n: Number of most recent messages to return (default: 2)
+        working_dir: Working directory to find logs for (defaults to CWD)
+        message_types: Optional list to filter by type ('user', 'assistant').
+                       If None, returns all types.
+        format: Output format - 'full' (complete message), 'text' (just text content),
+                'thinking' (just thinking content). Any other value is treated as 'full'.
+    Returns:
+        The extracted messages in the requested format, or an explanatory
+        message when the feature is disabled or nothing was found
+    """
+    logger.debug(f"get_claude_messages called: last_n={last_n}, working_dir={working_dir}, types={message_types}, format={format}")
+    # Check if Think Out Loud mode is enabled
+    if not THINK_OUT_LOUD_ENABLED:
+        logger.warning("Think Out Loud mode is not enabled")
+        return "Think Out Loud mode is not enabled. Set VOICEMODE_THINK_OUT_LOUD=true to enable."
+    log_file = find_claude_log_file(working_dir)
+    if not log_file:
+        return f"Could not find Claude Code logs for directory: {working_dir or os.getcwd()}"
+    messages = extract_messages_from_log(log_file, last_n, message_types)
+    if not messages:
+        return "No messages found in recent Claude Code logs."
+    if format == "thinking":
+        # Extract only thinking content
+        thinking_texts = extract_thinking_from_messages(messages)
+        if not thinking_texts:
+            return "No thinking content found in recent messages."
+        return "\n\n=== Next Thinking ===\n\n".join(thinking_texts)
+    if format == "text":
+        # Extract just the text (and bracketed thinking) content
+        result = []
+        for msg in messages:
+            content_text = []
+            # String content (plain user prompts) is normalised to one text block
+            for item in _content_blocks(msg.get('content')):
+                if not isinstance(item, dict):
+                    continue
+                item_type = item.get('type')
+                if item_type == 'text':
+                    content_text.append(item.get('text', ''))
+                elif item_type == 'thinking':
+                    content_text.append(f"[Thinking: {_thinking_text(item)}]")
+            if content_text:
+                result.append(f"{msg['type'].title()}: {' '.join(content_text)}")
+        return "\n\n".join(result)
+    # format == "full" (also the fallback for unrecognised values)
+    result = []
+    for i, msg in enumerate(messages, 1):
+        result.append(f"=== Message {i} ===")
+        result.append(f"Type: {msg['type']}")
+        result.append(f"Timestamp: {msg.get('timestamp', 'Unknown')}")
+        if msg.get('model'):
+            result.append(f"Model: {msg['model']}")
+        content = _content_blocks(msg.get('content'))
+        if content:
+            result.append("Content:")
+            for item in content:
+                if not isinstance(item, dict):
+                    continue
+                item_type = item.get('type', 'unknown')
+                if item_type == 'text':
+                    result.append(f"  [Text]: {item.get('text', '')}")
+                elif item_type == 'thinking':
+                    result.append(f"  [Thinking]: {_thinking_text(item)}")
+                elif item_type == 'tool_use':
+                    result.append(f"  [Tool Use]: {item.get('name', '')}")
+                elif item_type == 'tool_result':
+                    # Tool results may be a string or a list of blocks; stringify before truncating
+                    result.append(f"  [Tool Result]: {str(item.get('content', ''))[:100]}...")
+        result.append("")
+    return "\n".join(result).strip()
+@mcp.tool
+def check_claude_context() -> str:
+    """Check if running in Claude Code context.
+    Returns information about the Claude Code environment including:
+    - Whether Claude Code logs are accessible
+    - Current working directory
+    - Log file location, size and age if found
+    - The directory where logs were expected, if none were found
+    """
+    working_dir = os.getcwd()
+    log_file = find_claude_log_file(working_dir)
+    result = [
+        f"Working Directory: {working_dir}",
+        f"Claude Logs Found: {'Yes' if log_file else 'No'}",
+    ]
+    if log_file:
+        # Single stat() so size and mtime describe the same snapshot of the file
+        stats = log_file.stat()
+        result.append(f"Log File: {log_file}")
+        result.append(f"Log Size: {stats.st_size:,} bytes")
+        # Check for recent activity
+        age_seconds = (datetime.now() - datetime.fromtimestamp(stats.st_mtime)).total_seconds()
+        if age_seconds < 60:
+            result.append(f"Last Updated: {int(age_seconds)} seconds ago")
+        elif age_seconds < 3600:
+            result.append(f"Last Updated: {int(age_seconds / 60)} minutes ago")
+        else:
+            result.append(f"Last Updated: {int(age_seconds / 3600)} hours ago")
+    else:
+        # Must mirror find_claude_log_file: both '/' and '.' become '-',
+        # otherwise the reported location differs from the one actually searched.
+        project_dir = working_dir.replace('/', '-').replace('.', '-')
+        expected_path = Path.home() / '.claude' / 'projects' / project_dir
+        result.append(f"Expected Log Location: {expected_path}")
+        result.append("Note: Logs are only created when using Claude Code (claude.ai/code)")
+    return "\n".join(result)

voice_mode/tools/converse.py CHANGED Viewed

@@ -85,6 +85,7 @@ from voice_mode.utils import (
     log_tool_request_start,
     log_tool_request_end
 )
+from voice_mode.pronounce import get_manager as get_pronounce_manager, is_enabled as pronounce_enabled
 logger = logging.getLogger("voice-mode")
@@ -255,6 +256,11 @@ async def text_to_speech_with_failover(
     """
     from voice_mode.config import SIMPLE_FAILOVER
+    # Apply pronunciation rules if enabled
+    if pronounce_enabled():
+        pronounce_mgr = get_pronounce_manager()
+        message = pronounce_mgr.process_tts(message)
     # Use simple failover if enabled
     if SIMPLE_FAILOVER:
         from voice_mode.simple_failover import simple_tts_failover
@@ -695,6 +701,11 @@ async def _speech_to_text_internal(
             logger.debug(f"STT API response type: {type(transcription)}")
             text = transcription.strip() if isinstance(transcription, str) else transcription.text.strip()
+            # Apply pronunciation rules if enabled
+            if text and pronounce_enabled():
+                pronounce_mgr = get_pronounce_manager()
+                text = pronounce_mgr.process_stt(text)
             if text:
                 logger.info(f"✓ STT result: '{text}'")

voice_mode/tools/pronounce.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""MCP tools for managing pronunciation rules."""
+import json
+import yaml
+from typing import Optional, Literal, List, Dict
+from voice_mode.server import mcp
+from voice_mode.pronounce import get_manager, is_enabled
+@mcp.tool()
+async def pronounce(
+    action: Literal["list", "add", "remove", "enable", "disable", "test", "reload"],
+    pattern: Optional[str] = None,
+    replacement: Optional[str] = None,
+    rule_type: Literal["tts", "stt"] = "tts",
+    description: Optional[str] = None,
+    name: Optional[str] = None,
+    test_text: Optional[str] = None
+) -> str:
+    r"""
+    Manage pronunciation rules for TTS/STT text processing.
+    This tool allows managing pronunciation rules that improve TTS pronunciation
+    and correct STT transcription errors. Rules are applied automatically when
+    text is processed.
+    Actions:
+    - list: Show all non-private rules (returns count of private rules)
+    - add: Add a new rule (requires pattern, replacement, rule_type)
+    - remove: Remove a rule by name (requires name, rule_type)
+    - enable: Enable a disabled rule (requires name, rule_type)
+    - disable: Disable an enabled rule (requires name, rule_type)
+    - test: Test rules on text (requires test_text, rule_type)
+    - reload: Reload rules from configuration files
+    Examples:
+    - List all TTS rules:
+      pronounce(action="list", rule_type="tts")
+    - Add a rule to pronounce "3M" correctly:
+      pronounce(
+          action="add",
+          pattern=r"\b3M\b",
+          replacement="three em",
+          rule_type="tts",
+          description="Pronounce 3M company name"
+      )
+    - Test how text would be pronounced:
+      pronounce(
+          action="test",
+          test_text="I work at 3M",
+          rule_type="tts"
+      )
+    - Correct common Whisper mishearing:
+      pronounce(
+          action="add",
+          pattern="me tool",
+          replacement="metool",
+          rule_type="stt",
+          description="Correct 'me tool' to 'metool'"
+      )
+    Args:
+        action: The action to perform
+        pattern: Regex pattern for add action
+        replacement: Replacement text for add action
+        rule_type: Type of rule (tts for text-to-speech, stt for speech-to-text)
+        description: Human-readable description for add action
+        name: Rule name for remove/enable/disable actions (optional for add)
+        test_text: Text to test for test action
+    Returns:
+        Result of the action as a formatted string
+    """
+    # NOTE: the docstring is a raw string so that the "\b" in the regex example
+    # is shown literally instead of being turned into a backspace character.
+    manager = get_manager()
+    if action == "list":
+        all_rules = manager.list_rules(include_private=True)
+        public_rules = manager.list_rules(include_private=False)
+        # Filter by type if specified
+        if rule_type:
+            public_rules = [r for r in public_rules if r['direction'] == rule_type]
+            all_rules = [r for r in all_rules if r['direction'] == rule_type]
+        # Whatever is in the full list but not the public one is private
+        private_count = len(all_rules) - len(public_rules)
+        if not public_rules:
+            if private_count > 0:
+                return f"No public {rule_type} rules found. ({private_count} private rules hidden)"
+            return f"No {rule_type} rules found."
+        # Collect fragments and join once instead of repeated string +=
+        parts = [f"Pronunciation Rules ({rule_type.upper()}):\n\n"]
+        for rule in public_rules:
+            status = "✓" if rule['enabled'] else "✗"
+            parts.append(f"{status} {rule['name']}: \n")
+            parts.append(f"  Pattern: {rule['pattern']}\n")
+            parts.append(f"  Replace: {rule['replacement']}\n")
+            # .get(): a rule without a description must not break the listing
+            rule_description = rule.get('description')
+            if rule_description:
+                parts.append(f"  Desc: {rule_description}\n")
+            parts.append("\n")
+        if private_count > 0:
+            parts.append(f"({private_count} private rules hidden from view)\n")
+        return "".join(parts)
+    if action == "add":
+        if not pattern or not replacement:
+            return "Error: 'add' action requires pattern and replacement"
+        success = manager.add_rule(
+            direction=rule_type,
+            pattern=pattern,
+            replacement=replacement,
+            name=name,
+            description=description or "",
+            enabled=True,
+            private=False  # MCP-created rules are public
+        )
+        if success:
+            return f"✓ Rule added successfully for {rule_type.upper()}"
+        return "✗ Failed to add rule. Check if the regex pattern is valid."
+    if action == "remove":
+        if not name:
+            return "Error: 'remove' action requires rule name"
+        if manager.remove_rule(rule_type, name):
+            return f"✓ Rule '{name}' removed from {rule_type.upper()}"
+        return f"✗ Rule '{name}' not found in {rule_type.upper()} rules (may be private)"
+    if action == "enable":
+        if not name:
+            return "Error: 'enable' action requires rule name"
+        if manager.enable_rule(rule_type, name):
+            return f"✓ Rule '{name}' enabled in {rule_type.upper()}"
+        return f"✗ Failed to enable rule '{name}' (not found or private)"
+    if action == "disable":
+        if not name:
+            return "Error: 'disable' action requires rule name"
+        if manager.disable_rule(rule_type, name):
+            return f"✓ Rule '{name}' disabled in {rule_type.upper()}"
+        return f"✗ Failed to disable rule '{name}' (not found or private)"
+    if action == "test":
+        if not test_text:
+            return "Error: 'test' action requires test_text"
+        result = manager.test_rule(test_text, rule_type)
+        if test_text != result:
+            return f"Original: {test_text}\nModified: {result}\n\nRules were applied to transform the text."
+        return f"No changes: {test_text}\n\nNo rules matched or all rules are disabled."
+    if action == "reload":
+        manager.reload_rules()
+        # Report how many rules of each direction are now loaded
+        all_rules = manager.list_rules(include_private=True)
+        tts_count = sum(1 for r in all_rules if r['direction'] == 'tts')
+        stt_count = sum(1 for r in all_rules if r['direction'] == 'stt')
+        return f"✓ Pronunciation rules reloaded\nLoaded {tts_count} TTS rules and {stt_count} STT rules"
+    return f"Error: Unknown action '{action}'. Use: list, add, remove, enable, disable, test, reload"
+@mcp.tool()
+async def pronounce_status() -> str:
+    """
+    Report the state of the pronunciation middleware.
+    The report states whether processing is enabled, gives per-direction rule
+    counts (total, public, private, enabled), shows the logging and
+    private-mode environment flags, and lists the manager's configuration files.
+    Returns:
+        Status information as a formatted string
+    """
+    import os
+    def _count(rules, direction, enabled_only=False):
+        # Number of rules for one direction, optionally only the enabled ones
+        return sum(
+            1 for r in rules
+            if r['direction'] == direction and (not enabled_only or r['enabled'])
+        )
+    def _env_true(var_name):
+        # A flag is on only when its variable is literally "true" (case-insensitive)
+        return os.environ.get(var_name, '').lower() == 'true'
+    active = is_enabled()
+    manager = get_manager()
+    everything = manager.list_rules(include_private=True)
+    visible = manager.list_rules(include_private=False)
+    pieces = [
+        "Pronunciation Middleware Status:\n",
+        f"{'=' * 40}\n",
+        f"Enabled: {'✓ Yes' if active else '✗ No'}\n\n",
+    ]
+    for label, direction in (("TTS", "tts"), ("STT", "stt")):
+        total = _count(everything, direction)
+        public = _count(visible, direction)
+        switched_on = _count(everything, direction, enabled_only=True)
+        pieces.append(f"{label} Rules:\n")
+        pieces.append(f"  Total: {total} ({public} public, {total - public} private)\n")
+        pieces.append(f"  Enabled: {switched_on}\n\n")
+    log_enabled = _env_true('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS')
+    private_mode = _env_true('VOICEMODE_PRONUNCIATION_PRIVATE_MODE')
+    pieces.append("Configuration:\n")
+    pieces.append(f"  Logging: {'✓ Enabled' if log_enabled else '✗ Disabled'}\n")
+    pieces.append(f"  Private Mode: {'✓ All rules private' if private_mode else '✗ Normal'}\n")
+    # Show config file paths
+    pieces.append("\nConfiguration Files:\n")
+    pieces.extend(f"  - {path}\n" for path in manager.config_paths)
+    return "".join(pieces)

voice_mode/tools/sound_fonts/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Sound Fonts module for Voice Mode."""

voice_mode/tools/sound_fonts/audio_player.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""
+Simple audio player for sound fonts.
+Handles audio playback with features like volume control, start/end times,
+and potential future support for URLs and looping.
+"""
+import subprocess
+from pathlib import Path
+from typing import Optional
+import sys
+class Player:
+    """Simple audio player that launches ffplay as a background process."""
+    def play(
+        self,
+        file_path: str,
+        start: float = 0.0,
+        end: Optional[float] = None,
+        volume: float = 1.0
+    ) -> bool:
+        """
+        Play an audio file or slice of it.
+        Playback is non-blocking: ffplay is spawned and this method returns
+        immediately without waiting for it to finish.
+        Args:
+            file_path: Path to audio file (str or pathlib.Path). Strings starting
+                with http:// or https:// skip the existence check.
+            start: Start time in seconds; values <= 0 start at the beginning
+            end: End time in seconds (None for end of file). An end that is not
+                after start is ignored and playback runs to the end of the file.
+            volume: Volume multiplier, clamped to 0.0-2.0 (1.0 = unchanged)
+        Returns:
+            True if the ffplay process was started, False if the file is missing,
+            ffplay is not installed, or the process could not be launched
+        """
+        # Accept pathlib.Path as well as str; everything below works on the string form
+        file_path = str(file_path)
+        # Check if file exists (skip for URLs in future)
+        if not file_path.startswith(('http://', 'https://')):
+            if not Path(file_path).exists():
+                # Only report on an interactive terminal, never on a piped stderr
+                if sys.stderr.isatty():
+                    print(f"Error: Audio file not found: {file_path}", file=sys.stderr)
+                return False
+        # Build ffplay command for non-blocking audio playback
+        cmd = [
+            "ffplay",
+            "-nodisp",      # No video display
+            "-autoexit",    # Exit when playback ends
+            "-loglevel", "quiet",  # Suppress output
+        ]
+        # Add start time if specified
+        if start > 0:
+            cmd.extend(["-ss", str(start)])
+        # ffplay takes a duration (-t), not an end position
+        if end is not None:
+            duration = end - start
+            if duration > 0:
+                cmd.extend(["-t", str(duration)])
+        # Add volume filter if not 1.0
+        if volume != 1.0:
+            # Clamp volume between 0 and 2 (200%)
+            volume = max(0.0, min(2.0, volume))
+            cmd.extend(["-af", f"volume={volume}"])
+        # Add the file path
+        cmd.append(file_path)
+        try:
+            # Run in background (non-blocking)
+            subprocess.Popen(
+                cmd,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL
+            )
+            return True
+        except FileNotFoundError:
+            # ffplay not installed
+            if sys.stderr.isatty():
+                print("Error: ffplay not found. Please install ffmpeg.", file=sys.stderr)
+            return False
+        except Exception as e:
+            if sys.stderr.isatty():
+                print(f"Error playing audio: {e}", file=sys.stderr)
+            return False

voice-mode 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

voice-mode 4.0.1__py3-none-any.whl → 4.2.0__py3-none-any.whl