PyPI - voice-mode - Versions diffs - 4.0.1__tar.gz → 4.2.0__tar.gz - Mend

voice-mode 4.0.1__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

{voice_mode-4.0.1 → voice_mode-4.2.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,62 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [4.2.0] - 2025-09-03
+### Added
+- **🧠 Think Out Loud Mode - AI Reasoning Made Audible**
+  - Revolutionary feature that transforms AI's internal thinking into spoken performances
+  - Extracts and voices Claude's reasoning blocks using multiple personas
+  - Herman's Head / Inside Out style multi-voice performances for different reasoning types
+  - Theditor agent that orchestrates thinking performances with distinct voices
+  - Makes AI decision-making transparent and engaging through voice
+- **🔊 Sound Fonts Integration - Audio Feedback for Every Action**
+  - Play custom sounds for tool operations, errors, and completions
+  - Filesystem-based sound font system with automatic discovery
+  - Claude Code integration via stdin-receiver for hook-based audio
+  - CLI command `play-sound` with theme, action, and sound selection
+  - Enhances user experience with auditory feedback during operations
+- **🎭 Claude Code Deep Integration**
+  - Extract and analyze Claude's conversation logs in real-time
+  - Access Claude's internal thinking blocks for transparency
+  - CLI commands for message extraction with multiple output formats
+  - Automatic context detection for Claude Code sessions
+  - Foundation for advanced AI introspection features
+### Changed
+- **Enhanced Message Extraction**
+  - Generic and flexible extraction supporting full conversations
+  - Multiple output formats: full messages, text only, or thinking only
+  - Better filtering by message type (user/assistant)
+  - Improved integration with voice mode tools
+### Removed
+- **Redundant get_claude_thinking MCP tool**
+  - Consolidated into more powerful get_claude_messages tool
+### Documentation
+- **Comprehensive Think Out Loud Documentation**
+  - Agent specifications for theditor
+  - Claude orchestration instructions
+  - Voice persona mapping guide
+  - Integration patterns and examples
+## [4.1.0] - 2025-09-01
+### Added
+- **Pronunciation middleware for TTS/STT text processing**
+  - Configurable pronunciation rules system that processes text before TTS and after STT
+  - Regex-based text substitution rules with YAML configuration
+  - Separate TTS and STT rule sets for bidirectional corrections
+  - Privacy support - rules can be marked private to hide from LLM tool listings
+  - Default rules for common patterns (3M, PoE, GbE, etc.)
+  - Full CLI interface for managing pronunciation rules
+  - MCP tool for LLM-based rule management with `pronounce` tool
+  - Integrated into converse tool for automatic text processing
+  - New configuration file: `voice_mode/data/default_pronunciation.yaml`
 ## [4.0.1] - 2025-09-01
 ### Removed

{voice_mode-4.0.1 → voice_mode-4.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 4.0.1
+Version: 4.2.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode

{voice_mode-4.0.1 → voice_mode-4.2.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "4.0.1"
+__version__ = "4.2.0"

{voice_mode-4.0.1 → voice_mode-4.2.0}/voice_mode/cli.py RENAMED Viewed

@@ -1360,17 +1360,27 @@ def cli():
 # Import subcommand groups
 from voice_mode.cli_commands import exchanges as exchanges_cmd
 from voice_mode.cli_commands import transcribe as transcribe_cmd
+from voice_mode.cli_commands import pronounce_commands
+from voice_mode.cli_commands import claude
+from voice_mode.cli_commands import hook as hook_cmd
 # Add subcommands to legacy CLI
 cli.add_command(exchanges_cmd.exchanges)
 cli.add_command(transcribe_cmd.transcribe)
+cli.add_command(pronounce_commands.pronounce_group)
+cli.add_command(claude.claude_group)
 # Add exchanges to main CLI
 voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
+voice_mode_main_cli.add_command(pronounce_commands.pronounce_group)
+voice_mode_main_cli.add_command(claude.claude_group)
 # Add transcribe to main CLI
 voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
+# Add hook command to main CLI
+voice_mode_main_cli.add_command(hook_cmd.hook)
 # Converse command - direct voice conversation from CLI
 @voice_mode_main_cli.command()
@@ -1748,6 +1758,69 @@ def update(force):
                 click.echo("Try running: pip install --upgrade voice-mode")
+# Sound Fonts command
+@voice_mode_main_cli.command("play-sound")
+@click.help_option('-h', '--help')
+@click.option('-t', '--tool', help='Tool name for direct command-line usage')
+@click.option('-a', '--action', default='start', type=click.Choice(['start', 'end']), help='Action type')
+@click.option('-s', '--subagent', help='Subagent type (for Task tool)')
+def play_sound(tool, action, subagent):
+    """Play sound based on tool events (primarily for Claude Code hooks).
+    This command is designed to be called by Claude Code hooks to play sounds
+    when tools are used. It reads hook data from stdin by default, or can be
+    used directly with command-line options.
+    Examples:
+        echo '{"tool_name":"Task","tool_input":{"subagent_type":"mama-bear"}}' | voicemode play-sound
+        voicemode play-sound --tool Task --action start --subagent mama-bear
+    """
+    import sys
+    from .tools.sound_fonts.player import AudioPlayer
+    from .tools.sound_fonts.hook_handler import (
+        read_hook_data_from_stdin,
+        parse_claude_code_hook
+    )
+    # Try to read hook data from stdin first
+    hook_data = None
+    if not sys.stdin.isatty():
+        hook_data = read_hook_data_from_stdin()
+    if hook_data:
+        # Parse Claude Code hook format
+        parsed_data = parse_claude_code_hook(hook_data)
+        if not parsed_data:
+            sys.exit(1)
+        tool_name = parsed_data["tool_name"]
+        action_type = parsed_data["action"]
+        subagent_type = parsed_data["subagent_type"]
+        metadata = parsed_data["metadata"]
+    else:
+        # Use command-line arguments
+        if not tool:
+            click.echo("Error: --tool is required when not reading from stdin", err=True)
+            sys.exit(1)
+        tool_name = tool
+        action_type = action
+        subagent_type = subagent
+        metadata = {}
+    # Play the sound
+    player = AudioPlayer()
+    success = player.play_sound_for_event(
+        tool_name=tool_name,
+        action=action_type,
+        subagent_type=subagent_type,
+        metadata=metadata
+    )
+    # Silent exit for hooks - don't clutter Claude Code output
+    sys.exit(0 if success else 1)
 # Completions command
 @voice_mode_main_cli.command()
 @click.help_option('-h', '--help')

voice_mode-4.2.0/voice_mode/cli_commands/claude.py ADDED Viewed

@@ -0,0 +1,208 @@
+"""CLI commands for Claude Code message extraction."""
+import click
+import json
+import os
+import logging
+from pathlib import Path
+from typing import Optional, List
+from datetime import datetime
+from voice_mode.tools.claude_thinking import (
+    find_claude_log_file,
+    extract_messages_from_log,
+    extract_thinking_from_messages
+)
+logger = logging.getLogger("voice-mode")
+@click.group(name='claude')
+def claude_group():
+    """Extract messages from Claude Code conversation logs."""
+    pass
+@claude_group.command(name='messages')
+@click.option('--last-n', '-n', default=2, type=int,
+              help='Number of recent messages to extract (default: 2)')
+@click.option('--type', '-t', 'message_types', multiple=True,
+              type=click.Choice(['user', 'assistant']),
+              help='Filter by message type (can specify multiple)')
+@click.option('--format', '-f',
+              type=click.Choice(['full', 'text', 'thinking']),
+              default='full',
+              help='Output format: full (complete), text (text only), thinking (thinking only)')
+@click.option('--working-dir', '-d', type=click.Path(exists=True),
+              help='Working directory to find logs for (defaults to CWD)')
+@click.option('--output', '-o', type=click.Path(),
+              help='Save output to file')
+@click.option('--json', 'as_json', is_flag=True,
+              help='Output as JSON (overrides format)')
+def messages_command(last_n: int, message_types: tuple, format: str,
+                    working_dir: Optional[str], output: Optional[str],
+                    as_json: bool):
+    """
+    Extract recent messages from Claude Code logs.
+    Examples:
+        voicemode claude messages
+        voicemode claude messages -n 5 --format thinking
+        voicemode claude messages --type assistant --format text
+        voicemode claude messages --json -o messages.json
+    """
+    # Find log file
+    log_file = find_claude_log_file(working_dir)
+    if not log_file:
+        click.echo(f"Error: Could not find Claude Code logs for directory: {working_dir or os.getcwd()}", err=True)
+        return
+    # Extract messages
+    message_type_list = list(message_types) if message_types else None
+    messages = extract_messages_from_log(log_file, last_n, message_type_list)
+    if not messages:
+        click.echo("No messages found in recent logs", err=True)
+        return
+    # Format output
+    if as_json:
+        content = json.dumps(messages, indent=2)
+    elif format == 'thinking':
+        thinking_texts = extract_thinking_from_messages(messages)
+        if not thinking_texts:
+            click.echo("No thinking content found", err=True)
+            return
+        content = "\n\n=== Next Thinking ===\n\n".join(thinking_texts)
+    elif format == 'text':
+        result = []
+        for msg in messages:
+            content_text = []
+            content_items = msg.get('content', [])
+            logger.debug(f"Message has {len(content_items)} content items")
+            for item in content_items:
+                if isinstance(item, dict):
+                    item_type = item.get('type')
+                    logger.debug(f"Content item type: {item_type}")
+                    if item_type == 'text':
+                        text = item.get('text', '')
+                        if text:
+                            content_text.append(text)
+                    elif item_type == 'thinking':
+                        text = item.get('text', '')
+                        if text:
+                            content_text.append(f"[Thinking: {text}]")
+            if content_text:
+                result.append(f"{msg['type'].title()}: {' '.join(content_text)}")
+        content = "\n\n".join(result)
+    else:  # full format
+        # Format as human-readable
+        result = []
+        for i, msg in enumerate(messages, 1):
+            result.append(f"=== Message {i} ===")
+            result.append(f"Type: {msg['type']}")
+            result.append(f"Timestamp: {msg.get('timestamp', 'Unknown')}")
+            if msg.get('model'):
+                result.append(f"Model: {msg['model']}")
+            # Format content
+            content_items = msg.get('content', [])
+            if content_items:
+                result.append("Content:")
+                for item in content_items:
+                    if isinstance(item, dict):
+                        item_type = item.get('type', 'unknown')
+                        if item_type == 'text':
+                            result.append(f"  [Text]: {item.get('text', '')}")
+                        elif item_type == 'thinking':
+                            result.append(f"  [Thinking]: {item.get('text', '')}")
+                        elif item_type == 'tool_use':
+                            result.append(f"  [Tool Use]: {item.get('name', '')}")
+                        elif item_type == 'tool_result':
+                            result.append(f"  [Tool Result]: {item.get('content', '')[:100]}...")
+            result.append("")
+        content = "\n".join(result).strip()
+    # Output
+    if output:
+        Path(output).write_text(content)
+        click.echo(f"Output saved to {output}")
+    else:
+        # Debug: ensure content is printed
+        logger.debug(f"About to output {len(content)} characters")
+        click.echo(content)
+        logger.debug("Output complete")
+@claude_group.command(name='thinking')
+@click.option('--last-n', '-n', default=1, type=int,
+              help='Number of messages to search for thinking (default: 1)')
+@click.option('--working-dir', '-d', type=click.Path(exists=True),
+              help='Working directory to find logs for (defaults to CWD)')
+def thinking_command(last_n: int, working_dir: Optional[str]):
+    """
+    Extract only thinking content from Claude Code logs.
+    Convenience command equivalent to:
+    voicemode claude messages --format thinking --type assistant
+    Examples:
+        voicemode claude thinking
+        voicemode claude thinking -n 3
+    """
+    # Delegate to messages command with thinking format
+    ctx = click.get_current_context()
+    ctx.invoke(messages_command,
+               last_n=last_n,
+               message_types=('assistant',),
+               format='thinking',
+               working_dir=working_dir,
+               output=None,
+               as_json=False)
+@claude_group.command(name='check')
+def check_command():
+    """
+    Check if Claude Code context is available.
+    Shows information about the Claude Code environment including:
+    - Whether Claude Code logs are accessible
+    - Current working directory
+    - Log file location if found
+    - Last update time
+    Example:
+        voicemode claude check
+    """
+    working_dir = os.getcwd()
+    log_file = find_claude_log_file(working_dir)
+    click.echo(f"Working Directory: {working_dir}")
+    click.echo(f"Claude Logs Found: {'Yes' if log_file else 'No'}")
+    if log_file:
+        click.echo(f"Log File: {log_file}")
+        click.echo(f"Log Size: {log_file.stat().st_size:,} bytes")
+        mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
+        now = datetime.now()
+        age = now - mtime
+        if age.total_seconds() < 60:
+            click.echo(f"Last Updated: {int(age.total_seconds())} seconds ago")
+        elif age.total_seconds() < 3600:
+            click.echo(f"Last Updated: {int(age.total_seconds() / 60)} minutes ago")
+        else:
+            click.echo(f"Last Updated: {int(age.total_seconds() / 3600)} hours ago")
+    else:
+        project_dir = working_dir.replace('/', '-')
+        expected_path = Path.home() / '.claude' / 'projects' / project_dir
+        click.echo(f"Expected Log Location: {expected_path}")
+        click.echo("Note: Logs are only created when using Claude Code (claude.ai/code)")

voice_mode-4.2.0/voice_mode/cli_commands/hook.py ADDED Viewed

@@ -0,0 +1,197 @@
+"""
+Hook commands for Voice Mode - primarily for Claude Code integration.
+"""
+import click
+import sys
+import json
+import os
+from pathlib import Path
+from typing import Optional
+@click.group()
+@click.help_option('-h', '--help', help='Show this message and exit')
+def hook():
+    """Manage Voice Mode hooks and event handlers."""
+    pass
+@hook.command('stdin-receiver')
+@click.option('--tool-name', help='Override tool name (for testing)')
+@click.option('--action', type=click.Choice(['start', 'end']), help='Override action (for testing)')
+@click.option('--subagent-type', help='Override subagent type (for testing)')
+@click.option('--event', type=click.Choice(['PreToolUse', 'PostToolUse']), help='Override event (for testing)')
+@click.option('--debug', is_flag=True, help='Enable debug output')
+def stdin_receiver(tool_name, action, subagent_type, event, debug):
+    """Receive and process hook events from Claude Code via stdin.
+    This command reads JSON from stdin when called by Claude Code hooks,
+    or accepts command-line arguments for testing.
+    The filesystem structure defines sound mappings:
+    ~/.voicemode/soundfonts/current/PreToolUse/task/subagent/baby-bear.wav
+    Examples:
+        # Called by Claude Code (reads JSON from stdin)
+        voicemode hook stdin-receiver
+        # Testing with defaults
+        voicemode hook stdin-receiver --debug
+        # Testing with specific values
+        voicemode hook stdin-receiver --tool-name Task --action start --subagent-type mama-bear
+    """
+    from voice_mode.tools.sound_fonts.audio_player import Player
+    # Try to read JSON from stdin if available
+    hook_data = {}
+    if not sys.stdin.isatty():
+        try:
+            hook_data = json.load(sys.stdin)
+            if debug:
+                print(f"[DEBUG] Received JSON: {json.dumps(hook_data, indent=2)}", file=sys.stderr)
+        except Exception as e:
+            if debug:
+                print(f"[DEBUG] Failed to parse JSON from stdin: {e}", file=sys.stderr)
+            # Silent fail for hooks
+            sys.exit(0)
+    # Extract values from JSON or use command-line overrides/defaults
+    if not tool_name:
+        tool_name = hook_data.get('tool_name', 'Task')
+    if not event:
+        event_name = hook_data.get('hook_event_name', 'PreToolUse')
+    else:
+        event_name = event
+    # Map event to action if not specified
+    if not action:
+        if event_name == 'PreToolUse':
+            action = 'start'
+        elif event_name == 'PostToolUse':
+            action = 'end'
+        else:
+            action = 'start'  # Default
+    # Get subagent_type from tool_input if not specified
+    if not subagent_type and tool_name == 'Task':
+        tool_input = hook_data.get('tool_input', {})
+        subagent_type = tool_input.get('subagent_type', 'baby-bear')
+    elif not subagent_type:
+        subagent_type = None
+    if debug:
+        print(f"[DEBUG] Processing: event={event_name}, tool={tool_name}, "
+              f"action={action}, subagent={subagent_type}", file=sys.stderr)
+    # Find sound file using filesystem conventions
+    sound_file = find_sound_file(event_name, tool_name, subagent_type)
+    if sound_file:
+        if debug:
+            print(f"[DEBUG] Found sound file: {sound_file}", file=sys.stderr)
+        # Play the sound
+        player = Player()
+        success = player.play(str(sound_file))
+        if debug:
+            if success:
+                print(f"[DEBUG] Sound played successfully", file=sys.stderr)
+            else:
+                print(f"[DEBUG] Failed to play sound", file=sys.stderr)
+    else:
+        if debug:
+            print(f"[DEBUG] No sound file found for this event", file=sys.stderr)
+    # Always exit 0 to not disrupt Claude Code
+    sys.exit(0)
+def find_sound_file(event: str, tool: str, subagent: Optional[str] = None) -> Optional[Path]:
+    """
+    Find sound file using filesystem conventions.
+    Tries paths in order:
+    1. Most specific: {event}/{tool}/subagent/{subagent}.wav (Task tool only)
+    2. Tool default: {event}/{tool}/default.wav
+    3. Event default: {event}/default.wav
+    4. Global fallback: fallback.wav
+    Args:
+        event: Event name (PreToolUse, PostToolUse)
+        tool: Tool name (lowercase)
+        subagent: Optional subagent type (lowercase)
+    Returns:
+        Path to sound file if found, None otherwise
+    """
+    # Get base path (follow symlink if exists)
+    base_path = Path.home() / '.voicemode' / 'soundfonts' / 'current'
+    # Resolve symlink if it exists
+    if base_path.is_symlink():
+        base_path = base_path.resolve()
+    if not base_path.exists():
+        return None
+    # Normalize names to lowercase for filesystem
+    event = event.lower() if event else 'pretooluse'
+    tool = tool.lower() if tool else 'default'
+    subagent = subagent.lower() if subagent else None
+    # Map event names to directory names
+    event_map = {
+        'pretooluse': 'PreToolUse',
+        'posttooluse': 'PostToolUse',
+        'start': 'PreToolUse',
+        'end': 'PostToolUse'
+    }
+    event_dir = event_map.get(event, event)
+    # Build list of paths to try (most specific to least specific)
+    paths_to_try = []
+    # 1. Most specific: subagent sound (Task tool only)
+    if tool == 'task' and subagent:
+        paths_to_try.append(base_path / event_dir / tool / 'subagent' / f'{subagent}.wav')
+    # 2. Tool-specific default
+    paths_to_try.append(base_path / event_dir / tool / 'default.wav')
+    # 3. Event-level default
+    paths_to_try.append(base_path / event_dir / 'default.wav')
+    # 4. Global fallback
+    paths_to_try.append(base_path / 'fallback.wav')
+    # Find first existing file
+    for path in paths_to_try:
+        if path.exists():
+            return path
+    return None
+# Keep the old receiver command for backwards compatibility (deprecated)
+@hook.command('receiver', hidden=True)
+@click.argument('tool_name')
+@click.argument('action', type=click.Choice(['start', 'end', 'complete']))
+@click.argument('subagent_type', required=False)
+@click.option('--debug', is_flag=True, help='Enable debug output')
+def receiver_deprecated(tool_name, action, subagent_type, debug):
+    """[DEPRECATED] Use stdin-receiver instead."""
+    # Map old action to event
+    event = 'PreToolUse' if action == 'start' else 'PostToolUse'
+    # Call the new command
+    ctx = click.get_current_context()
+    ctx.invoke(stdin_receiver,
+               tool_name=tool_name,
+               action=action if action != 'complete' else 'end',
+               subagent_type=subagent_type,
+               event=event,
+               debug=debug)

voice-mode 4.0.1__tar.gz → 4.2.0__tar.gz

voice-mode 4.0.1__tar.gz → 4.2.0__tar.gz