PyPI - voice-mode - Versions diffs - 3.34.3__tar.gz → 4.1.0__tar.gz - Mend

voice-mode 3.34.3__tar.gz → 4.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

{voice_mode-3.34.3 → voice_mode-4.1.0}/.gitignore RENAMED Viewed

@@ -117,3 +117,9 @@ models/
 *.mlpackage/
 *.mlmodel
 *.mlmodelc/
+# Coverage reports
+htmlcov/
+.coverage
+.coverage.*
+coverage.xml

{voice_mode-3.34.3 → voice_mode-4.1.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,63 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [4.1.0] - 2025-09-01
+### Added
+- **Pronunciation middleware for TTS/STT text processing**
+  - Configurable pronunciation rules system that processes text before TTS and after STT
+  - Regex-based text substitution rules with YAML configuration
+  - Separate TTS and STT rule sets for bidirectional corrections
+  - Privacy support - rules can be marked private to hide from LLM tool listings
+  - Default rules for common patterns (3M, PoE, GbE, etc.)
+  - Full CLI interface for managing pronunciation rules
+  - MCP tool for LLM-based rule management with `pronounce` tool
+  - Integrated into converse tool for automatic text processing
+  - New configuration file: `voice_mode/data/default_pronunciation.yaml`
+## [4.0.1] - 2025-09-01
+### Removed
+- Removed `whisperx` optional dependency to fix PyPI upload compatibility
+  - The dependency was specified as a Git URL which is not allowed for PyPI packages
+  - WhisperX functionality was recently added and not essential for core features
+## [4.0.0] - 2025-08-31
+### BREAKING CHANGES
+- **Unified voice configuration system**
+  - **BREAKING**: Replaced `.voices.txt` files with unified `.voicemode.env` configuration
+  - Changed environment variable from `VOICEMODE_TTS_VOICES` to `VOICEMODE_VOICES` for simplicity
+  - Implemented cascading configuration: env vars > project configs > global config
+  - Added directory tree walking for project-specific configuration discovery
+  - Supports runtime configuration reloading via MCP tools
+  - **Migration Required**: Users must migrate from `.voices.txt` to `.voicemode.env` with `VOICEMODE_VOICES=voice1,voice2` format
+### Added
+- **Comprehensive test coverage reporting system**
+  - Integration with pytest-cov for coverage measurement
+  - HTML coverage reports generated in htmlcov/ directory
+  - Coverage badges and metrics for monitoring code quality
+  - Automated coverage reporting in CI/CD pipeline
+- **Word-level timestamps for transcription**
+  - Enhanced transcription output with word-level timing information
+  - Support for SubRip (SRT) format output with precise word timestamps
+  - New transcription CLI command for processing audio files
+  - Comprehensive transcription backend supporting multiple formats
+  - Word timing data available for improved accessibility and analysis
+- **Enhanced voice selection guide**
+  - Comprehensive documentation for voice selection across different providers
+  - Clear migration instructions from old `.voices.txt` system
+### Removed
+- **Legacy voice preference system**
+  - Removed 578 lines of old `voice_preferences.py` system
+  - Eliminated unreliable `.voices.txt` file parsing
+  - Removed associated test files for deprecated voice preference system
 ## [3.34.3] - 2025-08-26
 ### Changed

{voice_mode-3.34.3 → voice_mode-4.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 3.34.3
+Version: 4.1.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -66,9 +66,12 @@ Requires-Dist: pandas>=2.0.0; extra == 'notebooks'
 Provides-Extra: scripts
 Requires-Dist: flask>=3.0.0; extra == 'scripts'
 Provides-Extra: test
+Requires-Dist: coverage[toml]>=7.4.0; extra == 'test'
 Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
-Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
+Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
 Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
+Requires-Dist: pytest-timeout>=2.2.0; extra == 'test'
+Requires-Dist: pytest-xdist>=3.5.0; extra == 'test'
 Requires-Dist: pytest>=7.0.0; extra == 'test'
 Description-Content-Type: text/markdown

{voice_mode-3.34.3 → voice_mode-4.1.0}/pyproject.toml RENAMED Viewed

@@ -67,8 +67,11 @@ dev = [
 test = [
     "pytest>=7.0.0",
     "pytest-asyncio>=0.21.0",
-    "pytest-cov>=4.0.0",
+    "pytest-cov>=4.1.0",
     "pytest-mock>=3.10.0",
+    "pytest-xdist>=3.5.0",  # For parallel testing
+    "pytest-timeout>=2.2.0",  # For test timeouts
+    "coverage[toml]>=7.4.0",
 ]
 notebooks = [
     "gradio>=4.0.0",
@@ -144,10 +147,65 @@ exclude = [
 [tool.hatch.version]
 path = "voice_mode/__version__.py"
+[tool.hatch.metadata]
+allow-direct-references = true
 [tool.pytest.ini_options]
+minversion = "7.0"
 testpaths = ["tests"]
 python_files = "test_*.py"
 python_classes = "Test*"
 python_functions = "test_*"
-# Exclude manual test directory
-addopts = "--ignore=tests/manual"
+asyncio_mode = "auto"
+addopts = [
+    "-ra",
+    "--strict-markers",
+    "--strict-config",
+    "--ignore=tests/manual",
+    "--cov=voice_mode",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html",
+    "--cov-report=xml",
+]
+markers = [
+    "unit: Unit tests (fast, isolated)",
+    "integration: Integration tests (may interact with services)",
+    "slow: Tests that take > 1s",
+    "manual: Manual tests requiring human interaction",
+]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+]
+[tool.coverage.run]
+source = ["voice_mode"]
+branch = true
+parallel = true
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*/__pycache__/*",
+    "*/site-packages/*",
+    "test-env/*",
+]
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if TYPE_CHECKING:",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "@abstractmethod",
+    "except ImportError:",
+]
+precision = 2
+skip_covered = true
+show_missing = true
+[tool.coverage.html]
+directory = "htmlcov"
+[tool.coverage.xml]
+output = "coverage.xml"

{voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "3.34.3"
+__version__ = "4.1.0"

{voice_mode-3.34.3 → voice_mode-4.1.0}/voice_mode/cli.py RENAMED Viewed

@@ -1359,12 +1359,20 @@ def cli():
 # Import subcommand groups
 from voice_mode.cli_commands import exchanges as exchanges_cmd
+from voice_mode.cli_commands import transcribe as transcribe_cmd
+from voice_mode.cli_commands import pronounce_commands
 # Add subcommands to legacy CLI
 cli.add_command(exchanges_cmd.exchanges)
+cli.add_command(transcribe_cmd.transcribe)
+cli.add_command(pronounce_commands.pronounce_group)
 # Add exchanges to main CLI
 voice_mode_main_cli.add_command(exchanges_cmd.exchanges)
+voice_mode_main_cli.add_command(pronounce_commands.pronounce_group)
+# Add transcribe to main CLI
+voice_mode_main_cli.add_command(transcribe_cmd.transcribe)
 # Converse command - direct voice conversation from CLI

voice_mode-4.1.0/voice_mode/cli_commands/pronounce_commands.py ADDED Viewed

@@ -0,0 +1,223 @@
+"""CLI commands for managing pronunciation rules."""
+import click
+import yaml
+import json
+from pathlib import Path
+from typing import Optional
+from voice_mode.pronounce import get_manager
+@click.group(name='pronounce')
+def pronounce_group():
+    """Manage pronunciation rules for TTS and STT."""
+    pass
+@pronounce_group.command(name='list')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt', 'all']), default='all',
+              help='Filter by direction (tts/stt/all)')
+@click.option('--enabled-only', '-e', is_flag=True, help='Show only enabled rules')
+@click.option('--show-private', '-p', is_flag=True, help='Include private rules')
+@click.option('--format', '-f', type=click.Choice(['table', 'yaml', 'json']), default='table',
+              help='Output format')
+def list_rules(direction: str, enabled_only: bool, show_private: bool, format: str):
+    """List pronunciation rules."""
+    manager = get_manager()
+    # Get rules
+    if direction == 'all':
+        rules = manager.list_rules(include_private=show_private)
+    else:
+        rules = manager.list_rules(direction=direction, include_private=show_private)
+    # Filter if needed
+    if enabled_only:
+        rules = [r for r in rules if r['enabled']]
+    # Format output
+    if format == 'table':
+        if not rules:
+            click.echo("No rules found.")
+            return
+        # Count private rules that were hidden
+        all_rules = manager.list_rules(include_private=True)
+        private_count = len(all_rules) - len(rules)
+        # Simple table format without tabulate
+        click.echo("\nPronunciation Rules:")
+        click.echo("=" * 80)
+        for rule in rules:
+            status = '✓' if rule['enabled'] else '✗'
+            click.echo(f"\n{status} [{rule['direction'].upper()}] {rule['name']} (order: {rule['order']})")
+            click.echo(f"  Pattern:  {rule['pattern']}")
+            click.echo(f"  Replace:  {rule['replacement']}")
+            if rule['description']:
+                click.echo(f"  Desc:     {rule['description']}")
+        if private_count > 0 and not show_private:
+            click.echo(f"\n({private_count} private rules hidden. Use --show-private to display)")
+    elif format == 'yaml':
+        import yaml
+        click.echo(yaml.dump(rules, default_flow_style=False))
+    elif format == 'json':
+        import json
+        click.echo(json.dumps(rules, indent=2))
+@pronounce_group.command(name='test')
+@click.argument('text')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), default='tts',
+              help='Test direction (tts/stt)')
+def test_rule(text: str, direction: str):
+    """Test pronunciation rules on text."""
+    manager = get_manager()
+    result = manager.test_rule(text, direction)
+    if text != result:
+        click.echo(f"Original: {text}")
+        click.echo(f"Modified: {result}")
+        # Show which rules were applied if logging is enabled
+        import os
+        if os.environ.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true':
+            click.echo("\n(Check logs for applied rules)")
+    else:
+        click.echo(f"No changes: {text}")
+@pronounce_group.command(name='add')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
+              help='Rule direction (tts/stt)')
+@click.option('--pattern', '-p', required=True, help='Regex pattern to match')
+@click.option('--replacement', '-r', required=True, help='Replacement text')
+@click.option('--name', '-n', help='Rule name (auto-generated if not provided)')
+@click.option('--description', help='Rule description')
+@click.option('--order', type=int, default=100, help='Processing order (lower = earlier)')
+@click.option('--disabled', is_flag=True, help='Create rule as disabled')
+def add_rule(direction: str, pattern: str, replacement: str, name: Optional[str],
+             description: str, order: int, disabled: bool):
+    """Add a new pronunciation rule."""
+    manager = get_manager()
+    success = manager.add_rule(
+        direction=direction,
+        pattern=pattern,
+        replacement=replacement,
+        name=name,
+        description=description or "",
+        enabled=not disabled,
+        order=order,
+        private=False  # CLI-created rules are not private
+    )
+    if success:
+        click.echo(f"✓ Rule added successfully")
+    else:
+        click.echo("✗ Failed to add rule (check pattern validity)", err=True)
+@pronounce_group.command(name='remove')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
+              help='Rule direction (tts/stt)')
+@click.argument('name')
+def remove_rule(direction: str, name: str):
+    """Remove a pronunciation rule by name."""
+    manager = get_manager()
+    success = manager.remove_rule(direction, name)
+    if success:
+        click.echo(f"✓ Rule '{name}' removed")
+    else:
+        click.echo(f"✗ Rule '{name}' not found", err=True)
+@pronounce_group.command(name='enable')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
+              help='Rule direction (tts/stt)')
+@click.argument('name')
+def enable_rule(direction: str, name: str):
+    """Enable a pronunciation rule."""
+    manager = get_manager()
+    success = manager.enable_rule(direction, name)
+    if success:
+        click.echo(f"✓ Rule '{name}' enabled")
+    else:
+        click.echo(f"✗ Failed to enable rule '{name}' (not found or private)", err=True)
+@pronounce_group.command(name='disable')
+@click.option('--direction', '-d', type=click.Choice(['tts', 'stt']), required=True,
+              help='Rule direction (tts/stt)')
+@click.argument('name')
+def disable_rule(direction: str, name: str):
+    """Disable a pronunciation rule."""
+    manager = get_manager()
+    success = manager.disable_rule(direction, name)
+    if success:
+        click.echo(f"✓ Rule '{name}' disabled")
+    else:
+        click.echo(f"✗ Failed to disable rule '{name}' (not found or private)", err=True)
+@pronounce_group.command(name='reload')
+def reload_rules():
+    """Reload pronunciation rules from configuration files."""
+    manager = get_manager()
+    manager.reload_rules()
+    click.echo("✓ Pronunciation rules reloaded")
+@pronounce_group.command(name='edit')
+@click.option('--system', is_flag=True, help='Edit system default rules (requires sudo)')
+def edit_config(system: bool):
+    """Open pronunciation config in editor."""
+    import os
+    import subprocess
+    if system:
+        # Edit system defaults
+        config_path = Path(__file__).parent.parent / 'data' / 'default_pronunciation.yaml'
+        if not config_path.exists():
+            click.echo(f"System config not found: {config_path}", err=True)
+            return
+        # Might need sudo
+        editor = os.environ.get('EDITOR', 'nano')
+        subprocess.run(['sudo', editor, str(config_path)])
+    else:
+        # Edit user config
+        config_path = Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'
+        if not config_path.exists():
+            # Create default config
+            config_path.parent.mkdir(parents=True, exist_ok=True)
+            default_config = {
+                'version': 1,
+                'tts_rules': [],
+                'stt_rules': []
+            }
+            with open(config_path, 'w') as f:
+                yaml.dump(default_config, f, default_flow_style=False)
+        editor = os.environ.get('EDITOR', 'nano')
+        subprocess.run([editor, str(config_path)])
+    # Reload after editing
+    manager = get_manager()
+    manager.reload_rules()
+    click.echo("✓ Configuration edited and reloaded")
+# Register the command group
+def register_commands(cli):
+    """Register pronunciation commands with the main CLI."""
+    cli.add_command(pronounce_group)

voice_mode-4.1.0/voice_mode/cli_commands/transcribe.py ADDED Viewed

@@ -0,0 +1,141 @@
+"""CLI command for audio transcription."""
+import click
+import json
+import asyncio
+from pathlib import Path
+from typing import Optional
+from voice_mode.tools.transcription import (
+    transcribe_audio,
+    TranscriptionBackend,
+    OutputFormat
+)
+@click.group()
+def transcribe():
+    """Audio transcription with word-level timestamps."""
+    pass
+@transcribe.command("audio")
+@click.argument('audio_file', type=click.Path(exists=True))
+@click.option('--words', is_flag=True, help='Include word-level timestamps')
+@click.option(
+    '--backend',
+    type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
+    default='openai',
+    help='Transcription backend to use'
+)
+@click.option(
+    '--format',
+    'output_format',
+    type=click.Choice(['json', 'srt', 'vtt', 'csv']),
+    default='json',
+    help='Output format for transcription'
+)
+@click.option('--output', '-o', type=click.Path(), help='Save transcription to file')
+@click.option('--language', help='Language code (e.g., en, es, fr)')
+@click.option('--model', default='whisper-1', help='Model to use (for OpenAI backend)')
+def audio_command(
+    audio_file: str,
+    words: bool,
+    backend: str,
+    output_format: str,
+    output: Optional[str],
+    language: Optional[str],
+    model: str
+):
+    """
+    Transcribe audio with optional word-level timestamps.
+    Examples:
+        voice-mode transcribe audio recording.mp3
+        voice-mode transcribe audio interview.wav --words
+        voice-mode transcribe audio podcast.mp3 --words --format srt -o subtitles.srt
+        voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
+    """
+    async def run():
+        # Perform transcription
+        result = await transcribe_audio(
+            audio_file=audio_file,
+            word_timestamps=words,
+            backend=TranscriptionBackend(backend),
+            output_format=OutputFormat(output_format),
+            language=language,
+            model=model
+        )
+        # Check for errors
+        if not result.get("success", False):
+            error_msg = result.get("error", "Unknown error occurred")
+            click.echo(f"Error: {error_msg}", err=True)
+            return
+        # Format output
+        if output_format == 'json':
+            # Remove internal fields for cleaner output
+            output_result = {k: v for k, v in result.items()
+                           if k not in ['formatted_content']}
+            content = json.dumps(output_result, indent=2)
+        elif "formatted_content" in result:
+            content = result["formatted_content"]
+        else:
+            # Fallback to JSON if format conversion failed
+            content = json.dumps(result, indent=2)
+        # Write output
+        if output:
+            Path(output).write_text(content)
+            click.echo(f"Transcription saved to {output}")
+        else:
+            click.echo(content)
+    # Run async function
+    asyncio.run(run())
+# For backward compatibility, also provide a direct command
+@click.command('transcribe-audio')
+@click.argument('audio_file', type=click.Path(exists=True))
+@click.option('--words', is_flag=True, help='Include word-level timestamps')
+@click.option(
+    '--backend',
+    type=click.Choice(['openai', 'whisperx', 'whisper-cpp']),
+    default='openai',
+    help='Transcription backend'
+)
+@click.option(
+    '--format',
+    'output_format',
+    type=click.Choice(['json', 'srt', 'vtt', 'csv']),
+    default='json',
+    help='Output format'
+)
+@click.option('--output', '-o', type=click.Path(), help='Save to file')
+@click.option('--language', help='Language code')
+@click.option('--model', default='whisper-1', help='Model to use')
+def transcribe_audio_command(
+    audio_file: str,
+    words: bool,
+    backend: str,
+    output_format: str,
+    output: Optional[str],
+    language: Optional[str],
+    model: str
+):
+    """Direct transcription command for backward compatibility."""
+    audio_command.callback(
+        audio_file=audio_file,
+        words=words,
+        backend=backend,
+        output_format=output_format,
+        output=output,
+        language=language,
+        model=model
+    )

voice-mode 3.34.3__tar.gz → 4.1.0__tar.gz

voice-mode 3.34.3__tar.gz → 4.1.0__tar.gz