PyPI - voice-mode - Versions diffs - 2.22.3__tar.gz → 2.24.0__tar.gz - Mend

voice-mode 2.22.3.tar.gz → 2.24.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

{voice_mode-2.22.3 → voice_mode-2.24.0}/CHANGELOG.md RENAMED Viewed

@@ -7,10 +7,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
-## [2.22.3] - 2025-08-16
+## [2.24.0] - 2025-08-16
+### Added
+- **VAD debugging mode** - Comprehensive debugging for Voice Activity Detection
+  - New `VOICEMODE_VAD_DEBUG` environment variable enables detailed VAD logging
+  - Shows real-time speech detection decisions, state transitions, and timing
+  - Helps diagnose issues where recording stops before speech or cuts off early
+  - Added test script `scripts/test-vad-enhancement.py` for VAD testing
+  - Documented in `docs/vad-debugging.md` with common issues and solutions
 ## [2.23.0] - 2025-08-16
+### Added
+- **`skip_tts` parameter** - Dynamic control over text-to-speech in converse tool
+  - Add optional `skip_tts` parameter to override global `VOICEMODE_SKIP_TTS` setting
+  - When `True`: Skip TTS for faster text-only responses
+  - When `False`: Always use TTS regardless of environment setting
+  - When `None` (default): Follow `VOICEMODE_SKIP_TTS` environment variable
+  - Enables LLM to intelligently choose between voice and text-only responses
+- **`VOICEMODE_SKIP_TTS` environment variable** - Global TTS skip configuration
+  - Set to `true` for permanent text-only mode (faster responses)
+  - Can be overridden per-call with `skip_tts` parameter
+  - Useful for rapid development iterations or when voice isn't needed
+### Fixed
+- **Service status detection** - Correctly identify SSH-forwarded vs locally running services
+  - SSH processes listening on service ports are now recognized as port forwards
+  - Status command now shows 🔄 for forwarded services vs ✅ for local services
+  - Prevents confusion about where services are actually running
+## [2.22.3] - 2025-08-16
 ### Fixed
 - **Service auto-enable error** - Fix 'FunctionTool' object is not callable
   - Changed whisper and kokoro installers to use `enable_service` function instead of MCP tool

{voice_mode-2.22.3 → voice_mode-2.24.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.22.3
+Version: 2.24.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode

{voice_mode-2.22.3 → voice_mode-2.24.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "2.22.3"
+__version__ = "2.24.0"

{voice_mode-2.22.3 → voice_mode-2.24.0}/voice_mode/config.py RENAMED Viewed

@@ -162,6 +162,7 @@ MODELS_DIR = Path(os.getenv("VOICEMODE_MODELS_DIR", str(BASE_DIR / "models")))
 # Debug configuration
 DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes", "on")
 TRACE_DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() == "trace"
+VAD_DEBUG = os.getenv("VOICEMODE_VAD_DEBUG", "").lower() in ("true", "1", "yes", "on")
 DEBUG_DIR = LOGS_DIR / "debug"  # Debug files now go under logs
 # Master save-all configuration
@@ -175,6 +176,9 @@ SAVE_TRANSCRIPTIONS = SAVE_ALL or DEBUG or os.getenv("VOICEMODE_SAVE_TRANSCRIPTI
 # Audio feedback configuration
 AUDIO_FEEDBACK_ENABLED = os.getenv("VOICEMODE_AUDIO_FEEDBACK", "true").lower() in ("true", "1", "yes", "on")
+# Skip TTS configuration (skip text-to-speech for faster responses)
+SKIP_TTS = os.getenv("VOICEMODE_SKIP_TTS", "false").lower() in ("true", "1", "yes", "on")
 # Local provider preference configuration
 PREFER_LOCAL = os.getenv("VOICEMODE_PREFER_LOCAL", "true").lower() in ("true", "1", "yes", "on")

{voice_mode-2.22.3 → voice_mode-2.24.0}/voice_mode/tools/converse.py RENAMED Viewed

@@ -32,6 +32,7 @@ from voice_mode.config import (
     CHANNELS,
     DEBUG,
     DEBUG_DIR,
+    VAD_DEBUG,
     SAVE_AUDIO,
     AUDIO_DIR,
     OPENAI_API_KEY,
@@ -48,6 +49,7 @@ from voice_mode.config import (
     VAD_AGGRESSIVENESS,
     SILENCE_THRESHOLD_MS,
     MIN_RECORDING_DURATION,
+    SKIP_TTS,
     VAD_CHUNK_DURATION_MS,
     INITIAL_SILENCE_GRACE_PERIOD,
     DEFAULT_LISTEN_DURATION,
@@ -871,7 +873,7 @@ def record_audio(duration: float) -> np.ndarray:
             sys.stderr = original_stderr
-def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
+def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> Tuple[np.ndarray, bool]:
     """Record audio from microphone with automatic silence detection.
     Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -884,21 +886,25 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
         vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
     Returns:
-        Numpy array of recorded audio samples
+        Tuple of (audio_data, speech_detected):
+            - audio_data: Numpy array of recorded audio samples
+            - speech_detected: Boolean indicating if speech was detected during recording
     """
     logger.info(f"record_audio_with_silence_detection called - VAD_AVAILABLE={VAD_AVAILABLE}, DISABLE_SILENCE_DETECTION={DISABLE_SILENCE_DETECTION}, min_duration={min_duration}")
     if not VAD_AVAILABLE:
         logger.warning("webrtcvad not available, falling back to fixed duration recording")
-        return record_audio(max_duration)
+        # For fallback, assume speech is present since we can't detect
+        return (record_audio(max_duration), True)
     if DISABLE_SILENCE_DETECTION or disable_silence_detection:
         if disable_silence_detection:
             logger.info("Silence detection disabled for this interaction by request")
         else:
             logger.info("Silence detection disabled globally via VOICEMODE_DISABLE_SILENCE_DETECTION")
-        return record_audio(max_duration)
+        # For fallback, assume speech is present since we can't detect
+        return (record_audio(max_duration), True)
     logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
@@ -939,6 +945,16 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
                     f"Min duration: {MIN_RECORDING_DURATION}s, "
                     f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
+        if VAD_DEBUG:
+            logger.info(f"[VAD_DEBUG] Starting VAD recording with config:")
+            logger.info(f"[VAD_DEBUG]   max_duration: {max_duration}s")
+            logger.info(f"[VAD_DEBUG]   min_duration: {min_duration}s")
+            logger.info(f"[VAD_DEBUG]   effective_min_duration: {max(MIN_RECORDING_DURATION, min_duration)}s")
+            logger.info(f"[VAD_DEBUG]   VAD aggressiveness: {effective_vad_aggressiveness}")
+            logger.info(f"[VAD_DEBUG]   Silence threshold: {SILENCE_THRESHOLD_MS}ms")
+            logger.info(f"[VAD_DEBUG]   Sample rate: {SAMPLE_RATE}Hz (VAD using {vad_sample_rate}Hz)")
+            logger.info(f"[VAD_DEBUG]   Chunk duration: {VAD_CHUNK_DURATION_MS}ms")
         def audio_callback(indata, frames, time, status):
             """Callback for continuous audio stream"""
             if status:
@@ -978,35 +994,53 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
                         # Check if chunk contains speech
                         try:
                             is_speech = vad.is_speech(chunk_bytes, vad_sample_rate)
+                            if VAD_DEBUG:
+                                # Log VAD decision every 500ms for less spam
+                                if int(recording_duration * 1000) % 500 == 0:
+                                    rms = np.sqrt(np.mean(chunk.astype(float)**2))
+                                    logger.info(f"[VAD_DEBUG] t={recording_duration:.1f}s: speech={is_speech}, RMS={rms:.0f}, state={'WAITING' if not speech_detected else 'ACTIVE'}")
                         except Exception as vad_e:
                             logger.warning(f"VAD error: {vad_e}, treating as speech")
                             is_speech = True
-                        if is_speech:
-                            if not speech_detected:
-                                logger.debug("Speech detected, recording...")
-                            speech_detected = True
-                            silence_duration_ms = 0
+                        # State machine for speech detection
+                        if not speech_detected:
+                            # WAITING_FOR_SPEECH state
+                            if is_speech:
+                                logger.info("🎤 Speech detected, starting active recording")
+                                if VAD_DEBUG:
+                                    logger.info(f"[VAD_DEBUG] STATE CHANGE: WAITING_FOR_SPEECH -> SPEECH_ACTIVE at t={recording_duration:.1f}s")
+                                speech_detected = True
+                                silence_duration_ms = 0
+                            # No timeout in this state - just keep waiting
+                            # The only exit is speech detection or max_duration
                         else:
-                            silence_duration_ms += VAD_CHUNK_DURATION_MS
-                            if speech_detected and silence_duration_ms % 200 == 0:  # Log every 200ms
-                                logger.debug(f"Silence: {silence_duration_ms}ms")
+                            # We have detected speech at some point
+                            if is_speech:
+                                # SPEECH_ACTIVE state - reset silence counter
+                                silence_duration_ms = 0
+                            else:
+                                # SILENCE_AFTER_SPEECH state - accumulate silence
+                                silence_duration_ms += VAD_CHUNK_DURATION_MS
+                                if VAD_DEBUG and silence_duration_ms % 100 == 0:  # More frequent logging in debug mode
+                                    logger.info(f"[VAD_DEBUG] Accumulating silence: {silence_duration_ms}/{SILENCE_THRESHOLD_MS}ms, t={recording_duration:.1f}s")
+                                elif silence_duration_ms % 200 == 0:  # Log every 200ms
+                                    logger.debug(f"Silence: {silence_duration_ms}ms")
+                                # Check if we should stop due to silence threshold
+                                # Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
+                                effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
+                                if recording_duration >= effective_min_duration and silence_duration_ms >= SILENCE_THRESHOLD_MS:
+                                    logger.info(f"✓ Silence threshold reached after {recording_duration:.1f}s of recording")
+                                    if VAD_DEBUG:
+                                        logger.info(f"[VAD_DEBUG] STOP: silence_duration={silence_duration_ms}ms >= threshold={SILENCE_THRESHOLD_MS}ms")
+                                        logger.info(f"[VAD_DEBUG] STOP: recording_duration={recording_duration:.1f}s >= min_duration={effective_min_duration}s")
+                                    stop_recording = True
+                                elif VAD_DEBUG and recording_duration < effective_min_duration:
+                                    if int(recording_duration * 1000) % 500 == 0:  # Log every 500ms
+                                        logger.info(f"[VAD_DEBUG] Min duration not met: {recording_duration:.1f}s < {effective_min_duration}s")
                         recording_duration += chunk_duration_s
-                        # Check stop conditions
-                        # Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
-                        effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
-                        if speech_detected and recording_duration >= effective_min_duration:
-                            if silence_duration_ms >= SILENCE_THRESHOLD_MS:
-                                logger.info(f"✓ Silence detected after {recording_duration:.1f}s (min: {effective_min_duration:.1f}s), stopping recording")
-                                stop_recording = True
-                        # Also stop if we haven't detected any speech after a grace period
-                        # Give user time to start speaking
-                        if not speech_detected and recording_duration >= INITIAL_SILENCE_GRACE_PERIOD:
-                            logger.info(f"No speech detected after {INITIAL_SILENCE_GRACE_PERIOD}s grace period, stopping recording")
-                            stop_recording = True
                     except queue.Empty:
                         # No audio data available, continue waiting
@@ -1018,17 +1052,26 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             # Concatenate all chunks
             if chunks:
                 full_recording = np.concatenate(chunks)
-                logger.info(f"✓ Recorded {len(full_recording)} samples ({recording_duration:.1f}s)")
+                if not speech_detected:
+                    logger.info(f"✓ Recording completed ({recording_duration:.1f}s) - No speech detected")
+                    if VAD_DEBUG:
+                        logger.info(f"[VAD_DEBUG] FINAL STATE: No speech was ever detected during recording")
+                else:
+                    logger.info(f"✓ Recorded {len(full_recording)} samples ({recording_duration:.1f}s) with speech")
+                    if VAD_DEBUG:
+                        logger.info(f"[VAD_DEBUG] FINAL STATE: Speech was detected, recording complete")
                 if DEBUG:
                     # Calculate RMS for debug
                     rms = np.sqrt(np.mean(full_recording.astype(float) ** 2))
                     logger.debug(f"Recording stats - RMS: {rms:.2f}, Speech detected: {speech_detected}")
-                return full_recording
+                # Return tuple: (audio_data, speech_detected)
+                return (full_recording, speech_detected)
             else:
                 logger.warning("No audio chunks recorded")
-                return np.array([])
+                return (np.array([]), False)
         except Exception as e:
             logger.error(f"Recording with VAD failed: {e}")
@@ -1041,7 +1084,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             logger.error(f"\n{help_message}")
             logger.info("Falling back to fixed duration recording")
-            return record_audio(max_duration)
+            # For fallback, assume speech is present since we can't detect
+            return (record_audio(max_duration), True)
         finally:
             # Restore stdio
@@ -1055,7 +1099,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
     except Exception as e:
         logger.error(f"VAD initialization failed: {e}")
         logger.info("Falling back to fixed duration recording")
-        return record_audio(max_duration)
+        # For fallback, assume speech is present since we can't detect
+        return (record_audio(max_duration), True)
 async def check_livekit_available() -> bool:
@@ -1248,7 +1293,8 @@ async def converse(
     audio_format: Optional[str] = None,
     disable_silence_detection: Union[bool, str] = False,
     speed: Optional[float] = None,
-    vad_aggressiveness: Optional[int] = None
+    vad_aggressiveness: Optional[int] = None,
+    skip_tts: Optional[Union[bool, str]] = None
 ) -> str:
     """Have a voice conversation - speak a message and optionally listen for response.
@@ -1320,6 +1366,11 @@ async def converse(
                             Use lower values (0-1) in quiet environments to catch all speech
                             Use higher values (2-3) in noisy environments to reduce false triggers
+        skip_tts: Skip text-to-speech and only show text (default: None uses VOICEMODE_SKIP_TTS env var)
+                  When True: Skip TTS for faster response, text-only output
+                  When False: Always use TTS regardless of environment setting
+                  When None: Follow VOICEMODE_SKIP_TTS environment variable
+                  Useful for rapid development iterations or when voice isn't needed
         If wait_for_response is False: Confirmation that message was spoken
         If wait_for_response is True: The voice response received (or error/timeout message)
@@ -1360,6 +1411,12 @@ async def converse(
         Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
                  Higher values (2-3) = more strict, may miss soft speech or whispers
+    Skip TTS Examples:
+        - Fast iteration mode: converse("Processing your request", skip_tts=True)  # Text only, no voice
+        - Important announcement: converse("Warning: System will restart", skip_tts=False)  # Always use voice
+        - Quick confirmation: converse("Done!", skip_tts=True, wait_for_response=False)  # Fast text-only
+        - Follow user preference: converse("Hello")  # Uses VOICEMODE_SKIP_TTS setting
     """
     # Convert string booleans to actual booleans
     if isinstance(wait_for_response, str):
@@ -1368,6 +1425,16 @@ async def converse(
         disable_silence_detection = disable_silence_detection.lower() in ('true', '1', 'yes', 'on')
     if isinstance(audio_feedback, str):
         audio_feedback = audio_feedback.lower() in ('true', '1', 'yes', 'on')
+    if skip_tts is not None and isinstance(skip_tts, str):
+        skip_tts = skip_tts.lower() in ('true', '1', 'yes', 'on')
+    # Determine whether to skip TTS
+    if skip_tts is not None:
+        # Parameter explicitly set, use it
+        should_skip_tts = skip_tts
+    else:
+        # Use global setting
+        should_skip_tts = SKIP_TTS
     # Convert string speed to float
     if speed is not None and isinstance(speed, str):
@@ -1457,15 +1524,26 @@ async def converse(
         if not wait_for_response:
             try:
                 async with audio_operation_lock:
-                    success, tts_metrics, tts_config = await text_to_speech_with_failover(
-                        message=message,
-                        voice=voice,
-                        model=tts_model,
-                        instructions=tts_instructions,
-                        audio_format=audio_format,
-                        initial_provider=tts_provider,
-                        speed=speed
-                    )
+                    if should_skip_tts:
+                        # Skip TTS entirely
+                        success = True
+                        tts_metrics = {
+                            'ttfa': 0,
+                            'generation': 0,
+                            'playback': 0,
+                            'total': 0
+                        }
+                        tts_config = {'provider': 'no-op', 'voice': 'none'}
+                    else:
+                        success, tts_metrics, tts_config = await text_to_speech_with_failover(
+                            message=message,
+                            voice=voice,
+                            model=tts_model,
+                            instructions=tts_instructions,
+                            audio_format=audio_format,
+                            initial_provider=tts_provider,
+                            speed=speed
+                        )
                 # Include timing info if available
                 timing_info = ""
@@ -1589,15 +1667,26 @@ async def converse(
                 async with audio_operation_lock:
                     # Speak the message
                     tts_start = time.perf_counter()
-                    tts_success, tts_metrics, tts_config = await text_to_speech_with_failover(
-                        message=message,
-                        voice=voice,
-                        model=tts_model,
-                        instructions=tts_instructions,
-                        audio_format=audio_format,
-                        initial_provider=tts_provider,
-                        speed=speed
-                    )
+                    if should_skip_tts:
+                        # Skip TTS entirely for faster response
+                        tts_success = True
+                        tts_metrics = {
+                            'ttfa': 0,
+                            'generation': 0,
+                            'playback': 0,
+                            'total': 0
+                        }
+                        tts_config = {'provider': 'no-op', 'voice': 'none'}
+                    else:
+                        tts_success, tts_metrics, tts_config = await text_to_speech_with_failover(
+                            message=message,
+                            voice=voice,
+                            model=tts_model,
+                            instructions=tts_instructions,
+                            audio_format=audio_format,
+                            initial_provider=tts_provider,
+                            speed=speed
+                        )
                     # Add TTS sub-metrics
                     if tts_metrics:
@@ -1668,7 +1757,7 @@ async def converse(
                     record_start = time.perf_counter()
                     logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
-                    audio_data = await asyncio.get_event_loop().run_in_executor(
+                    audio_data, speech_detected = await asyncio.get_event_loop().run_in_executor(
                         None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
                     )
                     timings['record'] = time.perf_counter() - record_start
@@ -1691,14 +1780,27 @@ async def converse(
                         result = "Error: Could not record audio"
                         return result
-                    # Convert to text
-                    # Log STT start
-                    if event_logger:
-                        event_logger.log_event(event_logger.STT_START)
-                    stt_start = time.perf_counter()
-                    response_text = await speech_to_text(audio_data, SAVE_AUDIO, AUDIO_DIR if SAVE_AUDIO else None, transport)
-                    timings['stt'] = time.perf_counter() - stt_start
+                    # Check if no speech was detected
+                    if not speech_detected:
+                        logger.info("No speech detected during recording - skipping STT processing")
+                        response_text = None
+                        timings['stt'] = 0.0
+                        # Still save the audio if configured
+                        if SAVE_AUDIO and AUDIO_DIR:
+                            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                            audio_path = os.path.join(AUDIO_DIR, f"no_speech_{timestamp}.wav")
+                            write(audio_path, SAMPLE_RATE, audio_data)
+                            logger.debug(f"Saved no-speech audio to: {audio_path}")
+                    else:
+                        # Convert to text
+                        # Log STT start
+                        if event_logger:
+                            event_logger.log_event(event_logger.STT_START)
+                        stt_start = time.perf_counter()
+                        response_text = await speech_to_text(audio_data, SAVE_AUDIO, AUDIO_DIR if SAVE_AUDIO else None, transport)
+                        timings['stt'] = time.perf_counter() - stt_start
                     # Log STT complete
                     if event_logger:

{voice_mode-2.22.3 → voice_mode-2.24.0}/voice_mode/tools/service.py RENAMED Viewed

@@ -14,7 +14,7 @@ import psutil
 from voice_mode.server import mcp
 from voice_mode.config import WHISPER_PORT, KOKORO_PORT, LIVEKIT_PORT, SERVICE_AUTO_ENABLE
-from voice_mode.utils.services.common import find_process_by_port
+from voice_mode.utils.services.common import find_process_by_port, check_service_status
 from voice_mode.utils.services.whisper_helpers import find_whisper_server, find_whisper_model
 from voice_mode.utils.services.kokoro_helpers import find_kokoro_fastapi, has_gpu_support
@@ -195,10 +195,16 @@ async def status_service(service_name: str) -> str:
         port = LIVEKIT_PORT
     else:  # frontend
         port = 3000
-    proc = find_process_by_port(port)
-    if not proc:
-        return f"{service_name.capitalize()} is not running on port {port}"
+    status, proc = check_service_status(port)
+    if status == "not_available":
+        return f"❌ {service_name.capitalize()} is not available"
+    elif status == "forwarded":
+        return f"""🔄 {service_name.capitalize()} is available via port forwarding
+   Port: {port} (forwarded)
+   Local process: Not running
+   Remote: Accessible"""
     try:
         with proc.oneshot():
@@ -269,7 +275,7 @@ async def status_service(service_name: str) -> str:
         if extra_info_parts:
             extra_info = "\n   " + "\n   ".join(extra_info_parts)
-        return f"""✅ {service_name.capitalize()} is running
+        return f"""✅ {service_name.capitalize()} is running locally
    PID: {proc.pid}
    Port: {port}
    CPU: {cpu_percent:.1f}%

voice_mode-2.24.0/voice_mode/utils/services/common.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""Common utilities for service management tools."""
+import psutil
+import socket
+from typing import Optional, Tuple
+import logging
+logger = logging.getLogger("voice-mode")
+def find_process_by_port(port: int) -> Optional[psutil.Process]:
+    """Find a process listening on the specified port.
+    Returns None if port is only accessible via SSH forwarding or other non-local means.
+    """
+    try:
+        for proc in psutil.process_iter(['pid', 'name']):
+            try:
+                # Skip if we can't access process info (might be another user's process)
+                if not proc.is_running():
+                    continue
+                # Skip SSH processes - these are port forwards, not actual services
+                proc_name = proc.name().lower()
+                if proc_name in ['ssh', 'sshd']:
+                    continue
+                for conn in proc.connections():
+                    if conn.laddr.port == port and conn.status == 'LISTEN':
+                        # Verify this is a real local process
+                        try:
+                            # Try to access basic process info to ensure it's real
+                            _ = proc.pid
+                            _ = proc.create_time()
+                            return proc
+                        except (psutil.NoSuchProcess, psutil.AccessDenied):
+                            # Process doesn't actually exist or we can't access it
+                            continue
+            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
+                continue
+    except Exception as e:
+        logger.error(f"Error finding process by port: {e}")
+    return None
+def is_port_accessible(port: int, host: str = "127.0.0.1", timeout: float = 1.0) -> bool:
+    """Check if a port is accessible (can connect to it).
+    This will return True for both locally running services and SSH-forwarded ports.
+    """
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.settimeout(timeout)
+            result = sock.connect_ex((host, port))
+            return result == 0
+    except Exception as e:
+        logger.error(f"Error checking port accessibility: {e}")
+        return False
+def check_service_status(port: int) -> Tuple[str, Optional[psutil.Process]]:
+    """Check the status of a service on a given port.
+    Returns:
+        Tuple of (status, process):
+        - ("local", process) if running locally
+        - ("forwarded", None) if accessible but not local
+        - ("not_available", None) if not accessible at all
+    """
+    # First check if there's a local process
+    proc = find_process_by_port(port)
+    if proc:
+        return ("local", proc)
+    # No local process, check if port is accessible (might be forwarded)
+    if is_port_accessible(port):
+        return ("forwarded", None)
+    # Not accessible at all
+    return ("not_available", None)

voice_mode-2.22.3/voice_mode/utils/services/common.py DELETED Viewed

@@ -1,22 +0,0 @@
-"""Common utilities for service management tools."""
-import psutil
-from typing import Optional
-import logging
-logger = logging.getLogger("voice-mode")
-def find_process_by_port(port: int) -> Optional[psutil.Process]:
-    """Find a process listening on the specified port."""
-    try:
-        for proc in psutil.process_iter(['pid', 'name']):
-            try:
-                for conn in proc.connections():
-                    if conn.laddr.port == port and conn.status == 'LISTEN':
-                        return proc
-            except (psutil.NoSuchProcess, psutil.AccessDenied):
-                continue
-    except Exception as e:
-        logger.error(f"Error finding process by port: {e}")
-    return None