voice-mode 2.28.1__tar.gz → 2.28.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voice_mode-2.28.1 → voice_mode-2.28.3}/CHANGELOG.md +31 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/PKG-INFO +1 -1
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/__version__.py +1 -1
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/config.py +6 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/core.py +85 -9
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/converse.py +47 -8
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/service.py +10 -2
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/kokoro/install.py +9 -1
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/install.py +9 -1
- {voice_mode-2.28.1 → voice_mode-2.28.3}/.gitignore +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/README.md +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/build_hooks.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/pyproject.toml +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/__main__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/cli.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/cli_commands/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/cli_commands/exchanges.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/conversation_logger.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/data/versions.json +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/conversations.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/filters.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/formatters.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/models.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/reader.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/exchanges/stats.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/README.md +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/favicon.ico +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/globals.css +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/layout.tsx +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/page.tsx +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/next-env.d.ts +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/next.config.mjs +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/package-lock.json +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/package.json +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/pnpm-lock.yaml +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/postcss.config.mjs +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/tailwind.config.ts +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/tsconfig.json +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/prompts/README.md +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/prompts/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/prompts/converse.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/prompts/release_notes.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/prompts/services.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/provider_discovery.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/providers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/audio_files.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/changelog.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/configuration.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/statistics.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/version.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/resources/whisper_models.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/server.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/shared.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/simple_failover.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/statistics.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/streaming.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/configuration_management.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/dependencies.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/devices.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/diagnostics.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/providers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/list_versions.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/frontend.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/production_server.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/uninstall.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/version_info.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/install.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/list_models.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/model_active.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/model_install.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/model_remove.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/models.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/uninstall.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/statistics.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/voice_registry.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/__init__.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/audio_diagnostics.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/event_logger.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/ffmpeg_check.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/format_migration.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/gpu_detection.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/migration_helpers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/services/common.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/services/kokoro_helpers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/services/livekit_helpers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/services/whisper_helpers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/services/whisper_version.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/utils/version_helpers.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/version.py +0 -0
- {voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [2.28.3] - 2025-08-24
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
- **Parameter type handling for MCP tools**
|
14
|
+
- Fixed vad_aggressiveness parameter to accept string values from LLMs
|
15
|
+
- Fixed port parameters in kokoro_install and livekit_install
|
16
|
+
- Fixed lines parameter in service management tool
|
17
|
+
- All numeric parameters now properly convert strings to integers
|
18
|
+
- Addresses systemic issue where Claude Code MCP client passes strings
|
19
|
+
|
20
|
+
- **Installer script uvx command corrections**
|
21
|
+
- Fixed MCP configuration to use correct command `uvx voice-mode` (without --refresh)
|
22
|
+
- Installer now always refreshes to latest version at start
|
23
|
+
- Removed unnecessary --refresh flags from runtime commands
|
24
|
+
- Updated user-facing command examples to show correct usage
|
25
|
+
|
26
|
+
## [2.28.2] - 2025-08-24
|
27
|
+
|
28
|
+
### Added
|
29
|
+
- **Configurable audio feedback pip delays**
|
30
|
+
- Added VOICEMODE_PIP_LEADING_SILENCE and VOICEMODE_PIP_TRAILING_SILENCE environment variables
|
31
|
+
- Allows customization of silence before and after audio feedback chimes
|
32
|
+
- Configurable via converse tool parameters pip_leading_silence and pip_trailing_silence
|
33
|
+
- Helps prevent audio cutoff on Bluetooth devices and other audio systems with delay
|
34
|
+
|
35
|
+
### Fixed
|
36
|
+
- **Audio feedback for Bluetooth devices**
|
37
|
+
- Added silence buffer before chimes to prevent Bluetooth audio cutoff
|
38
|
+
- Improved compatibility with devices that have audio activation delay
|
39
|
+
- Better audio feedback experience across different output devices
|
40
|
+
|
10
41
|
## [2.28.1] - 2025-08-24
|
11
42
|
|
12
43
|
### Added
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: voice-mode
|
3
|
-
Version: 2.28.1
|
3
|
+
Version: 2.28.3
|
4
4
|
Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
|
5
5
|
Project-URL: Homepage, https://github.com/mbailey/voicemode
|
6
6
|
Project-URL: Repository, https://github.com/mbailey/voicemode
|
@@ -291,6 +291,12 @@ INITIAL_SILENCE_GRACE_PERIOD = float(os.getenv("VOICEMODE_INITIAL_SILENCE_GRACE_
|
|
291
291
|
# Default listen duration for converse tool
|
292
292
|
DEFAULT_LISTEN_DURATION = float(os.getenv("VOICEMODE_DEFAULT_LISTEN_DURATION", "120.0")) # Default 120s listening time
|
293
293
|
|
294
|
+
# Audio feedback pip/chime configuration
|
295
|
+
# Leading silence before pips to allow Bluetooth devices to wake up
|
296
|
+
PIP_LEADING_SILENCE = float(os.getenv("VOICEMODE_PIP_LEADING_SILENCE", "0.1")) # Default 0.1s - minimal delay for Bluetooth
|
297
|
+
# Trailing silence after pips to prevent cutoff
|
298
|
+
PIP_TRAILING_SILENCE = float(os.getenv("VOICEMODE_PIP_TRAILING_SILENCE", "0.2")) # Default 0.2s - reduced for responsiveness
|
299
|
+
|
294
300
|
# Audio format configuration
|
295
301
|
AUDIO_FORMAT = os.getenv("VOICEMODE_AUDIO_FORMAT", "pcm").lower()
|
296
302
|
TTS_AUDIO_FORMAT = os.getenv("VOICEMODE_TTS_AUDIO_FORMAT", "pcm").lower() # Default to PCM for optimal streaming
|
@@ -395,8 +395,9 @@ async def text_to_speech(
|
|
395
395
|
if event_logger:
|
396
396
|
event_logger.log_event(event_logger.TTS_PLAYBACK_START)
|
397
397
|
|
398
|
-
# Add
|
399
|
-
|
398
|
+
# Add configurable silence at the beginning to prevent clipping
|
399
|
+
from .config import PIP_LEADING_SILENCE
|
400
|
+
silence_duration = PIP_LEADING_SILENCE # seconds
|
400
401
|
silence_samples = int(audio.frame_rate * silence_duration)
|
401
402
|
# Match the shape of the samples array exactly
|
402
403
|
if samples.ndim == 1:
|
@@ -507,13 +508,21 @@ async def text_to_speech(
|
|
507
508
|
return False, metrics
|
508
509
|
|
509
510
|
|
510
|
-
def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
|
511
|
+
def generate_chime(
|
512
|
+
frequencies: list,
|
513
|
+
duration: float = 0.1,
|
514
|
+
sample_rate: int = SAMPLE_RATE,
|
515
|
+
leading_silence: Optional[float] = None,
|
516
|
+
trailing_silence: Optional[float] = None
|
517
|
+
) -> np.ndarray:
|
511
518
|
"""Generate a chime sound with given frequencies.
|
512
519
|
|
513
520
|
Args:
|
514
521
|
frequencies: List of frequencies to play in sequence
|
515
522
|
duration: Duration of each tone in seconds
|
516
523
|
sample_rate: Sample rate for audio generation
|
524
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
525
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
517
526
|
|
518
527
|
Returns:
|
519
528
|
Numpy array of audio samples
|
@@ -521,12 +530,30 @@ def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int =
|
|
521
530
|
samples_per_tone = int(sample_rate * duration)
|
522
531
|
fade_samples = int(sample_rate * 0.01) # 10ms fade
|
523
532
|
|
533
|
+
# Determine amplitude based on output device
|
534
|
+
amplitude = 0.0375 # Default (very quiet)
|
535
|
+
try:
|
536
|
+
import sounddevice as sd
|
537
|
+
default_output = sd.default.device[1]
|
538
|
+
if default_output is not None:
|
539
|
+
devices = sd.query_devices()
|
540
|
+
device_name = devices[default_output]['name'].lower()
|
541
|
+
# Check for Bluetooth devices (AirPods, Bluetooth headphones, etc)
|
542
|
+
if 'airpod' in device_name or 'bluetooth' in device_name or 'bt' in device_name:
|
543
|
+
amplitude = 0.15 # Higher amplitude for Bluetooth devices
|
544
|
+
logger.debug(f"Bluetooth device detected ({devices[default_output]['name']}), using amplitude {amplitude}")
|
545
|
+
else:
|
546
|
+
amplitude = 0.075 # Moderate amplitude for built-in speakers
|
547
|
+
logger.debug(f"Built-in speaker detected ({devices[default_output]['name']}), using amplitude {amplitude}")
|
548
|
+
except Exception as e:
|
549
|
+
logger.debug(f"Could not detect output device type: {e}, using default amplitude {amplitude}")
|
550
|
+
|
524
551
|
all_samples = []
|
525
552
|
|
526
553
|
for freq in frequencies:
|
527
554
|
# Generate sine wave
|
528
555
|
t = np.linspace(0, duration, samples_per_tone, False)
|
529
|
-
tone =
|
556
|
+
tone = amplitude * np.sin(2 * np.pi * freq * t)
|
530
557
|
|
531
558
|
# Apply fade in/out to prevent clicks
|
532
559
|
fade_in = np.linspace(0, 1, fade_samples)
|
@@ -540,21 +567,55 @@ def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int =
|
|
540
567
|
# Concatenate all tones
|
541
568
|
chime = np.concatenate(all_samples)
|
542
569
|
|
570
|
+
# Import config values if not overridden
|
571
|
+
from .config import PIP_LEADING_SILENCE, PIP_TRAILING_SILENCE
|
572
|
+
|
573
|
+
# Use parameter overrides or fall back to config
|
574
|
+
actual_leading_silence = leading_silence if leading_silence is not None else PIP_LEADING_SILENCE
|
575
|
+
actual_trailing_silence = trailing_silence if trailing_silence is not None else PIP_TRAILING_SILENCE
|
576
|
+
|
577
|
+
# Add leading silence for Bluetooth wake-up time
|
578
|
+
# This prevents the beginning of the chime from being cut off
|
579
|
+
silence_samples = int(sample_rate * actual_leading_silence)
|
580
|
+
silence = np.zeros(silence_samples)
|
581
|
+
|
582
|
+
# Add trailing silence to prevent end cutoff
|
583
|
+
trailing_silence_samples = int(sample_rate * actual_trailing_silence)
|
584
|
+
trailing_silence = np.zeros(trailing_silence_samples)
|
585
|
+
|
586
|
+
# Combine: leading silence + chime + trailing silence
|
587
|
+
chime_with_buffer = np.concatenate([silence, chime, trailing_silence])
|
588
|
+
|
543
589
|
# Convert to 16-bit integer
|
544
|
-
chime_int16 = (
|
590
|
+
chime_int16 = (chime_with_buffer * 32767).astype(np.int16)
|
545
591
|
|
546
592
|
return chime_int16
|
547
593
|
|
548
594
|
|
549
|
-
async def play_chime_start(sample_rate: int = SAMPLE_RATE) -> bool:
|
595
|
+
async def play_chime_start(
|
596
|
+
sample_rate: int = SAMPLE_RATE,
|
597
|
+
leading_silence: Optional[float] = None,
|
598
|
+
trailing_silence: Optional[float] = None
|
599
|
+
) -> bool:
|
550
600
|
"""Play the recording start chime (ascending tones).
|
551
601
|
|
602
|
+
Args:
|
603
|
+
sample_rate: Sample rate for audio
|
604
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
605
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
606
|
+
|
552
607
|
Returns:
|
553
608
|
True if chime played successfully, False otherwise
|
554
609
|
"""
|
555
610
|
try:
|
556
611
|
import sounddevice as sd
|
557
|
-
chime = generate_chime(
|
612
|
+
chime = generate_chime(
|
613
|
+
[800, 1000],
|
614
|
+
duration=0.1,
|
615
|
+
sample_rate=sample_rate,
|
616
|
+
leading_silence=leading_silence,
|
617
|
+
trailing_silence=trailing_silence
|
618
|
+
)
|
558
619
|
sd.play(chime, sample_rate)
|
559
620
|
sd.wait()
|
560
621
|
return True
|
@@ -563,15 +624,30 @@ async def play_chime_start(sample_rate: int = SAMPLE_RATE) -> bool:
|
|
563
624
|
return False
|
564
625
|
|
565
626
|
|
566
|
-
async def play_chime_end(sample_rate: int = SAMPLE_RATE) -> bool:
|
627
|
+
async def play_chime_end(
|
628
|
+
sample_rate: int = SAMPLE_RATE,
|
629
|
+
leading_silence: Optional[float] = None,
|
630
|
+
trailing_silence: Optional[float] = None
|
631
|
+
) -> bool:
|
567
632
|
"""Play the recording end chime (descending tones).
|
568
633
|
|
634
|
+
Args:
|
635
|
+
sample_rate: Sample rate for audio
|
636
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
637
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
638
|
+
|
569
639
|
Returns:
|
570
640
|
True if chime played successfully, False otherwise
|
571
641
|
"""
|
572
642
|
try:
|
573
643
|
import sounddevice as sd
|
574
|
-
chime = generate_chime(
|
644
|
+
chime = generate_chime(
|
645
|
+
[1000, 800],
|
646
|
+
duration=0.1,
|
647
|
+
sample_rate=sample_rate,
|
648
|
+
leading_silence=leading_silence,
|
649
|
+
trailing_silence=trailing_silence
|
650
|
+
)
|
575
651
|
sd.play(chime, sample_rate)
|
576
652
|
sd.wait()
|
577
653
|
return True
|
@@ -784,7 +784,9 @@ async def play_audio_feedback(
|
|
784
784
|
style: str = "whisper",
|
785
785
|
feedback_type: Optional[str] = None,
|
786
786
|
voice: str = "nova",
|
787
|
-
model: str = "gpt-4o-mini-tts"
|
787
|
+
model: str = "gpt-4o-mini-tts",
|
788
|
+
pip_leading_silence: Optional[float] = None,
|
789
|
+
pip_trailing_silence: Optional[float] = None
|
788
790
|
) -> None:
|
789
791
|
"""Play an audio feedback chime
|
790
792
|
|
@@ -796,6 +798,8 @@ async def play_audio_feedback(
|
|
796
798
|
feedback_type: Kept for compatibility, not used
|
797
799
|
voice: Kept for compatibility, not used
|
798
800
|
model: Kept for compatibility, not used
|
801
|
+
pip_leading_silence: Optional override for leading silence duration
|
802
|
+
pip_trailing_silence: Optional override for trailing silence duration
|
799
803
|
"""
|
800
804
|
# Use parameter override if provided, otherwise use global setting
|
801
805
|
if enabled is False:
|
@@ -810,11 +814,17 @@ async def play_audio_feedback(
|
|
810
814
|
return
|
811
815
|
|
812
816
|
try:
|
813
|
-
# Play appropriate chime
|
817
|
+
# Play appropriate chime with optional delay overrides
|
814
818
|
if text == "listening":
|
815
|
-
await play_chime_start(
|
819
|
+
await play_chime_start(
|
820
|
+
leading_silence=pip_leading_silence,
|
821
|
+
trailing_silence=pip_trailing_silence
|
822
|
+
)
|
816
823
|
elif text == "finished":
|
817
|
-
await play_chime_end(
|
824
|
+
await play_chime_end(
|
825
|
+
leading_silence=pip_leading_silence,
|
826
|
+
trailing_silence=pip_trailing_silence
|
827
|
+
)
|
818
828
|
except Exception as e:
|
819
829
|
logger.debug(f"Audio feedback failed: {e}")
|
820
830
|
# Don't interrupt the main flow if feedback fails
|
@@ -1313,8 +1323,10 @@ async def converse(
|
|
1313
1323
|
audio_format: Optional[str] = None,
|
1314
1324
|
disable_silence_detection: Union[bool, str] = False,
|
1315
1325
|
speed: Optional[float] = None,
|
1316
|
-
vad_aggressiveness: Optional[int] = None,
|
1317
|
-
skip_tts: Optional[Union[bool, str]] = None
|
1326
|
+
vad_aggressiveness: Optional[Union[int, str]] = None,
|
1327
|
+
skip_tts: Optional[Union[bool, str]] = None,
|
1328
|
+
pip_leading_silence: Optional[float] = None,
|
1329
|
+
pip_trailing_silence: Optional[float] = None
|
1318
1330
|
) -> str:
|
1319
1331
|
"""Have a voice conversation - speak a message and optionally listen for response.
|
1320
1332
|
|
@@ -1391,6 +1403,10 @@ async def converse(
|
|
1391
1403
|
When False: Always use TTS regardless of environment setting
|
1392
1404
|
When None: Follow VOICEMODE_SKIP_TTS environment variable
|
1393
1405
|
Useful for rapid development iterations or when voice isn't needed
|
1406
|
+
pip_leading_silence: Override leading silence before chimes (default: None uses VOICEMODE_PIP_LEADING_SILENCE env var)
|
1407
|
+
Time in seconds to add before the chime starts (e.g., 1.0 for Bluetooth devices)
|
1408
|
+
pip_trailing_silence: Override trailing silence after chimes (default: None uses VOICEMODE_PIP_TRAILING_SILENCE env var)
|
1409
|
+
Time in seconds to add after the chime ends (e.g., 0.5 to prevent cutoff)
|
1394
1410
|
If wait_for_response is False: Confirmation that message was spoken
|
1395
1411
|
If wait_for_response is True: The voice response received (or error/timeout message)
|
1396
1412
|
|
@@ -1479,6 +1495,15 @@ async def converse(
|
|
1479
1495
|
if skip_tts is not None and isinstance(skip_tts, str):
|
1480
1496
|
skip_tts = skip_tts.lower() in ('true', '1', 'yes', 'on')
|
1481
1497
|
|
1498
|
+
# Convert vad_aggressiveness to integer if provided as string
|
1499
|
+
if vad_aggressiveness is not None and isinstance(vad_aggressiveness, str):
|
1500
|
+
try:
|
1501
|
+
vad_aggressiveness = int(vad_aggressiveness)
|
1502
|
+
# Validation will happen later in the function
|
1503
|
+
except ValueError:
|
1504
|
+
logger.warning(f"Invalid VAD aggressiveness value '{vad_aggressiveness}', using default")
|
1505
|
+
vad_aggressiveness = None
|
1506
|
+
|
1482
1507
|
# Determine whether to skip TTS
|
1483
1508
|
if skip_tts is not None:
|
1484
1509
|
# Parameter explicitly set, use it
|
@@ -1797,7 +1822,14 @@ async def converse(
|
|
1797
1822
|
await asyncio.sleep(0.5)
|
1798
1823
|
|
1799
1824
|
# Play "listening" feedback sound
|
1800
|
-
await play_audio_feedback(
|
1825
|
+
await play_audio_feedback(
|
1826
|
+
"listening",
|
1827
|
+
openai_clients,
|
1828
|
+
audio_feedback,
|
1829
|
+
audio_feedback_style or "whisper",
|
1830
|
+
pip_leading_silence=pip_leading_silence,
|
1831
|
+
pip_trailing_silence=pip_trailing_silence
|
1832
|
+
)
|
1801
1833
|
|
1802
1834
|
# Record response
|
1803
1835
|
logger.info(f"🎤 Listening for {listen_duration} seconds...")
|
@@ -1821,7 +1853,14 @@ async def converse(
|
|
1821
1853
|
})
|
1822
1854
|
|
1823
1855
|
# Play "finished" feedback sound
|
1824
|
-
await play_audio_feedback(
|
1856
|
+
await play_audio_feedback(
|
1857
|
+
"finished",
|
1858
|
+
openai_clients,
|
1859
|
+
audio_feedback,
|
1860
|
+
audio_feedback_style or "whisper",
|
1861
|
+
pip_leading_silence=pip_leading_silence,
|
1862
|
+
pip_trailing_silence=pip_trailing_silence
|
1863
|
+
)
|
1825
1864
|
|
1826
1865
|
# Mark the end of recording - this is when user expects response to start
|
1827
1866
|
user_done_time = time.perf_counter()
|
@@ -8,7 +8,7 @@ import platform
|
|
8
8
|
import subprocess
|
9
9
|
import time
|
10
10
|
from pathlib import Path
|
11
|
-
from typing import Literal, Optional, Dict, Any
|
11
|
+
from typing import Literal, Optional, Dict, Any, Union
|
12
12
|
|
13
13
|
import psutil
|
14
14
|
|
@@ -980,7 +980,7 @@ async def view_logs(service_name: str, lines: Optional[int] = None) -> str:
|
|
980
980
|
async def service(
|
981
981
|
service_name: Literal["whisper", "kokoro", "livekit", "frontend"],
|
982
982
|
action: Literal["status", "start", "stop", "restart", "enable", "disable", "logs", "update-service-files"] = "status",
|
983
|
-
lines: Optional[int] = None
|
983
|
+
lines: Optional[Union[int, str]] = None
|
984
984
|
) -> str:
|
985
985
|
"""Unified service management tool for voice mode services.
|
986
986
|
|
@@ -1007,6 +1007,14 @@ async def service(
|
|
1007
1007
|
service("kokoro", "start") # Start Kokoro service
|
1008
1008
|
service("whisper", "logs", 100) # View last 100 lines of Whisper logs
|
1009
1009
|
"""
|
1010
|
+
# Convert lines to integer if provided as string
|
1011
|
+
if lines is not None and isinstance(lines, str):
|
1012
|
+
try:
|
1013
|
+
lines = int(lines)
|
1014
|
+
except ValueError:
|
1015
|
+
logger.warning(f"Invalid lines value '{lines}', using default 50")
|
1016
|
+
lines = 50
|
1017
|
+
|
1010
1018
|
# Route to appropriate handler
|
1011
1019
|
if action == "status":
|
1012
1020
|
return await status_service(service_name)
|
@@ -26,7 +26,7 @@ logger = logging.getLogger("voice-mode")
|
|
26
26
|
async def kokoro_install(
|
27
27
|
install_dir: Optional[str] = None,
|
28
28
|
models_dir: Optional[str] = None,
|
29
|
-
port: int = 8880,
|
29
|
+
port: Union[int, str] = 8880,
|
30
30
|
auto_start: Union[bool, str] = True,
|
31
31
|
install_models: Union[bool, str] = True,
|
32
32
|
force_reinstall: Union[bool, str] = False,
|
@@ -54,6 +54,14 @@ async def kokoro_install(
|
|
54
54
|
Installation status with service configuration details
|
55
55
|
"""
|
56
56
|
try:
|
57
|
+
# Convert port to integer if provided as string
|
58
|
+
if isinstance(port, str):
|
59
|
+
try:
|
60
|
+
port = int(port)
|
61
|
+
except ValueError:
|
62
|
+
logger.warning(f"Invalid port value '{port}', using default 8880")
|
63
|
+
port = 8880
|
64
|
+
|
57
65
|
# Check for and migrate old installations
|
58
66
|
migration_msg = auto_migrate_if_needed("kokoro")
|
59
67
|
|
@@ -124,7 +124,7 @@ room:
|
|
124
124
|
@mcp.tool()
|
125
125
|
async def livekit_install(
|
126
126
|
install_dir: Optional[str] = None,
|
127
|
-
port: int = 7880,
|
127
|
+
port: Union[int, str] = 7880,
|
128
128
|
force_reinstall: Union[bool, str] = False,
|
129
129
|
auto_enable: Optional[Union[bool, str]] = None,
|
130
130
|
version: str = "latest"
|
@@ -159,6 +159,14 @@ async def livekit_install(
|
|
159
159
|
# Check system
|
160
160
|
system = platform.system()
|
161
161
|
|
162
|
+
# Convert port to integer if provided as string
|
163
|
+
if isinstance(port, str):
|
164
|
+
try:
|
165
|
+
port = int(port)
|
166
|
+
except ValueError:
|
167
|
+
logger.warning(f"Invalid port value '{port}', using default 7880")
|
168
|
+
port = 7880
|
169
|
+
|
162
170
|
# Handle string boolean conversions
|
163
171
|
if isinstance(force_reinstall, str):
|
164
172
|
force_reinstall = force_reinstall.lower() in ("true", "1", "yes", "on")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/app/api/connection-details/route.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/components/NoAgentNotification.tsx
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/components/TranscriptionView.tsx
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.frontend.plist
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.kokoro.plist
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.livekit.plist
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/launchd/com.voicemode.whisper.plist
RENAMED
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-frontend.service
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-kokoro.service
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-livekit.service
RENAMED
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/templates/systemd/voicemode-whisper.service
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/livekit/production_server.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.1 → voice_mode-2.28.3}/voice_mode/tools/services/whisper/model_benchmark.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|