voice-mode 2.28.0__tar.gz → 2.28.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voice_mode-2.28.0 → voice_mode-2.28.2}/CHANGELOG.md +25 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/PKG-INFO +1 -1
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/__version__.py +1 -1
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/config.py +6 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/core.py +85 -9
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/converse.py +37 -7
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/install.py +2 -1
- {voice_mode-2.28.0 → voice_mode-2.28.2}/.gitignore +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/README.md +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/build_hooks.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/pyproject.toml +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/__main__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/cli.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/cli_commands/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/cli_commands/exchanges.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/conversation_logger.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/data/versions.json +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/conversations.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/filters.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/formatters.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/models.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/reader.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/exchanges/stats.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/README.md +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/favicon.ico +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/globals.css +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/layout.tsx +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/page.tsx +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/next-env.d.ts +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/next.config.mjs +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/package-lock.json +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/package.json +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/pnpm-lock.yaml +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/postcss.config.mjs +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/tailwind.config.ts +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/tsconfig.json +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/prompts/README.md +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/prompts/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/prompts/converse.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/prompts/release_notes.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/prompts/services.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/provider_discovery.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/providers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/audio_files.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/changelog.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/configuration.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/statistics.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/version.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/resources/whisper_models.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/server.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/shared.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/simple_failover.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/statistics.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/streaming.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/configuration_management.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/dependencies.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/devices.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/diagnostics.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/providers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/service.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/kokoro/install.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/list_versions.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/frontend.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/install.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/production_server.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/uninstall.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/version_info.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/list_models.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/model_active.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/model_install.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/model_remove.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/models.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/uninstall.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/statistics.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/voice_registry.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/__init__.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/audio_diagnostics.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/event_logger.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/ffmpeg_check.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/format_migration.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/gpu_detection.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/migration_helpers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/services/common.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/services/kokoro_helpers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/services/livekit_helpers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/services/whisper_helpers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/services/whisper_version.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/utils/version_helpers.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/version.py +0 -0
- {voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [2.28.2] - 2025-08-24
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
- **Improved noise filtering in VAD (Voice Activity Detection)**
|
14
|
+
- Fixed issue where nose blowing and similar non-speech sounds were detected as speech
|
15
|
+
- Adjusted VAD aggressiveness handling to better filter continuous non-speech audio
|
16
|
+
- Prevents false positives from breathing sounds, sniffles, and ambient noise
|
17
|
+
- Improves overall speech detection accuracy
|
18
|
+
|
19
|
+
## [2.28.1] - 2025-08-24
|
20
|
+
|
21
|
+
### Added
|
22
|
+
- **Standardized project naming as VoiceMode MCP**
|
23
|
+
- Consistent branding across all documentation and code
|
24
|
+
- Updated project descriptions and metadata
|
25
|
+
- Renamed internal references from "voice-mode" to "VoiceMode MCP"
|
26
|
+
- Maintains backward compatibility with existing installations
|
27
|
+
|
28
|
+
### Fixed
|
29
|
+
- **CoreML fallback for whisper.cpp on Apple Silicon**
|
30
|
+
- Added proper error handling when CoreML models fail to load
|
31
|
+
- Automatically falls back to CPU processing if CoreML initialization fails
|
32
|
+
- Prevents whisper-server crashes on systems with CoreML issues
|
33
|
+
- Improves reliability on various macOS configurations
|
34
|
+
|
10
35
|
## [2.28.0] - 2025-08-23
|
11
36
|
|
12
37
|
### Added
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: voice-mode
|
3
|
-
Version: 2.28.0
|
3
|
+
Version: 2.28.2
|
4
4
|
Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
|
5
5
|
Project-URL: Homepage, https://github.com/mbailey/voicemode
|
6
6
|
Project-URL: Repository, https://github.com/mbailey/voicemode
|
@@ -291,6 +291,12 @@ INITIAL_SILENCE_GRACE_PERIOD = float(os.getenv("VOICEMODE_INITIAL_SILENCE_GRACE_
|
|
291
291
|
# Default listen duration for converse tool
|
292
292
|
DEFAULT_LISTEN_DURATION = float(os.getenv("VOICEMODE_DEFAULT_LISTEN_DURATION", "120.0")) # Default 120s listening time
|
293
293
|
|
294
|
+
# Audio feedback pip/chime configuration
|
295
|
+
# Leading silence before pips to allow Bluetooth devices to wake up
|
296
|
+
PIP_LEADING_SILENCE = float(os.getenv("VOICEMODE_PIP_LEADING_SILENCE", "0.1")) # Default 0.1s - minimal delay for Bluetooth
|
297
|
+
# Trailing silence after pips to prevent cutoff
|
298
|
+
PIP_TRAILING_SILENCE = float(os.getenv("VOICEMODE_PIP_TRAILING_SILENCE", "0.2")) # Default 0.2s - reduced for responsiveness
|
299
|
+
|
294
300
|
# Audio format configuration
|
295
301
|
AUDIO_FORMAT = os.getenv("VOICEMODE_AUDIO_FORMAT", "pcm").lower()
|
296
302
|
TTS_AUDIO_FORMAT = os.getenv("VOICEMODE_TTS_AUDIO_FORMAT", "pcm").lower() # Default to PCM for optimal streaming
|
@@ -395,8 +395,9 @@ async def text_to_speech(
|
|
395
395
|
if event_logger:
|
396
396
|
event_logger.log_event(event_logger.TTS_PLAYBACK_START)
|
397
397
|
|
398
|
-
# Add
|
399
|
-
|
398
|
+
# Add configurable silence at the beginning to prevent clipping
|
399
|
+
from .config import PIP_LEADING_SILENCE
|
400
|
+
silence_duration = PIP_LEADING_SILENCE # seconds
|
400
401
|
silence_samples = int(audio.frame_rate * silence_duration)
|
401
402
|
# Match the shape of the samples array exactly
|
402
403
|
if samples.ndim == 1:
|
@@ -507,13 +508,21 @@ async def text_to_speech(
|
|
507
508
|
return False, metrics
|
508
509
|
|
509
510
|
|
510
|
-
def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
|
511
|
+
def generate_chime(
|
512
|
+
frequencies: list,
|
513
|
+
duration: float = 0.1,
|
514
|
+
sample_rate: int = SAMPLE_RATE,
|
515
|
+
leading_silence: Optional[float] = None,
|
516
|
+
trailing_silence: Optional[float] = None
|
517
|
+
) -> np.ndarray:
|
511
518
|
"""Generate a chime sound with given frequencies.
|
512
519
|
|
513
520
|
Args:
|
514
521
|
frequencies: List of frequencies to play in sequence
|
515
522
|
duration: Duration of each tone in seconds
|
516
523
|
sample_rate: Sample rate for audio generation
|
524
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
525
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
517
526
|
|
518
527
|
Returns:
|
519
528
|
Numpy array of audio samples
|
@@ -521,12 +530,30 @@ def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int =
|
|
521
530
|
samples_per_tone = int(sample_rate * duration)
|
522
531
|
fade_samples = int(sample_rate * 0.01) # 10ms fade
|
523
532
|
|
533
|
+
# Determine amplitude based on output device
|
534
|
+
amplitude = 0.0375 # Default (very quiet)
|
535
|
+
try:
|
536
|
+
import sounddevice as sd
|
537
|
+
default_output = sd.default.device[1]
|
538
|
+
if default_output is not None:
|
539
|
+
devices = sd.query_devices()
|
540
|
+
device_name = devices[default_output]['name'].lower()
|
541
|
+
# Check for Bluetooth devices (AirPods, Bluetooth headphones, etc)
|
542
|
+
if 'airpod' in device_name or 'bluetooth' in device_name or 'bt' in device_name:
|
543
|
+
amplitude = 0.15 # Higher amplitude for Bluetooth devices
|
544
|
+
logger.debug(f"Bluetooth device detected ({devices[default_output]['name']}), using amplitude {amplitude}")
|
545
|
+
else:
|
546
|
+
amplitude = 0.075 # Moderate amplitude for built-in speakers
|
547
|
+
logger.debug(f"Built-in speaker detected ({devices[default_output]['name']}), using amplitude {amplitude}")
|
548
|
+
except Exception as e:
|
549
|
+
logger.debug(f"Could not detect output device type: {e}, using default amplitude {amplitude}")
|
550
|
+
|
524
551
|
all_samples = []
|
525
552
|
|
526
553
|
for freq in frequencies:
|
527
554
|
# Generate sine wave
|
528
555
|
t = np.linspace(0, duration, samples_per_tone, False)
|
529
|
-
tone =
|
556
|
+
tone = amplitude * np.sin(2 * np.pi * freq * t)
|
530
557
|
|
531
558
|
# Apply fade in/out to prevent clicks
|
532
559
|
fade_in = np.linspace(0, 1, fade_samples)
|
@@ -540,21 +567,55 @@ def generate_chime(frequencies: list, duration: float = 0.1, sample_rate: int =
|
|
540
567
|
# Concatenate all tones
|
541
568
|
chime = np.concatenate(all_samples)
|
542
569
|
|
570
|
+
# Import config values if not overridden
|
571
|
+
from .config import PIP_LEADING_SILENCE, PIP_TRAILING_SILENCE
|
572
|
+
|
573
|
+
# Use parameter overrides or fall back to config
|
574
|
+
actual_leading_silence = leading_silence if leading_silence is not None else PIP_LEADING_SILENCE
|
575
|
+
actual_trailing_silence = trailing_silence if trailing_silence is not None else PIP_TRAILING_SILENCE
|
576
|
+
|
577
|
+
# Add leading silence for Bluetooth wake-up time
|
578
|
+
# This prevents the beginning of the chime from being cut off
|
579
|
+
silence_samples = int(sample_rate * actual_leading_silence)
|
580
|
+
silence = np.zeros(silence_samples)
|
581
|
+
|
582
|
+
# Add trailing silence to prevent end cutoff
|
583
|
+
trailing_silence_samples = int(sample_rate * actual_trailing_silence)
|
584
|
+
trailing_silence = np.zeros(trailing_silence_samples)
|
585
|
+
|
586
|
+
# Combine: leading silence + chime + trailing silence
|
587
|
+
chime_with_buffer = np.concatenate([silence, chime, trailing_silence])
|
588
|
+
|
543
589
|
# Convert to 16-bit integer
|
544
|
-
chime_int16 = (chime * 32767).astype(np.int16)
|
590
|
+
chime_int16 = (chime_with_buffer * 32767).astype(np.int16)
|
545
591
|
|
546
592
|
return chime_int16
|
547
593
|
|
548
594
|
|
549
|
-
async def play_chime_start(sample_rate: int = SAMPLE_RATE) -> bool:
|
595
|
+
async def play_chime_start(
|
596
|
+
sample_rate: int = SAMPLE_RATE,
|
597
|
+
leading_silence: Optional[float] = None,
|
598
|
+
trailing_silence: Optional[float] = None
|
599
|
+
) -> bool:
|
550
600
|
"""Play the recording start chime (ascending tones).
|
551
601
|
|
602
|
+
Args:
|
603
|
+
sample_rate: Sample rate for audio
|
604
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
605
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
606
|
+
|
552
607
|
Returns:
|
553
608
|
True if chime played successfully, False otherwise
|
554
609
|
"""
|
555
610
|
try:
|
556
611
|
import sounddevice as sd
|
557
|
-
chime = generate_chime(
|
612
|
+
chime = generate_chime(
|
613
|
+
[800, 1000],
|
614
|
+
duration=0.1,
|
615
|
+
sample_rate=sample_rate,
|
616
|
+
leading_silence=leading_silence,
|
617
|
+
trailing_silence=trailing_silence
|
618
|
+
)
|
558
619
|
sd.play(chime, sample_rate)
|
559
620
|
sd.wait()
|
560
621
|
return True
|
@@ -563,15 +624,30 @@ async def play_chime_start(sample_rate: int = SAMPLE_RATE) -> bool:
|
|
563
624
|
return False
|
564
625
|
|
565
626
|
|
566
|
-
async def play_chime_end(sample_rate: int = SAMPLE_RATE) -> bool:
|
627
|
+
async def play_chime_end(
|
628
|
+
sample_rate: int = SAMPLE_RATE,
|
629
|
+
leading_silence: Optional[float] = None,
|
630
|
+
trailing_silence: Optional[float] = None
|
631
|
+
) -> bool:
|
567
632
|
"""Play the recording end chime (descending tones).
|
568
633
|
|
634
|
+
Args:
|
635
|
+
sample_rate: Sample rate for audio
|
636
|
+
leading_silence: Optional override for leading silence duration (seconds)
|
637
|
+
trailing_silence: Optional override for trailing silence duration (seconds)
|
638
|
+
|
569
639
|
Returns:
|
570
640
|
True if chime played successfully, False otherwise
|
571
641
|
"""
|
572
642
|
try:
|
573
643
|
import sounddevice as sd
|
574
|
-
chime = generate_chime(
|
644
|
+
chime = generate_chime(
|
645
|
+
[1000, 800],
|
646
|
+
duration=0.1,
|
647
|
+
sample_rate=sample_rate,
|
648
|
+
leading_silence=leading_silence,
|
649
|
+
trailing_silence=trailing_silence
|
650
|
+
)
|
575
651
|
sd.play(chime, sample_rate)
|
576
652
|
sd.wait()
|
577
653
|
return True
|
@@ -784,7 +784,9 @@ async def play_audio_feedback(
|
|
784
784
|
style: str = "whisper",
|
785
785
|
feedback_type: Optional[str] = None,
|
786
786
|
voice: str = "nova",
|
787
|
-
model: str = "gpt-4o-mini-tts"
|
787
|
+
model: str = "gpt-4o-mini-tts",
|
788
|
+
pip_leading_silence: Optional[float] = None,
|
789
|
+
pip_trailing_silence: Optional[float] = None
|
788
790
|
) -> None:
|
789
791
|
"""Play an audio feedback chime
|
790
792
|
|
@@ -796,6 +798,8 @@ async def play_audio_feedback(
|
|
796
798
|
feedback_type: Kept for compatibility, not used
|
797
799
|
voice: Kept for compatibility, not used
|
798
800
|
model: Kept for compatibility, not used
|
801
|
+
pip_leading_silence: Optional override for leading silence duration
|
802
|
+
pip_trailing_silence: Optional override for trailing silence duration
|
799
803
|
"""
|
800
804
|
# Use parameter override if provided, otherwise use global setting
|
801
805
|
if enabled is False:
|
@@ -810,11 +814,17 @@ async def play_audio_feedback(
|
|
810
814
|
return
|
811
815
|
|
812
816
|
try:
|
813
|
-
# Play appropriate chime
|
817
|
+
# Play appropriate chime with optional delay overrides
|
814
818
|
if text == "listening":
|
815
|
-
await play_chime_start(
|
819
|
+
await play_chime_start(
|
820
|
+
leading_silence=pip_leading_silence,
|
821
|
+
trailing_silence=pip_trailing_silence
|
822
|
+
)
|
816
823
|
elif text == "finished":
|
817
|
-
await play_chime_end(
|
824
|
+
await play_chime_end(
|
825
|
+
leading_silence=pip_leading_silence,
|
826
|
+
trailing_silence=pip_trailing_silence
|
827
|
+
)
|
818
828
|
except Exception as e:
|
819
829
|
logger.debug(f"Audio feedback failed: {e}")
|
820
830
|
# Don't interrupt the main flow if feedback fails
|
@@ -1314,7 +1324,9 @@ async def converse(
|
|
1314
1324
|
disable_silence_detection: Union[bool, str] = False,
|
1315
1325
|
speed: Optional[float] = None,
|
1316
1326
|
vad_aggressiveness: Optional[int] = None,
|
1317
|
-
skip_tts: Optional[Union[bool, str]] = None
|
1327
|
+
skip_tts: Optional[Union[bool, str]] = None,
|
1328
|
+
pip_leading_silence: Optional[float] = None,
|
1329
|
+
pip_trailing_silence: Optional[float] = None
|
1318
1330
|
) -> str:
|
1319
1331
|
"""Have a voice conversation - speak a message and optionally listen for response.
|
1320
1332
|
|
@@ -1391,6 +1403,10 @@ async def converse(
|
|
1391
1403
|
When False: Always use TTS regardless of environment setting
|
1392
1404
|
When None: Follow VOICEMODE_SKIP_TTS environment variable
|
1393
1405
|
Useful for rapid development iterations or when voice isn't needed
|
1406
|
+
pip_leading_silence: Override leading silence before chimes (default: None uses VOICEMODE_PIP_LEADING_SILENCE env var)
|
1407
|
+
Time in seconds to add before the chime starts (e.g., 1.0 for Bluetooth devices)
|
1408
|
+
pip_trailing_silence: Override trailing silence after chimes (default: None uses VOICEMODE_PIP_TRAILING_SILENCE env var)
|
1409
|
+
Time in seconds to add after the chime ends (e.g., 0.5 to prevent cutoff)
|
1394
1410
|
If wait_for_response is False: Confirmation that message was spoken
|
1395
1411
|
If wait_for_response is True: The voice response received (or error/timeout message)
|
1396
1412
|
|
@@ -1797,7 +1813,14 @@ async def converse(
|
|
1797
1813
|
await asyncio.sleep(0.5)
|
1798
1814
|
|
1799
1815
|
# Play "listening" feedback sound
|
1800
|
-
await play_audio_feedback(
|
1816
|
+
await play_audio_feedback(
|
1817
|
+
"listening",
|
1818
|
+
openai_clients,
|
1819
|
+
audio_feedback,
|
1820
|
+
audio_feedback_style or "whisper",
|
1821
|
+
pip_leading_silence=pip_leading_silence,
|
1822
|
+
pip_trailing_silence=pip_trailing_silence
|
1823
|
+
)
|
1801
1824
|
|
1802
1825
|
# Record response
|
1803
1826
|
logger.info(f"🎤 Listening for {listen_duration} seconds...")
|
@@ -1821,7 +1844,14 @@ async def converse(
|
|
1821
1844
|
})
|
1822
1845
|
|
1823
1846
|
# Play "finished" feedback sound
|
1824
|
-
await play_audio_feedback(
|
1847
|
+
await play_audio_feedback(
|
1848
|
+
"finished",
|
1849
|
+
openai_clients,
|
1850
|
+
audio_feedback,
|
1851
|
+
audio_feedback_style or "whisper",
|
1852
|
+
pip_leading_silence=pip_leading_silence,
|
1853
|
+
pip_trailing_silence=pip_trailing_silence
|
1854
|
+
)
|
1825
1855
|
|
1826
1856
|
# Mark the end of recording - this is when user expects response to start
|
1827
1857
|
user_done_time = time.perf_counter()
|
@@ -217,7 +217,8 @@ async def whisper_install(
|
|
217
217
|
# On Apple Silicon, also enable Core ML for better performance
|
218
218
|
if platform.machine() == "arm64":
|
219
219
|
cmake_flags.append("-DWHISPER_COREML=ON")
|
220
|
-
|
220
|
+
cmake_flags.append("-DWHISPER_COREML_ALLOW_FALLBACK=ON")
|
221
|
+
logger.info("Enabling Core ML support with fallback for Apple Silicon")
|
221
222
|
elif is_linux and use_gpu:
|
222
223
|
cmake_flags.append("-DGGML_CUDA=ON")
|
223
224
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/app/api/connection-details/route.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/components/NoAgentNotification.tsx
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/components/TranscriptionView.tsx
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.frontend.plist
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.kokoro.plist
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.livekit.plist
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/launchd/com.voicemode.whisper.plist
RENAMED
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-frontend.service
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-kokoro.service
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-livekit.service
RENAMED
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/templates/systemd/voicemode-whisper.service
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/livekit/production_server.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.28.0 → voice_mode-2.28.2}/voice_mode/tools/services/whisper/model_benchmark.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|