PyPI - speechflow - Versions diffs - 0.1.0__py3-none-any.whl - Mend

speechflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

speechflow/__init__.py +24 -0
speechflow/audio/__init__.py +7 -0
speechflow/audio/player.py +211 -0
speechflow/audio/writer.py +162 -0
speechflow/core/__init__.py +10 -0
speechflow/core/base.py +69 -0
speechflow/core/exceptions.py +23 -0
speechflow/engines/__init__.py +7 -0
speechflow/engines/fishaudio.py +189 -0
speechflow/engines/fishspeech.py +72 -0
speechflow/engines/gemini.py +284 -0
speechflow/engines/kokoro.py +272 -0
speechflow/engines/openai.py +152 -0
speechflow/engines/stylebert.py +336 -0
speechflow-0.1.0.dist-info/METADATA +236 -0
speechflow-0.1.0.dist-info/RECORD +18 -0
speechflow-0.1.0.dist-info/WHEEL +4 -0
speechflow-0.1.0.dist-info/licenses/LICENSE +21 -0

speechflow/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+from .audio import AudioPlayer, AudioWriter
+from .core import AudioData, AudioProcessingError, EngineNotFoundError, TTSEngineBase, TTSError
+from .engines import FishAudioTTSEngine, GeminiTTSEngine, KokoroTTSEngine, OpenAITTSEngine, StyleBertTTSEngine
+# Version string of this package release.
+__version__ = "0.1.0"
+# Public API of the top-level package: the names bound by `from speechflow import *`.
+# Every entry is imported above from the .core, .audio or .engines subpackages.
+__all__ = [
+    # Core
+    "TTSEngineBase",
+    "AudioData",
+    # Exceptions
+    "TTSError",
+    "EngineNotFoundError",
+    "AudioProcessingError",
+    # Audio components
+    "AudioPlayer",
+    "AudioWriter",
+    # Engines
+    "FishAudioTTSEngine",
+    "GeminiTTSEngine",
+    "KokoroTTSEngine",
+    "OpenAITTSEngine",
+    "StyleBertTTSEngine",
+]

speechflow/audio/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from .player import AudioPlayer
+from .writer import AudioWriter
+# Public names of the audio subpackage (playback and file output helpers).
+__all__ = [
+    "AudioPlayer",
+    "AudioWriter",
+]

speechflow/audio/player.py ADDED Viewed

@@ -0,0 +1,211 @@
+import queue
+import threading
+from typing import Iterator, Optional
+import numpy as np
+import pyaudio
+from ..core.base import AudioData
+from ..core.exceptions import AudioProcessingError
+class AudioPlayer:
+    """Audio player using PyAudio for both single audio and streaming playback.
+    Samples are written to the device as float32 (``pyaudio.paFloat32``); data of
+    any other dtype is cast with ``astype(np.float32)`` without rescaling.
+    Streaming playback feeds a bounded queue that a worker thread drains.
+    Usable as a context manager: leaving the ``with`` block calls ``stop()``.
+    """
+    def __init__(self):
+        # Plain attributes come first so that stop() and __del__ still work if
+        # creating the PyAudio instance below raises.
+        self.stream: Optional[pyaudio.Stream] = None
+        self.current_sample_rate: Optional[int] = None
+        self.current_channels: Optional[int] = None
+        # For streaming playback
+        self.audio_queue = queue.Queue(maxsize=100)
+        self.stop_event = threading.Event()
+        self.playback_thread = None
+        self.pyaudio = pyaudio.PyAudio()
+    @staticmethod
+    def _as_float32(data: np.ndarray) -> np.ndarray:
+        """Return ``data`` unchanged if it is float32, otherwise a float32 cast of it."""
+        if data.dtype != np.float32:
+            return data.astype(np.float32)
+        return data
+    def _ensure_stream(self, sample_rate: int, channels: int) -> None:
+        """Ensure an output stream is open with the given parameters.
+        An existing stream is reused when its sample rate and channel count
+        match; otherwise it is closed and a new one is opened.
+        Args:
+            sample_rate: Sample rate to open the stream with
+            channels: Number of output channels
+        """
+        if (
+            self.stream is not None
+            and self.current_sample_rate == sample_rate
+            and self.current_channels == channels
+        ):
+            return
+        # Close existing stream if any
+        if self.stream is not None:
+            self.close_stream()
+        # Open new stream
+        self.stream = self.pyaudio.open(
+            format=pyaudio.paFloat32,
+            channels=channels,
+            rate=sample_rate,
+            output=True,
+            frames_per_buffer=2048,  # Balanced buffer for smooth playback
+        )
+        self.current_sample_rate = sample_rate
+        self.current_channels = channels
+    def play(self, audio: AudioData) -> AudioData:
+        """Play audio data (blocking).
+        Args:
+            audio: AudioData to play
+        Returns:
+            AudioData: The same object that was passed in
+        Raises:
+            AudioProcessingError: If opening the stream or writing to it fails
+        """
+        try:
+            self._ensure_stream(audio.sample_rate, audio.channels)
+            samples = self._as_float32(audio.data)
+            # Explicit check instead of ``assert`` so it also runs under ``python -O``
+            if self.stream is None:
+                raise RuntimeError("Stream must be initialized before playing")
+            # Play audio (blocking)
+            self.stream.write(samples.tobytes())
+            # Return the original audio data
+            return audio
+        except Exception as e:
+            raise AudioProcessingError(f"Failed to play audio: {str(e)}") from e
+    def play_stream(self, audio_stream: Iterator[AudioData]) -> AudioData:
+        """Play audio from a stream of AudioData chunks.
+        This method starts playback immediately when the first chunk arrives
+        and continues playing subsequent chunks seamlessly.
+        Args:
+            audio_stream: Iterator yielding AudioData chunks
+        Returns:
+            AudioData: Combined audio data from all chunks
+        Raises:
+            AudioProcessingError: If the stream yields no chunks or no usable
+                audio parameters
+        """
+        # A previous session that was interrupted with stop() can leave chunks or
+        # its end marker behind; a fresh worker would read that marker and quit.
+        self._drain_queue()
+        # Start playback thread
+        self.stop_event.clear()
+        self.playback_thread = threading.Thread(target=self._playback_worker)
+        self.playback_thread.start()
+        # Collect all chunks for return value
+        all_chunks = []
+        sample_rate = None
+        channels = None
+        audio_format = None
+        stream_ready = False
+        try:
+            # Feed chunks to the queue
+            for chunk in audio_stream:
+                if self.stop_event.is_set():
+                    break
+                # Match the output stream to the first chunk. This also replaces a
+                # stream that an earlier play() left open with other parameters.
+                # Nothing is queued yet, so the worker is not using the stream.
+                if not stream_ready:
+                    self._ensure_stream(chunk.sample_rate, chunk.channels)
+                    stream_ready = True
+                # Set audio parameters from first valid chunk
+                if sample_rate is None and chunk.sample_rate is not None:
+                    sample_rate = chunk.sample_rate
+                if channels is None and chunk.channels is not None:
+                    channels = chunk.channels
+                if audio_format is None and chunk.format is not None:
+                    audio_format = chunk.format
+                audio_data = self._as_float32(chunk.data)
+                # Store chunk for return value
+                all_chunks.append(audio_data)
+                # Put chunk in queue (waits up to 2 s if the queue is full)
+                try:
+                    self.audio_queue.put(audio_data.tobytes(), timeout=2.0)
+                except queue.Full:
+                    print("Warning: Audio queue is full, skipping chunk")
+        finally:
+            # Signal end of stream
+            self._signal_end_of_stream()
+            # Wait for playback to complete
+            if self.playback_thread:
+                self.playback_thread.join(timeout=30.0)
+            # Clean up
+            self._cleanup_stream()
+        # Combine all chunks into a single AudioData
+        if not all_chunks:
+            raise AudioProcessingError("No audio chunks received from stream")
+        # Check if audio parameters were initialized
+        if sample_rate is None or channels is None or audio_format is None:
+            raise AudioProcessingError("Audio parameters not initialized. Stream may have ended without sending any chunks.")
+        combined_data = np.concatenate(all_chunks)
+        return AudioData(data=combined_data, sample_rate=sample_rate, channels=channels, format=audio_format)
+    def _signal_end_of_stream(self) -> None:
+        """Queue the end-of-stream marker without risking an endless block.
+        If the queue is full and the worker has already exited, nothing will
+        ever make room, so the marker is not needed and is skipped.
+        """
+        worker = self.playback_thread
+        while True:
+            try:
+                self.audio_queue.put(None, timeout=0.5)
+                return
+            except queue.Full:
+                if worker is None or not worker.is_alive():
+                    return
+    def _drain_queue(self) -> None:
+        """Discard everything currently waiting in the playback queue."""
+        try:
+            while True:
+                self.audio_queue.get_nowait()
+        except queue.Empty:
+            pass
+    def _playback_worker(self):
+        """Worker thread for continuous playback.
+        Runs until the stop event is set, the end-of-stream marker (``None``)
+        is read, or writing to the stream fails.
+        """
+        while not self.stop_event.is_set():
+            try:
+                # Short timeout so the stop event is re-checked regularly
+                audio_data = self.audio_queue.get(timeout=0.1)
+                if audio_data is None:
+                    # End of stream marker
+                    break
+                # Local reference: another thread may clear self.stream
+                stream = self.stream
+                if stream and not stream.is_stopped():
+                    stream.write(audio_data)
+            except queue.Empty:
+                continue
+            except Exception as e:
+                print(f"Playback error: {e}")
+                break
+    def _cleanup_stream(self):
+        """Stop and close the PyAudio stream, if one is open."""
+        stream = self.stream
+        if not stream:
+            return
+        # Forget the stream first so a failure while stopping it cannot leave a
+        # dead handle that later calls would try to reuse.
+        self.stream = None
+        self.current_sample_rate = None
+        self.current_channels = None
+        try:
+            stream.stop_stream()
+        finally:
+            stream.close()
+    def close_stream(self) -> None:
+        """Close the current stream."""
+        self._cleanup_stream()
+    def stop(self):
+        """Stop playback and clean up resources."""
+        self.stop_event.set()
+        # Clear queue
+        self._drain_queue()
+        # Wait for playback thread
+        if self.playback_thread and self.playback_thread.is_alive():
+            self.playback_thread.join(timeout=2.0)
+        self._cleanup_stream()
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit."""
+        self.stop()
+    def __del__(self):
+        """Clean up PyAudio instance."""
+        # A finalizer may run on a partially built instance or during interpreter
+        # shutdown; errors here cannot be handled by anyone, so they are dropped.
+        try:
+            self.stop()
+        except Exception:
+            pass
+        pa = getattr(self, "pyaudio", None)
+        if pa is not None:
+            try:
+                pa.terminate()
+            except Exception:
+                pass

speechflow/audio/writer.py ADDED Viewed

@@ -0,0 +1,162 @@
+import wave
+from pathlib import Path
+from typing import Iterator
+import numpy as np
+from ..core.base import AudioData
+from ..core.exceptions import AudioProcessingError
+class AudioWriter:
+    """Audio file writer.
+    Only WAV output (``.wav`` / ``.wave``) is implemented; samples are stored as
+    16-bit PCM. Any other extension raises AudioProcessingError.
+    """
+    # File extensions (lower-case) that are routed to the WAV writer
+    _WAV_EXTENSIONS = (".wav", ".wave")
+    def save(self, audio: AudioData, output_path: str | Path) -> AudioData:
+        """Save audio data to file.
+        Args:
+            audio: AudioData to save
+            output_path: Path to save the audio file
+        Returns:
+            AudioData: The same object that was passed in
+        Raises:
+            AudioProcessingError: If the extension is unsupported or writing fails
+        """
+        output_path = Path(output_path)
+        extension = output_path.suffix.lower()
+        # Reject unsupported formats before touching the file system
+        if extension not in self._WAV_EXTENSIONS:
+            raise AudioProcessingError(f"Unsupported audio format: {extension}")
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        self._save_wav(audio, output_path)
+        return audio
+    def save_stream(self, audio_stream: Iterator[AudioData], output_path: str | Path) -> AudioData:
+        """Save streaming audio data to file.
+        This method accumulates all chunks from the stream and saves them as a single file.
+        Args:
+            audio_stream: Iterator yielding AudioData chunks
+            output_path: Path to save the audio file
+        Returns:
+            AudioData: Combined audio data from all chunks
+        Raises:
+            AudioProcessingError: If the extension is unsupported, the stream is
+                empty or inconsistent, or writing fails
+        """
+        output_path = Path(output_path)
+        extension = output_path.suffix.lower()
+        # Reject unsupported formats before touching the file system
+        if extension not in self._WAV_EXTENSIONS:
+            raise AudioProcessingError(f"Unsupported audio format for streaming: {extension}")
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        return self._save_wav_stream(audio_stream, output_path)
+    @staticmethod
+    def _to_int16(data: np.ndarray) -> np.ndarray:
+        """Convert samples to int16 for 16-bit WAV output.
+        Floating-point data of any width is clipped to [-1, 1] and scaled by
+        32767. Casting such data directly would truncate it to zeros, which is
+        why float64 is handled the same way as float32. Other dtypes are cast
+        to int16 as they are.
+        """
+        if np.issubdtype(data.dtype, np.floating):
+            return (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
+        return data.astype(np.int16)
+    @staticmethod
+    def _write_wav(output_path: Path, pcm: np.ndarray, sample_rate, channels) -> None:
+        """Write int16 samples to ``output_path`` as a 16-bit WAV file.
+        Raises:
+            ValueError: If ``sample_rate`` or ``channels`` is None (callers wrap
+                this in AudioProcessingError)
+        """
+        # Explicit checks instead of ``assert`` so they also run under ``python -O``
+        if sample_rate is None:
+            raise ValueError("Sample rate must be set")
+        if channels is None:
+            raise ValueError("Channels must be set")
+        with wave.open(str(output_path), "wb") as wav_file:
+            wav_file.setnchannels(channels)
+            wav_file.setsampwidth(2)  # 16-bit audio
+            wav_file.setframerate(sample_rate)
+            wav_file.writeframes(pcm.tobytes())
+    def _save_wav(self, audio: AudioData, output_path: Path) -> None:
+        """Save audio as WAV file.
+        Args:
+            audio: AudioData to save
+            output_path: Path to save the WAV file
+        Raises:
+            AudioProcessingError: If conversion or writing fails
+        """
+        try:
+            pcm = self._to_int16(audio.data)
+            self._write_wav(output_path, pcm, audio.sample_rate, audio.channels)
+        except Exception as e:
+            raise AudioProcessingError(f"Failed to save WAV file: {str(e)}") from e
+    def _save_wav_stream(self, audio_stream: Iterator[AudioData], output_path: Path) -> AudioData:
+        """Save streaming audio as WAV file.
+        Args:
+            audio_stream: Iterator yielding AudioData chunks
+            output_path: Path to save the WAV file
+        Returns:
+            AudioData: Combined audio data from all chunks
+        Raises:
+            AudioProcessingError: If no chunk arrives, chunks disagree on sample
+                rate or channel count, or writing fails
+        """
+        try:
+            pcm_chunks = []  # int16 data that goes into the file
+            float_chunks = []  # float32 data for the return value
+            sample_rate = None
+            channels = None
+            audio_format = None
+            for chunk in audio_stream:
+                # Get audio parameters from first chunk
+                if sample_rate is None:
+                    sample_rate = chunk.sample_rate
+                    channels = chunk.channels
+                    audio_format = chunk.format
+                # Verify consistency
+                if chunk.sample_rate != sample_rate or chunk.channels != channels:
+                    raise AudioProcessingError(
+                        "Inconsistent audio parameters in stream. "
+                        f"Expected {sample_rate}Hz/{channels}ch, "
+                        f"got {chunk.sample_rate}Hz/{chunk.channels}ch"
+                    )
+                if chunk.data.dtype != np.float32:
+                    float_chunks.append(chunk.data.astype(np.float32))
+                else:
+                    float_chunks.append(chunk.data)
+                pcm_chunks.append(self._to_int16(chunk.data))
+            if not pcm_chunks:
+                raise AudioProcessingError("No audio data received from stream")
+            if audio_format is None:
+                raise ValueError("Format must be set")
+            # Write WAV file with complete data
+            self._write_wav(output_path, np.concatenate(pcm_chunks), sample_rate, channels)
+            # Create combined AudioData from the float32 data
+            combined_float32 = np.concatenate(float_chunks)
+            return AudioData(data=combined_float32, sample_rate=sample_rate, channels=channels, format=audio_format)
+        except AudioProcessingError:
+            raise
+        except Exception as e:
+            raise AudioProcessingError(f"Failed to save streaming WAV file: {str(e)}") from e

speechflow/core/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from .base import TTSEngineBase, AudioData
+from .exceptions import TTSError, EngineNotFoundError, AudioProcessingError
+# Public names of the core subpackage: the engine base class, the audio
+# container, and the exception types imported above.
+__all__ = [
+    "TTSEngineBase",
+    "AudioData",
+    "TTSError",
+    "EngineNotFoundError",
+    "AudioProcessingError",
+]

speechflow/core/base.py ADDED Viewed

@@ -0,0 +1,69 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, AsyncIterator, Iterator
+import numpy as np
+@dataclass
+class AudioData:
+    """Container for audio data and metadata.
+    Attributes:
+        data: Sample array
+        sample_rate: Sample rate in Hz
+        channels: Number of channels (default 1)
+        format: Format label (default "pcm")
+    """
+    data: np.ndarray
+    sample_rate: int
+    channels: int = 1
+    format: str = "pcm"
+    @property
+    def duration(self) -> float:
+        """Get audio duration in seconds.
+        The frame count is the length of the first axis of ``data``. For a flat
+        (1-D) array with more than one channel the samples are assumed to be
+        interleaved, so the length is divided by the channel count first.
+        """
+        frames = len(self.data)
+        # A 1-D multi-channel buffer holds ``channels`` samples per frame;
+        # counting raw samples would overstate the duration by that factor.
+        if self.channels and self.channels > 1 and getattr(self.data, "ndim", 1) == 1:
+            frames = frames / self.channels
+        return frames / self.sample_rate
+class TTSEngineBase(ABC):
+    """Abstract base class for TTS engines.
+    Subclasses must implement ``get`` and ``stream``. ``aget`` has a default
+    implementation that runs ``get`` in a worker thread; engines with a native
+    asynchronous API should override it.
+    """
+    def __init__(self):
+        """Initialize TTS engine."""
+    @abstractmethod
+    def get(self, text: str, model: str | None = None, voice: str | None = None) -> AudioData:
+        """Synthesize speech from text.
+        Args:
+            text: Text to synthesize
+            model: Optional model name
+            voice: Optional voice name
+        Returns:
+            AudioData containing the synthesized speech
+        """
+    async def aget(self, text: str, model: str | None = None, voice: str | None = None) -> AudioData:
+        """Asynchronously synthesize speech from text.
+        The default implementation calls ``get`` in a separate thread so the
+        event loop is not blocked. Without it the method would fall through and
+        return None despite its return annotation.
+        Args:
+            text: Text to synthesize
+            model: Optional model name
+            voice: Optional voice name
+        Returns:
+            AudioData containing the synthesized speech
+        """
+        # Imported locally: asyncio is not among this module's top-level imports
+        import asyncio
+        return await asyncio.to_thread(self.get, text, model, voice)
+    @abstractmethod
+    def stream(self, text: str, model: str | None = None, voice: str | None = None) -> Iterator[AudioData]:
+        """Stream synthesized speech in chunks.
+        Args:
+            text: Text to synthesize
+            model: Optional model name
+            voice: Optional voice name
+        Yields:
+            AudioData chunks
+        """

speechflow/core/exceptions.py ADDED Viewed

@@ -0,0 +1,23 @@
+class TTSError(Exception):
+    """Root of the speechflow exception hierarchy; all library errors derive from it."""
+class EngineNotFoundError(TTSError):
+    """Signals that the requested TTS engine is unknown or unsupported."""
+class AudioProcessingError(TTSError):
+    """Signals a failure while processing audio."""
+class ConfigurationError(TTSError):
+    """Signals an invalid configuration."""
+class StreamingError(TTSError):
+    """Signals a failure while streaming audio."""

speechflow/engines/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from .fishaudio import FishAudioTTSEngine
+from .gemini import GeminiTTSEngine
+from .kokoro import KokoroTTSEngine
+from .openai import OpenAITTSEngine
+from .stylebert import StyleBertTTSEngine
+# Public names of the engines subpackage: one engine class per module imported above.
+__all__ = ["FishAudioTTSEngine", "GeminiTTSEngine", "KokoroTTSEngine", "OpenAITTSEngine", "StyleBertTTSEngine"]