speechflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speechflow/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ from .audio import AudioPlayer, AudioWriter
2
+ from .core import AudioData, AudioProcessingError, EngineNotFoundError, TTSEngineBase, TTSError
3
+ from .engines import FishAudioTTSEngine, GeminiTTSEngine, KokoroTTSEngine, OpenAITTSEngine, StyleBertTTSEngine
4
+
5
+ __version__ = "0.1.0"
6
+
7
+ __all__ = [
8
+ # Core
9
+ "TTSEngineBase",
10
+ "AudioData",
11
+ # Exceptions
12
+ "TTSError",
13
+ "EngineNotFoundError",
14
+ "AudioProcessingError",
15
+ # Audio components
16
+ "AudioPlayer",
17
+ "AudioWriter",
18
+ # Engines
19
+ "FishAudioTTSEngine",
20
+ "GeminiTTSEngine",
21
+ "KokoroTTSEngine",
22
+ "OpenAITTSEngine",
23
+ "StyleBertTTSEngine",
24
+ ]
@@ -0,0 +1,7 @@
1
+ from .player import AudioPlayer
2
+ from .writer import AudioWriter
3
+
4
+ __all__ = [
5
+ "AudioPlayer",
6
+ "AudioWriter",
7
+ ]
@@ -0,0 +1,211 @@
1
+ import queue
2
+ import threading
3
+ from typing import Iterator, Optional
4
+
5
+ import numpy as np
6
+ import pyaudio
7
+
8
+ from ..core.base import AudioData
9
+ from ..core.exceptions import AudioProcessingError
10
+
11
+
12
class AudioPlayer:
    """Audio player using PyAudio for both single audio and streaming playback.

    ``play`` writes one complete buffer to the output stream and blocks until
    it has been handed to PyAudio. ``play_stream`` consumes an iterator of
    chunks: the calling thread feeds a bounded queue while a background worker
    thread writes the queued bytes to the output stream.

    All audio is written as 32-bit float samples (``pyaudio.paFloat32``).
    Buffers with another dtype are cast with ``astype(np.float32)``; no
    rescaling is applied, so integer PCM input is presumably expected to be
    normalised by the caller (TODO confirm against the engines).
    """

    def __init__(self):
        # Cheap state is created first so that a failure while creating the
        # PyAudio instance still leaves an object whose finalizer is safe.
        self.stream: Optional[pyaudio.Stream] = None
        self.current_sample_rate: Optional[int] = None
        self.current_channels: Optional[int] = None

        # For streaming playback
        self.audio_queue = queue.Queue(maxsize=100)
        self.stop_event = threading.Event()
        self.playback_thread = None

        self.pyaudio = pyaudio.PyAudio()

    @staticmethod
    def _as_float32(samples: np.ndarray) -> np.ndarray:
        """Return ``samples`` as float32, reusing the array when already float32."""
        if samples.dtype == np.float32:
            return samples
        return samples.astype(np.float32)

    def _ensure_stream(self, sample_rate: int, channels: int) -> None:
        """Ensure an output stream is open with the given parameters.

        An existing stream is reused when its sample rate and channel count
        match; otherwise it is closed and a new one is opened.
        """
        if (
            self.stream is not None
            and self.current_sample_rate == sample_rate
            and self.current_channels == channels
        ):
            return

        # Close existing stream if any
        if self.stream is not None:
            self.close_stream()

        self.stream = self.pyaudio.open(
            format=pyaudio.paFloat32,
            channels=channels,
            rate=sample_rate,
            output=True,
            frames_per_buffer=2048,  # Balanced buffer for smooth playback
        )
        self.current_sample_rate = sample_rate
        self.current_channels = channels

    def play(self, audio: "AudioData") -> "AudioData":
        """Play audio data (blocking).

        Args:
            audio: AudioData to play

        Returns:
            The same ``audio`` object that was passed in.

        Raises:
            AudioProcessingError: If opening the stream or writing to it fails.
        """
        try:
            self._ensure_stream(audio.sample_rate, audio.channels)
            samples = self._as_float32(audio.data)

            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if self.stream is None:
                raise RuntimeError("Stream must be initialized before playing")
            self.stream.write(samples.tobytes())
        except Exception as e:
            raise AudioProcessingError(f"Failed to play audio: {str(e)}") from e

        return audio

    def play_stream(self, audio_stream: Iterator["AudioData"]) -> "AudioData":
        """Play audio from a stream of AudioData chunks.

        Playback starts as soon as the first chunk arrives. When the queue is
        full the producer waits for the worker to make room instead of
        dropping the chunk, and the method waits for the worker to finish
        before closing the stream, so long streams are not cut short.

        Args:
            audio_stream: Iterator yielding AudioData chunks

        Returns:
            AudioData: Combined audio data from all chunks

        Raises:
            AudioProcessingError: If the stream yielded no chunks, or the
                chunks carried no sample rate / channel count / format.
        """
        # Anything left over from an earlier, aborted run must not be played.
        self._drain_queue()

        # Start playback thread
        self.stop_event.clear()
        worker = threading.Thread(target=self._playback_worker)
        self.playback_thread = worker
        worker.start()

        # Collect all chunks for return value
        all_chunks = []
        sample_rate = None
        channels = None
        audio_format = None
        playing = True  # turns False once the worker can no longer accept data

        try:
            for chunk in audio_stream:
                if self.stop_event.is_set():
                    break

                # Open (or re-open) the output for the first chunk's parameters.
                # Nothing is queued yet, so the worker is not using the stream.
                if not all_chunks:
                    self._ensure_stream(chunk.sample_rate, chunk.channels)

                # Set audio parameters from first valid chunk
                if sample_rate is None and chunk.sample_rate is not None:
                    sample_rate = chunk.sample_rate
                if channels is None and chunk.channels is not None:
                    channels = chunk.channels
                if audio_format is None and chunk.format is not None:
                    audio_format = chunk.format

                samples = self._as_float32(chunk.data)
                all_chunks.append(samples)

                # Keep collecting for the return value even if playback died.
                if playing:
                    playing = self._enqueue(samples.tobytes())
        finally:
            # Signal end of stream (skipped automatically if the worker is gone)
            self._enqueue(None)

            # Wait for playback to complete. Short timeouts keep the wait
            # interruptible; the worker always ends on the sentinel, on
            # stop(), or on a playback error.
            while worker.is_alive():
                worker.join(timeout=0.5)

            # Clean up
            self._drain_queue()
            self._cleanup_stream()

        # Combine all chunks into a single AudioData
        if not all_chunks:
            raise AudioProcessingError("No audio chunks received from stream")

        # Check if audio parameters were initialized
        if sample_rate is None or channels is None or audio_format is None:
            raise AudioProcessingError("Audio parameters not initialized. Stream may have ended without sending any chunks.")

        combined_data = np.concatenate(all_chunks)
        return AudioData(data=combined_data, sample_rate=sample_rate, channels=channels, format=audio_format)

    def _enqueue(self, item) -> bool:
        """Hand ``item`` to the playback worker, waiting for queue space.

        Args:
            item: Raw audio bytes, or ``None`` as the end-of-stream marker.

        Returns:
            True if the item was queued; False if the worker is not running or
            a stop was requested before the data could be queued.
        """
        worker = self.playback_thread
        while worker is not None and worker.is_alive():
            if item is not None and self.stop_event.is_set():
                return False
            try:
                self.audio_queue.put(item, timeout=0.1)
            except queue.Full:
                # Re-check that the worker is alive before waiting again, so a
                # dead worker can never leave the producer blocked.
                continue
            return True
        return False

    def _drain_queue(self) -> None:
        """Discard every item currently waiting in the playback queue."""
        try:
            while True:
                self.audio_queue.get_nowait()
        except queue.Empty:
            pass

    def _playback_worker(self):
        """Worker thread for continuous playback.

        Runs until the end-of-stream marker (``None``) is dequeued, the stop
        event is set, or writing to the stream raises.
        """
        while not self.stop_event.is_set():
            try:
                audio_data = self.audio_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            if audio_data is None:
                # End of stream marker
                break

            try:
                if self.stream and not self.stream.is_stopped():
                    self.stream.write(audio_data)
            except Exception as e:
                print(f"Playback error: {e}")
                break

    def _cleanup_stream(self):
        """Stop and close the PyAudio stream and forget its parameters."""
        stream, self.stream = self.stream, None
        self.current_sample_rate = None
        self.current_channels = None
        if stream is None:
            return
        try:
            stream.stop_stream()
        finally:
            # Close even if stopping failed, so the device is released.
            stream.close()

    def close_stream(self) -> None:
        """Close the current stream."""
        self._cleanup_stream()

    def stop(self):
        """Stop playback and clean up resources."""
        self.stop_event.set()
        self._drain_queue()

        # Wait for playback thread
        worker = self.playback_thread
        if worker and worker.is_alive():
            worker.join(timeout=2.0)

        self._cleanup_stream()

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop()

    def __del__(self):
        """Stop playback and terminate the PyAudio instance."""
        # The object may be only partially initialised (constructor failed or
        # was bypassed), so touch only the state that actually exists.
        required = ("stream", "audio_queue", "stop_event", "playback_thread")
        if all(hasattr(self, name) for name in required):
            self.stop()
        if hasattr(self, "pyaudio"):
            self.pyaudio.terminate()
@@ -0,0 +1,162 @@
1
+ import wave
2
+ from pathlib import Path
3
+ from typing import Iterator
4
+
5
+ import numpy as np
6
+
7
+ from ..core.base import AudioData
8
+ from ..core.exceptions import AudioProcessingError
9
+
10
+
11
+ class AudioWriter:
12
+ """Audio file writer supporting various formats."""
13
+
14
+ def save(self, audio: AudioData, output_path: str | Path) -> AudioData:
15
+ """Save audio data to file.
16
+
17
+ Args:
18
+ audio: AudioData to save
19
+ output_path: Path to save the audio file
20
+ """
21
+ output_path = Path(output_path)
22
+ output_path.parent.mkdir(parents=True, exist_ok=True)
23
+
24
+ # Get file extension
25
+ extension = output_path.suffix.lower()
26
+
27
+ if extension in [".wav", ".wave"]:
28
+ self._save_wav(audio, output_path)
29
+ else:
30
+ raise AudioProcessingError(f"Unsupported audio format: {extension}")
31
+
32
+ return audio
33
+
34
+ def save_stream(self, audio_stream: Iterator[AudioData], output_path: str | Path) -> AudioData:
35
+ """Save streaming audio data to file.
36
+
37
+ This method accumulates all chunks from the stream and saves them as a single file.
38
+ For WAV format, it writes the header after collecting all data to ensure correct file size.
39
+
40
+ Args:
41
+ audio_stream: Iterator yielding AudioData chunks
42
+ output_path: Path to save the audio file
43
+
44
+ Returns:
45
+ AudioData: Combined audio data from all chunks
46
+ """
47
+ output_path = Path(output_path)
48
+ output_path.parent.mkdir(parents=True, exist_ok=True)
49
+
50
+ # Get file extension
51
+ extension = output_path.suffix.lower()
52
+
53
+ if extension in [".wav", ".wave"]:
54
+ return self._save_wav_stream(audio_stream, output_path)
55
+ else:
56
+ raise AudioProcessingError(f"Unsupported audio format for streaming: {extension}")
57
+
58
+ def _save_wav(self, audio: AudioData, output_path: Path) -> None:
59
+ """Save audio as WAV file.
60
+
61
+ Args:
62
+ audio: AudioData to save
63
+ output_path: Path to save the WAV file
64
+ """
65
+ try:
66
+ # Convert float32 to int16 for WAV format
67
+ if audio.data.dtype == np.float32:
68
+ # Clip to [-1, 1] range and convert to int16
69
+ audio_data = np.clip(audio.data, -1.0, 1.0)
70
+ audio_data = (audio_data * 32767).astype(np.int16)
71
+ else:
72
+ audio_data = audio.data.astype(np.int16)
73
+
74
+ # Write WAV file
75
+ assert audio.sample_rate is not None, "Sample rate must be set"
76
+ assert audio.channels is not None, "Channels must be set"
77
+
78
+ with wave.open(str(output_path), "wb") as wav_file:
79
+ wav_file.setnchannels(audio.channels)
80
+ wav_file.setsampwidth(2) # 16-bit audio
81
+ wav_file.setframerate(audio.sample_rate)
82
+ wav_file.writeframes(audio_data.tobytes())
83
+
84
+ except Exception as e:
85
+ raise AudioProcessingError(f"Failed to save WAV file: {str(e)}")
86
+
87
+ def _save_wav_stream(self, audio_stream: Iterator[AudioData], output_path: Path) -> AudioData:
88
+ """Save streaming audio as WAV file.
89
+
90
+ Args:
91
+ audio_stream: Iterator yielding AudioData chunks
92
+ output_path: Path to save the WAV file
93
+
94
+ Returns:
95
+ AudioData: Combined audio data from all chunks
96
+ """
97
+ try:
98
+ # Collect all chunks
99
+ chunks = []
100
+ chunks_float32 = [] # Keep original float32 data for return value
101
+ sample_rate = None
102
+ channels = None
103
+ format = None
104
+
105
+ for chunk in audio_stream:
106
+ # Get audio parameters from first chunk
107
+ if sample_rate is None:
108
+ sample_rate = chunk.sample_rate
109
+ channels = chunk.channels
110
+ format = chunk.format
111
+
112
+ # Verify consistency
113
+ if chunk.sample_rate != sample_rate or chunk.channels != channels:
114
+ raise AudioProcessingError(
115
+ "Inconsistent audio parameters in stream. "
116
+ f"Expected {sample_rate}Hz/{channels}ch, "
117
+ f"got {chunk.sample_rate}Hz/{chunk.channels}ch"
118
+ )
119
+
120
+ # Store original float32 data for return value
121
+ if chunk.data.dtype != np.float32:
122
+ chunks_float32.append(chunk.data.astype(np.float32))
123
+ else:
124
+ chunks_float32.append(chunk.data)
125
+
126
+ # Convert and store chunk for saving
127
+ if chunk.data.dtype == np.float32:
128
+ audio_data = np.clip(chunk.data, -1.0, 1.0)
129
+ audio_data = (audio_data * 32767).astype(np.int16)
130
+ else:
131
+ audio_data = chunk.data.astype(np.int16)
132
+
133
+ chunks.append(audio_data)
134
+
135
+ if not chunks:
136
+ raise AudioProcessingError("No audio data received from stream")
137
+
138
+ # Concatenate all chunks
139
+ combined_audio = np.concatenate(chunks)
140
+
141
+ # Write WAV file with complete data
142
+ assert sample_rate is not None, "Sample rate must be set"
143
+ assert channels is not None, "Channels must be set"
144
+ assert format is not None, "Format must be set"
145
+ with wave.open(str(output_path), "wb") as wav_file:
146
+ wav_file.setnchannels(channels)
147
+ wav_file.setsampwidth(2) # 16-bit audio
148
+ wav_file.setframerate(sample_rate)
149
+ wav_file.writeframes(combined_audio.tobytes())
150
+
151
+ # Create combined AudioData from original float32 data
152
+ combined_float32 = np.concatenate(chunks_float32)
153
+
154
+ assert sample_rate is not None, "Sample rate must be set"
155
+ assert channels is not None, "Channels must be set"
156
+ assert format is not None, "Format must be set"
157
+ return AudioData(data=combined_float32, sample_rate=sample_rate, channels=channels, format=format)
158
+
159
+ except AudioProcessingError:
160
+ raise
161
+ except Exception as e:
162
+ raise AudioProcessingError(f"Failed to save streaming WAV file: {str(e)}")
@@ -0,0 +1,10 @@
1
+ from .base import TTSEngineBase, AudioData
2
+ from .exceptions import TTSError, EngineNotFoundError, AudioProcessingError
3
+
4
+ __all__ = [
5
+ "TTSEngineBase",
6
+ "AudioData",
7
+ "TTSError",
8
+ "EngineNotFoundError",
9
+ "AudioProcessingError",
10
+ ]
@@ -0,0 +1,69 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from typing import Any, AsyncIterator, Iterator
4
+
5
+ import numpy as np
6
+
7
+
8
@dataclass
class AudioData:
    """Container for audio data and metadata.

    Attributes:
        data: Sample buffer.
        sample_rate: Sample rate in Hz.
        channels: Number of audio channels (mono by default).
        format: Label for the sample encoding ("pcm" by default).
    """

    data: np.ndarray
    sample_rate: int
    channels: int = 1
    format: str = "pcm"

    @property
    def duration(self) -> float:
        """Get audio duration in seconds.

        The frame count is the length of the first axis of ``data``. A flat
        (1-D) buffer with more than one channel is treated as interleaved, so
        its length is divided by ``channels``; otherwise a stereo buffer would
        report twice its real duration.
        """
        frames = len(self.data)
        if self.channels and self.channels > 1 and np.ndim(self.data) == 1:
            frames /= self.channels
        return frames / self.sample_rate
21
+
22
+
23
+ class TTSEngineBase(ABC):
24
+ """Abstract base class for TTS engines."""
25
+
26
+ def __init__(self):
27
+ """Initialize TTS engine."""
28
+ pass
29
+
30
+ @abstractmethod
31
+ def get(self, text: str, model: str | None = None, voice: str | None = None) -> AudioData:
32
+ """Synthesize speech from text.
33
+
34
+ Args:
35
+ text: Text to synthesize
36
+ model: Optional model name
37
+ voice: Optional voice name
38
+
39
+ Returns:
40
+ AudioData containing the synthesized speech
41
+ """
42
+ pass
43
+
44
+ async def aget(self, text: str, model: str | None = None, voice: str | None = None) -> AudioData:
45
+ """Asynchronously synthesize speech from text.
46
+
47
+ Args:
48
+ text: Text to synthesize
49
+ model: Optional model name
50
+ voice: Optional voice name
51
+
52
+ Returns:
53
+ AudioData containing the synthesized speech
54
+ """
55
+ pass
56
+
57
+ @abstractmethod
58
+ def stream(self, text: str, model: str | None = None, voice: str | None = None) -> Iterator[AudioData]:
59
+ """Stream synthesized speech in chunks.
60
+
61
+ Args:
62
+ text: Text to synthesize
63
+ model: Optional model name
64
+ voice: Optional voice name
65
+
66
+ Yields:
67
+ AudioData chunks
68
+ """
69
+ pass
@@ -0,0 +1,23 @@
1
class TTSError(Exception):
    """Base exception for the SpeechFlow TTS library.

    Catch this type to handle any of the library-specific errors that
    subclass it.
    """
4
+
5
+
6
class EngineNotFoundError(TTSError):
    """Signals that the requested TTS engine is unknown or unsupported."""
9
+
10
+
11
class AudioProcessingError(TTSError):
    """Signals a failure while processing audio."""
14
+
15
+
16
class ConfigurationError(TTSError):
    """Signals that the supplied configuration is invalid."""
19
+
20
+
21
class StreamingError(TTSError):
    """Signals a failure while streaming audio."""
@@ -0,0 +1,7 @@
1
+ from .fishaudio import FishAudioTTSEngine
2
+ from .gemini import GeminiTTSEngine
3
+ from .kokoro import KokoroTTSEngine
4
+ from .openai import OpenAITTSEngine
5
+ from .stylebert import StyleBertTTSEngine
6
+
7
+ __all__ = ["FishAudioTTSEngine", "GeminiTTSEngine", "KokoroTTSEngine", "OpenAITTSEngine", "StyleBertTTSEngine"]