sunholo 0.106.0__py3-none-any.whl → 0.106.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/senses/stream_voice.py +122 -96
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/METADATA +2 -2
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/RECORD +7 -7
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/WHEEL +0 -0
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/top_level.txt +0 -0
sunholo/senses/stream_voice.py
CHANGED
@@ -60,7 +60,10 @@ class StreamingTTS:
         self.voice_gender = texttospeech.SsmlVoiceGender.NEUTRAL
         self.voice_name = "en-GB-Journey-D"
         # Audio processing parameters
-
+        # Separate fade durations for playback and file saving
+        self.playback_fade_duration = 0.05  # 50ms fade for real-time playback
+        self.file_fade_duration = 0.01  # 10ms fade for file saving
+        self.stream = None
         self._initialize_audio_device()

     def set_voice(self, voice_name: str):
@@ -137,111 +140,101 @@ class StreamingTTS:
             sd.default.channels = 1
             sd.default.dtype = np.int16

-            #
-
-            with sd.OutputStream(
+            # Initialize persistent output stream
+            self.stream = sd.OutputStream(
                 samplerate=self.sample_rate,
                 channels=1,
                 dtype=np.int16,
                 latency='low'
-            )
-
+            )
+            self.stream.start()

             log.info("Audio device initialized successfully")
         except Exception as e:
             log.error(f"Error initializing audio device: {e}")
             raise

-    def _make_fade(
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            specified in seconds).
-        out_length : int, optional
-            Length of fade-out in samples. If not specified, `fade_in` is
-            used also for the fade-out.
-        type : {'t', 'q', 'h', 'l', 'p'}, optional
-            Select the shape of the fade curve: 'q' for quarter of a sine
-            wave, 'h' for half a sine wave, 't' for linear ("triangular")
-            slope, 'l' for logarithmic, and 'p' for inverted parabola.
-            The default is logarithmic.
-        copy : bool, optional
-            If `False`, the fade is applied in-place and a reference to
-            `x` is returned.
-
-        """
-        x = np.array(x, copy=copy)
-
-        if out_length is None:
-            out_length = in_length
-
-        def make_fade(length, type):
-            fade = np.arange(length) / length
-            if type == 't':  # triangle
-                pass
-            elif type == 'q':  # quarter of sinewave
-                fade = np.sin(fade * np.pi / 2)
-            elif type == 'h':  # half of sinewave... eh cosine wave
-                fade = (1 - np.cos(fade * np.pi)) / 2
-            elif type == 'l':  # logarithmic
-                fade = np.power(0.1, (1 - fade) * 5)  # 5 means 100 db attenuation
-            elif type == 'p':  # inverted parabola
-                fade = (1 - (1 - fade)**2)
-            else:
-                raise ValueError("Unknown fade type {0!r}".format(type))
-            return fade
-
-        # Using .T w/o [:] causes error: https://github.com/numpy/numpy/issues/2667
-        x[:in_length].T[:] *= make_fade(in_length, type)
-        x[len(x) - out_length:].T[:] *= make_fade(out_length, type)[::-1]
-        return x
+    def _make_fade(self, length: int, fade_type: str = 'l') -> np.ndarray:
+        """Generate a fade curve of specified length and type."""
+        fade = np.arange(length, dtype=np.float32) / length
+
+        if fade_type == 't':  # triangle
+            pass
+        elif fade_type == 'q':  # quarter of sinewave
+            fade = np.sin(fade * np.pi / 2)
+        elif fade_type == 'h':  # half of sinewave
+            fade = (1 - np.cos(fade * np.pi)) / 2
+        elif fade_type == 'l':  # logarithmic
+            fade = np.power(0.1, (1 - fade) * 5)
+        elif fade_type == 'p':  # inverted parabola
+            fade = (1 - (1 - fade)**2)
+        else:
+            raise ValueError(f"Unknown fade type {fade_type!r}")
+
+        return fade

-    def _apply_fade(self, audio: np.ndarray, fade_in: bool = True, fade_out: bool = True) -> np.ndarray:
-        """Apply fade in/out to audio
-
-
+    def _apply_fade(self, audio: np.ndarray, fade_duration: float, fade_in: bool = True, fade_out: bool = True) -> np.ndarray:
+        """Apply fade in/out to audio with specified duration."""
+        if audio.ndim != 1:
+            raise ValueError("Audio must be 1-dimensional")
+
+        fade_length = int(fade_duration * self.sample_rate)
+        audio = audio.astype(np.float32)

         if fade_in:
-            fade_in_curve =
+            fade_in_curve = self._make_fade(fade_length, 'l')
             audio[:fade_length] *= fade_in_curve

         if fade_out:
-            fade_out_curve =
-            audio[-fade_length:] *= fade_out_curve
+            fade_out_curve = self._make_fade(fade_length, 'l')
+            audio[-fade_length:] *= fade_out_curve[::-1]

-        return audio.astype(np.int16)
+        return audio.astype(np.int16)
+

-    def _play_audio_chunk(self, audio_chunk: np.ndarray):
+    def _play_audio_chunk(self, audio_chunk: np.ndarray, is_final_chunk: bool = False):
         """Play a single audio chunk with proper device handling."""
         try:
-            # Add
-
-
-            processed_audio = self._apply_fade(audio_with_padding)
+            # Add longer padding for the final chunk
+            padding_duration = 0.1 if is_final_chunk else 0.02
+            padding = np.zeros(int(padding_duration * self.sample_rate), dtype=np.int16)

-
-
-
-
-
-
-
-
-
+            if is_final_chunk:
+                # For final chunk, add extra padding and longer fade
+                audio_with_padding = np.concatenate([
+                    padding,
+                    audio_chunk,
+                    padding,
+                    np.zeros(int(0.2 * self.sample_rate), dtype=np.int16)  # Extra tail padding
+                ])
+                fade_duration = self.playback_fade_duration * 2  # Longer fade for end
+            else:
+                audio_with_padding = np.concatenate([padding, audio_chunk, padding])
+                fade_duration = self.playback_fade_duration
+
+            processed_audio = self._apply_fade(
+                audio_with_padding,
+                fade_duration=fade_duration,
+                fade_in=True,
+                fade_out=True
+            )
+
+            if self.stream and self.stream.active:
+                self.stream.write(processed_audio)
+                if is_final_chunk:
+                    # Write a small buffer of silence at the end
+                    final_silence = np.zeros(int(0.1 * self.sample_rate), dtype=np.int16)
+                    self.stream.write(final_silence)
+            else:
+                with sd.OutputStream(
+                    samplerate=self.sample_rate,
+                    channels=1,
+                    dtype=np.int16,
+                    latency='low'
+                ) as temp_stream:
+                    temp_stream.write(processed_audio)
+                    if is_final_chunk:
+                        temp_stream.write(np.zeros(int(0.1 * self.sample_rate), dtype=np.int16))

         except Exception as e:
             log.error(f"Error during audio playback: {e}")
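
For reference, the fade logic added in this hunk can be exercised on its own. The following is a minimal sketch, not code from the package: the 24 kHz sample rate and the synthetic noise chunk are assumptions for illustration only.

import numpy as np

sample_rate = 24000  # assumed sample rate for illustration
chunk = (np.random.randn(sample_rate) * 3000).astype(np.int16)  # 1 s of noise as a stand-in chunk

def make_fade(length: int) -> np.ndarray:
    """Logarithmic fade curve, mirroring the 'l' branch of _make_fade above."""
    fade = np.arange(length, dtype=np.float32) / length
    return np.power(0.1, (1 - fade) * 5)  # roughly 100 dB attenuation at the start

fade_length = int(0.05 * sample_rate)                  # 50 ms, like playback_fade_duration
audio = chunk.astype(np.float32)
audio[:fade_length] *= make_fade(fade_length)          # fade in
audio[-fade_length:] *= make_fade(fade_length)[::-1]   # fade out uses the reversed curve
faded = audio.astype(np.int16)

The reversed curve at the tail is what _apply_fade now does with fade_out_curve[::-1], so chunk boundaries start and end near silence instead of clicking.
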
@@ -250,11 +243,31 @@ class StreamingTTS:
     def audio_player(self):
         """Continuously play audio chunks from the queue."""
         log.info("Audio player started")
-
-
-
-
-
+        try:
+            while self.is_playing or not self.audio_queue.empty():
+                if not self.audio_queue.empty():
+                    audio_chunk = self.audio_queue.get()
+                    self._play_audio_chunk(audio_chunk)
+                time.sleep(0.005)  # Reduced sleep time for more responsive playback
+        finally:
+            # Ensure stream is properly closed
+            if self.stream and self.stream.active:
+                self.stream.stop()
+                self.stream.close()
+                self.stream = None
+
+    def __del__(self):
+        """Cleanup method to ensure stream is closed."""
+        if hasattr(self, 'stream') and self.stream and self.stream.active:
+            # Write a small silence buffer before closing
+            final_silence = np.zeros(int(0.1 * self.sample_rate), dtype=np.int16)
+            try:
+                self.stream.write(final_silence)
+                time.sleep(0.1)  # Let the final audio finish playing
+            except Exception:
+                pass  # Ignore errors during cleanup
+            self.stream.stop()
+            self.stream.close()

     def process_text_stream(self, text_generator):
         """Process incoming text stream and convert to audio."""
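
The reworked audio_player() above keeps a single sounddevice OutputStream open for the whole session and releases it in the finally block, rather than opening a device per chunk. A minimal standalone sketch of that open-write-close pattern, with an assumed sample rate and stand-in queue contents:

import queue
import numpy as np
import sounddevice as sd

sample_rate = 24000  # assumed sample rate
audio_queue = queue.Queue()
audio_queue.put(np.zeros(sample_rate, dtype=np.int16))  # 1 s of silence as a stand-in chunk

stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16, latency='low')
stream.start()
try:
    while not audio_queue.empty():
        stream.write(audio_queue.get())  # blocking write of one chunk to the open device
finally:
    stream.stop()   # mirror the finally: cleanup in audio_player()
    stream.close()
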
@@ -281,24 +294,37 @@ class StreamingTTS:
         player_thread.join()

     def save_to_file(self, text_generator, output_path):
-        """Save the audio to a WAV file
+        """Save the audio to a WAV file with minimal fading."""
         import wave

         all_audio = []
         for text_chunk in text_generator:
             audio_chunk = self.text_to_audio(text_chunk)
-
+            # Use shorter fade duration for file saving
+            processed_chunk = self._apply_fade(
+                audio_chunk,
+                fade_duration=self.file_fade_duration
+            )
             all_audio.append(processed_chunk)

-        # Add
-        silence = np.zeros(int(0.
+        # Add minimal silence between chunks
+        silence = np.zeros(int(0.05 * self.sample_rate), dtype=np.int16)
         final_audio = silence
-
+
+        for i, chunk in enumerate(all_audio):
+            if i == len(all_audio) - 1:
+                # For the last chunk, use a slightly longer fade out
+                chunk = self._apply_fade(
+                    chunk,
+                    fade_duration=self.file_fade_duration * 2,
+                    fade_in=False,
+                    fade_out=True
+                )
             final_audio = np.concatenate([final_audio, chunk, silence])

         with wave.open(output_path, 'wb') as wav_file:
             wav_file.setnchannels(1)
-            wav_file.setsampwidth(2)
+            wav_file.setsampwidth(2)
             wav_file.setframerate(self.sample_rate)
             wav_file.writeframes(final_audio.tobytes())

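
Taken together, the 0.106.1 changes might be driven as below. This is a sketch only: the no-argument constructor and the toy text generator are assumptions, and running it requires the Google Cloud Text-to-Speech setup that StreamingTTS depends on. The voice name and the save_to_file / set_voice signatures come from this diff.

from sunholo.senses.stream_voice import StreamingTTS

def text_chunks():
    # hypothetical text generator yielding pieces of text to speak
    yield "Hello from sunholo. "
    yield "This text is converted to speech as it streams."

tts = StreamingTTS()                   # assumed default construction
tts.set_voice("en-GB-Journey-D")       # default voice shown in this diff
tts.playback_fade_duration = 0.05      # 50 ms fades for live playback (new in 0.106.1)
tts.file_fade_duration = 0.01          # 10 ms fades when writing WAV files (new in 0.106.1)
tts.save_to_file(text_chunks(), "speech.wav")
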

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/METADATA
CHANGED

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.106.0
+Version: 0.106.1
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.106.0.tar.gz
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.106.1.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/RECORD
CHANGED

@@ -116,7 +116,7 @@ sunholo/qna/__init__.py,sha256=F8q1uR_HreoSX0IfmKY1qoSwIgXhO2Q8kuDSxh9_-EE,28
 sunholo/qna/parsers.py,sha256=YpOaK5S_LxJ6FbliSYDc3AVOJ62RVduayoNnzi_p8CM,2494
 sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
 sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
-sunholo/senses/stream_voice.py,sha256=
+sunholo/senses/stream_voice.py,sha256=JmHxhfrm97sIPIdb28n5BxcPsAPFgkKZGy6xNbclFtw,16832
 sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
 sunholo/streaming/content_buffer.py,sha256=0LHMwH4ctq5kjhIgMFNH0bA1RL0jMISlLVzzLcFrvv4,12766
 sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
@@ -149,9 +149,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.106.
-sunholo-0.106.
-sunholo-0.106.
-sunholo-0.106.
-sunholo-0.106.
-sunholo-0.106.
+sunholo-0.106.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.106.1.dist-info/METADATA,sha256=LT-NrNweknkdB5XPQFE9yEPQ6Z421flnTD4utV8MjEM,8670
+sunholo-0.106.1.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+sunholo-0.106.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.106.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.106.1.dist-info/RECORD,,

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/LICENSE.txt
File without changes

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/WHEEL
File without changes

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/entry_points.txt
File without changes

{sunholo-0.106.0.dist-info → sunholo-0.106.1.dist-info}/top_level.txt
File without changes