vision-agents-plugins-kokoro 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vision-agents-plugins-kokoro might be problematic. Click here for more details.

.gitignore CHANGED
@@ -84,3 +84,4 @@ stream-py/
84
84
  # Artifacts / assets
85
85
  *.pt
86
86
  *.kef
87
+ *.onnx
PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-kokoro
3
- Version: 0.1.9
3
+ Version: 0.1.12
4
4
  Summary: Kokoro TTS integration for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
vision_agents/plugins/kokoro/tts.py CHANGED
@@ -2,12 +2,12 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import logging
5
+ from typing import AsyncIterator, Iterator, List, Optional
5
6
 
6
7
  import numpy as np
7
- from typing import AsyncIterator, List, Optional
8
8
 
9
9
  from vision_agents.core import tts
10
- from getstream.video.rtc.audio_track import AudioStreamTrack
10
+ from getstream.video.rtc.track_util import PcmData, AudioFormat
11
11
 
12
12
  try:
13
13
  from kokoro import KPipeline # type: ignore
@@ -15,6 +15,9 @@ except ModuleNotFoundError: # pragma: no cover – mocked during CI
15
15
  KPipeline = None # type: ignore # noqa: N816
16
16
 
17
17
 
18
+ logger = logging.getLogger(__name__)
19
+
20
+
18
21
  class TTS(tts.TTS):
19
22
  """Text-to-Speech plugin backed by the Kokoro-82M model."""
20
23
 
@@ -44,22 +47,9 @@ class TTS(tts.TTS):
44
47
  self.sample_rate = sample_rate
45
48
  self.client = client if client is not None else self._pipeline
46
49
 
47
- def get_required_framerate(self) -> int:
48
- """Get the required framerate for Kokoro TTS."""
49
- return self.sample_rate
50
-
51
- def get_required_stereo(self) -> bool:
52
- """Get whether Kokoro TTS requires stereo audio."""
53
- return False # Kokoro returns mono audio
54
-
55
- def set_output_track(self, track: AudioStreamTrack) -> None: # noqa: D401
56
- if track.framerate != self.sample_rate:
57
- raise TypeError(
58
- f"Invalid framerate {track.framerate}, Kokoro requires {self.sample_rate} Hz"
59
- )
60
- super().set_output_track(track)
61
-
62
- async def stream_audio(self, text: str, *_, **__) -> AsyncIterator[bytes]: # noqa: D401
50
+ async def stream_audio(
51
+ self, text: str, *_, **__
52
+ ) -> PcmData | Iterator[PcmData] | AsyncIterator[PcmData]: # noqa: D401
63
53
  loop = asyncio.get_event_loop()
64
54
  chunks: List[bytes] = await loop.run_in_executor(
65
55
  None, lambda: list(self._generate_chunks(text))
@@ -67,7 +57,12 @@ class TTS(tts.TTS):
67
57
 
68
58
  async def _aiter():
69
59
  for chunk in chunks:
70
- yield chunk
60
+ yield PcmData.from_bytes(
61
+ chunk,
62
+ sample_rate=self.sample_rate,
63
+ channels=1,
64
+ format=AudioFormat.S16,
65
+ )
71
66
 
72
67
  return _aiter()
73
68
 
@@ -76,11 +71,7 @@ class TTS(tts.TTS):
76
71
  Clears the queue and stops playing audio.
77
72
 
78
73
  """
79
- try:
80
- await self.track.flush()
81
- return
82
- except Exception as e:
83
- logging.error(f"Error flushing audio track: {e}")
74
+ logger.info("🎤 Kokoro TTS stop requested (no-op)")
84
75
 
85
76
  def _generate_chunks(self, text: str):
86
77
  for _gs, _ps, audio in self._pipeline(
vision_agents_plugins_kokoro-0.1.12.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-kokoro
3
- Version: 0.1.9
3
+ Version: 0.1.12
4
4
  Summary: Kokoro TTS integration for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
vision_agents_plugins_kokoro-0.1.12.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
1
+ ./.gitignore,sha256=ye7v72rmcYcz93U_u9IyYUvYJKEXGElBsTevPVyASo0,923
2
+ ./PKG-INFO,sha256=f4jj6wXm6Avw3NcTzQODNFTkCM1cwR3vMgzV1ULQges,2045
3
+ ./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
4
+ ./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
5
+ ./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
6
+ ./vision_agents/plugins/kokoro/tts.py,sha256=_vCCRgMOnYj5IlibxfOnvZf_3wYJcdTurswRtdSGRhs,2510
7
+ vision_agents_plugins_kokoro-0.1.12.dist-info/METADATA,sha256=f4jj6wXm6Avw3NcTzQODNFTkCM1cwR3vMgzV1ULQges,2045
8
+ vision_agents_plugins_kokoro-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ vision_agents_plugins_kokoro-0.1.12.dist-info/RECORD,,
vision_agents_plugins_kokoro-0.1.9.dist-info/RECORD REMOVED
@@ -1,9 +0,0 @@
1
- ./.gitignore,sha256=S6wPCu4rBDB_yyTYoXbMIR-pn4OPv6b3Ulnx1n5RWvo,916
2
- ./PKG-INFO,sha256=459Q3UkPTN25M6CwXOwe2YTNA99gh9CeXx8g4G7z6bA,2044
3
- ./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
4
- ./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
5
- ./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
6
- ./vision_agents/plugins/kokoro/tts.py,sha256=-XjGD0riPnXsCGpVWo9EO42B1a9mV3CmvUVPhqTnyKA,2921
7
- vision_agents_plugins_kokoro-0.1.9.dist-info/METADATA,sha256=459Q3UkPTN25M6CwXOwe2YTNA99gh9CeXx8g4G7z6bA,2044
8
- vision_agents_plugins_kokoro-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- vision_agents_plugins_kokoro-0.1.9.dist-info/RECORD,,