vision-agents-plugins-kokoro 0.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vision-agents-plugins-kokoro might be problematic. Click here for more details.

.gitignore ADDED
@@ -0,0 +1,32 @@
1
+ */__pycache__
2
+ */chat/__pycache__
3
+ */video/__pycache__
4
+ */chat/sync/__pycache__
5
+ */chat/async_/__pycache__
6
+ */sync/__pycache__
7
+ */async_/__pycache__
8
+ */video/sync/__pycache__
9
+ */model/__pycache__/
10
+ */cli/__pycache__
11
+ */cli/__pycache__
12
+ .env
13
+ .venv
14
+ .vscode/settings.json
15
+ *.pyc
16
+ dist/*
17
+ dist/*
18
+ *.log
19
+ .python-version
20
+ pyvenv.cfg
21
+ .idea*
22
+ bin/*
23
+ lib/*
24
+ shell.nix
25
+ pyrightconfig.json
26
+ .DS_Store
27
+
28
+ *.egg-info/
29
+ *.egg
30
+ *.pt
31
+ *.kef
32
+ .env.bak
PKG-INFO ADDED
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-kokoro
3
+ Version: 0.0.17
4
+ Summary: Kokoro TTS integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: kokoro>=0.9.4
12
+ Requires-Dist: misaki[en]>=0.9.4
13
+ Requires-Dist: numpy<2.3,>=2.2.6
14
+ Requires-Dist: vision-agents
15
+ Description-Content-Type: text/markdown
16
+
17
+ # GetStream Kokoro Plugin
18
+
19
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
20
+
21
+ It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
22
+
23
+ ```py
24
+ from vision_agents.plugins.kokoro import TTS
25
+ from getstream.video.rtc.audio_track import AudioStreamTrack
26
+
27
+ track = AudioStreamTrack(framerate=24_000)
28
+
29
+ tts = TTS(lang_code="a", voice="af_heart")
30
+ tts.set_output_track(track)
31
+
32
+ await tts.send("Hello from Kokoro!")
33
+ ```
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install vision-agents-plugins-kokoro
39
+ ```
40
+
41
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
42
+
43
+ ```bash
44
+ brew install espeak-ng
45
+ ```
46
+
47
+ ## Configuration options
48
+
49
+ | Parameter | Default | Description |
50
+ |-----------|---------|-------------|
51
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
52
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
53
+ | `speed` | `1.0` | Playback speed multiplier. |
54
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
README.md ADDED
@@ -0,0 +1,38 @@
1
+ # GetStream Kokoro Plugin
2
+
3
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
4
+
5
+ It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
6
+
7
+ ```py
8
+ from vision_agents.plugins.kokoro import TTS
9
+ from getstream.video.rtc.audio_track import AudioStreamTrack
10
+
11
+ track = AudioStreamTrack(framerate=24_000)
12
+
13
+ tts = TTS(lang_code="a", voice="af_heart")
14
+ tts.set_output_track(track)
15
+
16
+ await tts.send("Hello from Kokoro!")
17
+ ```
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install vision-agents-plugins-kokoro
23
+ ```
24
+
25
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
26
+
27
+ ```bash
28
+ brew install espeak-ng
29
+ ```
30
+
31
+ ## Configuration options
32
+
33
+ | Parameter | Default | Description |
34
+ |-----------|---------|-------------|
35
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
36
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
37
+ | `speed` | `1.0` | Playback speed multiplier. |
38
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
pyproject.toml ADDED
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-kokoro"
7
+ dynamic = ["version"]
8
+ description = "Kokoro TTS integration for Vision Agents"
9
+ readme = "README.md"
10
+ keywords = ["kokoro", "TTS", "text-to-speech", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "kokoro>=0.9.4",
16
+ "misaki[en]>=0.9.4",
17
+ "numpy>=2.2.6,<2.3",
18
+ ]
19
+
20
+ [project.urls]
21
+ Documentation = "https://visionagents.ai/"
22
+ Website = "https://visionagents.ai/"
23
+ Source = "https://github.com/GetStream/Vision-Agents"
24
+
25
+ [tool.hatch.version]
26
+ source = "vcs"
27
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["."]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = ["/vision_agents"]
34
+
35
+ [tool.uv.sources]
36
+ vision-agents = { workspace = true }
37
+
38
+ [dependency-groups]
39
+ dev = [
40
+ "pytest>=8.4.1",
41
+ "pytest-asyncio>=1.0.0",
42
+ ]
@@ -0,0 +1,6 @@
1
+ from .tts import TTS
2
+
3
+ # Extend __path__ (pkgutil-style namespace package) so other installed distributions can contribute modules to this package
4
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
5
+
6
+ __all__ = ["TTS"]
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+
6
+ import numpy as np
7
+ from typing import AsyncIterator, List, Optional
8
+
9
+ from vision_agents.core import tts
10
+ from getstream.video.rtc.audio_track import AudioStreamTrack
11
+
12
+ try:
13
+ from kokoro import KPipeline # type: ignore
14
+ except ModuleNotFoundError: # pragma: no cover – mocked during CI
15
+ KPipeline = None # type: ignore # noqa: N816
16
+
17
+
18
class TTS(tts.TTS):
    """Text-to-Speech plugin backed by the Kokoro-82M model.

    Text is synthesised by a Kokoro ``KPipeline`` (or an injected, compatible
    callable) and each audio segment it yields is converted to little-endian
    signed 16-bit PCM bytes.
    """

    def __init__(
        self,
        lang_code: str = "a",  # American English
        voice: str = "af_heart",
        speed: float = 1.0,
        sample_rate: int = 24_000,
        device: Optional[str] = None,
        client: Optional[KPipeline] = None,
    ) -> None:
        """Create the plugin and resolve the synthesis pipeline.

        Args:
            lang_code: Language group forwarded to ``KPipeline``.
            voice: Voice preset forwarded to the pipeline on every call.
            speed: Speed multiplier forwarded to the pipeline on every call.
            sample_rate: Sample rate reported by ``get_required_framerate``
                and enforced by ``set_output_track``.
            device: Optional device string forwarded to ``KPipeline``; omitted
                from the constructor call when ``None``.
            client: Optional pre-built pipeline. When given it is used for
                synthesis as-is, and ``lang_code`` / ``device`` are ignored
                because no new ``KPipeline`` is constructed.

        Raises:
            ImportError: If no ``client`` is supplied and the ``kokoro``
                package could not be imported.
        """
        super().__init__()

        if client is not None:
            # Honour the injected pipeline: previously it was stored on
            # ``self.client`` but a fresh KPipeline was still built and used
            # for synthesis, so the injected object was silently ignored.
            pipeline = client
        else:
            if KPipeline is None:
                raise ImportError(
                    "The 'kokoro' package is not installed. ``pip install kokoro`` first."
                )
            pipeline = (
                KPipeline(lang_code=lang_code)
                if device is None
                else KPipeline(lang_code=lang_code, device=device)
            )

        self._pipeline = pipeline
        self.voice = voice
        self.speed = speed
        self.sample_rate = sample_rate
        # Public alias of the pipeline actually used for synthesis.
        self.client = pipeline

    def get_required_framerate(self) -> int:
        """Get the required framerate for Kokoro TTS."""
        return self.sample_rate

    def get_required_stereo(self) -> bool:
        """Get whether Kokoro TTS requires stereo audio."""
        return False  # Kokoro returns mono audio

    def set_output_track(self, track: AudioStreamTrack) -> None:  # noqa: D401
        """Attach the output track after checking its framerate.

        Raises:
            TypeError: If ``track.framerate`` differs from ``self.sample_rate``.
                (Kept as ``TypeError`` for backward compatibility with
                existing callers.)
        """
        if track.framerate != self.sample_rate:
            raise TypeError(
                f"Invalid framerate {track.framerate}, Kokoro requires {self.sample_rate} Hz"
            )
        super().set_output_track(track)

    async def stream_audio(self, text: str, *_, **__) -> AsyncIterator[bytes]:  # noqa: D401
        """Synthesise ``text`` and return an async iterator of PCM16 chunks.

        The whole synthesis runs in the default executor so the event loop is
        not blocked; all chunks are collected before the iterator is returned.
        Extra positional and keyword arguments are accepted and ignored.
        """
        # We are inside a coroutine, so a loop is guaranteed to be running;
        # get_running_loop() is the non-deprecated way to obtain it.
        loop = asyncio.get_running_loop()
        chunks: List[bytes] = await loop.run_in_executor(
            None, lambda: list(self._generate_chunks(text))
        )

        async def _aiter():
            for chunk in chunks:
                yield chunk

        return _aiter()

    async def stop_audio(self) -> None:
        """Clear the queue and stop playing audio.

        Flushes ``self.track``; any failure is logged rather than raised.
        """
        try:
            await self.track.flush()
        except Exception as e:
            logging.error(f"Error flushing audio track: {e}")

    def _generate_chunks(self, text: str):
        """Yield one PCM16 byte string per audio segment from the pipeline.

        The pipeline is called with ``split_pattern=r"\\n+"`` and is expected
        to yield 3-tuples whose last element is the audio. Segments whose
        audio is ``None`` are skipped instead of crashing the conversion.
        """
        for _gs, _ps, audio in self._pipeline(
            text, voice=self.voice, speed=self.speed, split_pattern=r"\n+"
        ):
            if audio is None:
                # Nothing to convert for this segment.
                continue
            if not isinstance(audio, np.ndarray):
                audio = np.asarray(audio)
            # Clamp to [-1, 1] before scaling so out-of-range samples saturate
            # instead of wrapping around in int16.
            pcm16 = (np.clip(audio, -1.0, 1.0) * 32767.0).astype("<i2")
            yield pcm16.tobytes()
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-kokoro
3
+ Version: 0.0.17
4
+ Summary: Kokoro TTS integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: kokoro>=0.9.4
12
+ Requires-Dist: misaki[en]>=0.9.4
13
+ Requires-Dist: numpy<2.3,>=2.2.6
14
+ Requires-Dist: vision-agents
15
+ Description-Content-Type: text/markdown
16
+
17
+ # GetStream Kokoro Plugin
18
+
19
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
20
+
21
+ It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
22
+
23
+ ```py
24
+ from vision_agents.plugins.kokoro import TTS
25
+ from getstream.video.rtc.audio_track import AudioStreamTrack
26
+
27
+ track = AudioStreamTrack(framerate=24_000)
28
+
29
+ tts = TTS(lang_code="a", voice="af_heart")
30
+ tts.set_output_track(track)
31
+
32
+ await tts.send("Hello from Kokoro!")
33
+ ```
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install vision-agents-plugins-kokoro
39
+ ```
40
+
41
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
42
+
43
+ ```bash
44
+ brew install espeak-ng
45
+ ```
46
+
47
+ ## Configuration options
48
+
49
+ | Parameter | Default | Description |
50
+ |-----------|---------|-------------|
51
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
52
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
53
+ | `speed` | `1.0` | Playback speed multiplier. |
54
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
@@ -0,0 +1,9 @@
1
+ ./.gitignore,sha256=LiiMFm4RUXduFZI42AL85GrllvISRUwTt4t3lrUDGUE,408
2
+ ./PKG-INFO,sha256=bv5cvNbI6YdgGQ2VD1uBhrSDp1bkMItYE-yXvZYXKK8,2045
3
+ ./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
4
+ ./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
5
+ ./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
6
+ ./vision_agents/plugins/kokoro/tts.py,sha256=-XjGD0riPnXsCGpVWo9EO42B1a9mV3CmvUVPhqTnyKA,2921
7
+ vision_agents_plugins_kokoro-0.0.17.dist-info/METADATA,sha256=bv5cvNbI6YdgGQ2VD1uBhrSDp1bkMItYE-yXvZYXKK8,2045
8
+ vision_agents_plugins_kokoro-0.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ vision_agents_plugins_kokoro-0.0.17.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any