vision-agents-plugins-kokoro 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
.gitignore ADDED
@@ -0,0 +1,90 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .cursor/*
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ dist/
11
+ downloads/
12
+ develop-eggs/
13
+ eggs/
14
+ .eggs/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ pip-wheel-metadata/
22
+ MANIFEST
23
+ *.egg-info/
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ coverage.xml
38
+ nosetests.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+
44
+ # Type checker / lint caches
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .pytype/
49
+ .pyre/
50
+ .ruff_cache/
51
+
52
+ # Environments
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+ .env
60
+ .env.local
61
+ .env.*.local
62
+ .env.bak
63
+ pyvenv.cfg
64
+ .python-version
65
+
66
+ # Editors / IDEs
67
+ .vscode/
68
+ .idea/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints/
72
+
73
+ # OS / Misc
74
+ .DS_Store
75
+ *.log
76
+
77
+ # Tooling & repo-specific
78
+ pyrightconfig.json
79
+ shell.nix
80
+ bin/*
81
+ lib/*
82
+ stream-py/
83
+
84
+ # Artifacts / assets
85
+ *.pt
86
+ *.kef
87
+ *.onnx
88
+ profile.html
89
+
90
+ /opencode.json
PKG-INFO ADDED
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-kokoro
3
+ Version: 0.2.1
4
+ Summary: Kokoro TTS integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: kokoro>=0.9.4
12
+ Requires-Dist: misaki[en]>=0.9.4
13
+ Requires-Dist: numpy<2.3,>=2.2.6
14
+ Requires-Dist: vision-agents
15
+ Description-Content-Type: text/markdown
16
+
17
+ # GetStream Kokoro Plugin
18
+
19
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
20
+
21
+ It provides a drop-in `TTS` class (imported as `KokoroTTS` in the example below) that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
22
+
23
+ ```py
24
+ from vision_agents.plugins.kokoro import TTS as KokoroTTS
25
+ from getstream.video.rtc.audio_track import AudioStreamTrack
26
+
27
+ track = AudioStreamTrack(framerate=24_000)
28
+
29
+ tts = KokoroTTS(lang_code="a", voice="af_heart")
30
+ tts.set_output_track(track)
31
+
32
+ await tts.send("Hello from Kokoro!")
33
+ ```
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install vision-agents-plugins-kokoro
39
+ ```
40
+
41
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
42
+
43
+ ```bash
44
+ brew install espeak-ng
45
+ ```
46
+
47
+ ## Configuration options
48
+
49
+ | Parameter | Default | Description |
50
+ |-----------|---------|-------------|
51
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
52
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
53
+ | `speed` | `1.0` | Playback speed multiplier. |
54
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
README.md ADDED
@@ -0,0 +1,38 @@
1
+ # GetStream Kokoro Plugin
2
+
3
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
4
+
5
+ It provides a drop-in `TTS` class (imported as `KokoroTTS` in the example below) that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
6
+
7
+ ```py
8
+ from vision_agents.plugins.kokoro import TTS as KokoroTTS
9
+ from getstream.video.rtc.audio_track import AudioStreamTrack
10
+
11
+ track = AudioStreamTrack(framerate=24_000)
12
+
13
+ tts = KokoroTTS(lang_code="a", voice="af_heart")
14
+ tts.set_output_track(track)
15
+
16
+ await tts.send("Hello from Kokoro!")
17
+ ```
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install vision-agents-plugins-kokoro
23
+ ```
24
+
25
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
26
+
27
+ ```bash
28
+ brew install espeak-ng
29
+ ```
30
+
31
+ ## Configuration options
32
+
33
+ | Parameter | Default | Description |
34
+ |-----------|---------|-------------|
35
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
36
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
37
+ | `speed` | `1.0` | Playback speed multiplier. |
38
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
pyproject.toml ADDED
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-kokoro"
7
+ dynamic = ["version"]
8
+ description = "Kokoro TTS integration for Vision Agents"
9
+ readme = "README.md"
10
+ keywords = ["kokoro", "TTS", "text-to-speech", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "kokoro>=0.9.4",
16
+ "misaki[en]>=0.9.4",
17
+ "numpy>=2.2.6,<2.3",
18
+ ]
19
+
20
+ [project.urls]
21
+ Documentation = "https://visionagents.ai/"
22
+ Website = "https://visionagents.ai/"
23
+ Source = "https://github.com/GetStream/Vision-Agents"
24
+
25
+ [tool.hatch.version]
26
+ source = "vcs"
27
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
28
+
29
[tool.hatch.build.targets.wheel]
# Ship only the namespace package tree. Selecting "." also copies the
# project-root files (.gitignore, PKG-INFO, README.md, pyproject.toml) into
# the wheel, which installs them at the top level of site-packages.
packages = ["vision_agents"]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = ["/vision_agents"]
34
+
35
+ [tool.uv.sources]
36
+ vision-agents = { workspace = true }
37
+
38
+ [dependency-groups]
39
+ dev = [
40
+ "pytest>=8.4.1",
41
+ "pytest-asyncio>=1.0.0",
42
+ ]
@@ -0,0 +1,6 @@
1
+ from .tts import TTS
2
+
3
+ # Re-export under the new namespace for convenience
4
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
5
+
6
+ __all__ = ["TTS"]
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import AsyncIterator, Iterator, List, Optional
6
+
7
+ import numpy as np
8
+
9
+ from vision_agents.core import tts
10
+ from getstream.video.rtc.track_util import PcmData, AudioFormat
11
+
12
+ try:
13
+ from kokoro import KPipeline # type: ignore
14
+ except ModuleNotFoundError: # pragma: no cover – mocked during CI
15
+ KPipeline = None # type: ignore # noqa: N816
16
+
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class TTS(tts.TTS):
    """Text-to-Speech plugin backed by the Kokoro-82M model.

    Text is synthesized by calling a Kokoro pipeline object. Every audio
    segment the pipeline yields is clipped to [-1.0, 1.0], converted to
    little-endian signed 16-bit PCM and wrapped in a mono ``PcmData``.
    """

    def __init__(
        self,
        lang_code: str = "a",  # American English
        voice: str = "af_heart",
        speed: float = 1.0,
        sample_rate: int = 24_000,
        device: Optional[str] = None,
        client: Optional[KPipeline] = None,
    ) -> None:
        """Create the plugin and the pipeline it synthesizes with.

        Args:
            lang_code: Passed to ``KPipeline`` as ``lang_code`` when this
                class builds the pipeline itself.
            voice: Voice name passed to the pipeline on every synthesis call.
            speed: Speed value passed to the pipeline on every synthesis call.
            sample_rate: Sample rate recorded on the emitted ``PcmData``.
                No resampling is done here, so it must match what the
                pipeline actually produces.
            device: When not ``None``, forwarded to ``KPipeline`` as
                ``device``; otherwise ``KPipeline`` is built without it.
            client: Optional pre-built pipeline. When given it is used for
                synthesis, no ``KPipeline`` is constructed, and
                ``lang_code`` / ``device`` are not used.

        Raises:
            ImportError: If no ``client`` is supplied and the ``kokoro``
                package could not be imported.
        """
        super().__init__()

        if client is not None:
            # Honor the injected pipeline: it must drive synthesis, and it
            # must not require the optional ``kokoro`` import to succeed.
            self._pipeline = client
        else:
            if KPipeline is None:
                raise ImportError(
                    "The 'kokoro' package is not installed. ``pip install kokoro`` first."
                )
            self._pipeline = (
                KPipeline(lang_code=lang_code)
                if device is None
                else KPipeline(lang_code=lang_code, device=device)
            )
        self.voice = voice
        self.speed = speed
        self.sample_rate = sample_rate
        # Public alias of the pipeline actually used for synthesis.
        self.client = self._pipeline

    async def stream_audio(
        self, text: str, *_, **__
    ) -> PcmData | Iterator[PcmData] | AsyncIterator[PcmData]:  # noqa: D401
        """Synthesize ``text`` and return an async iterator of PCM chunks.

        The whole text is synthesized in the event loop's default executor
        before this coroutine returns, so the returned iterator only replays
        already-generated chunks. Extra positional and keyword arguments are
        accepted and ignored.

        Args:
            text: The text to synthesize.

        Returns:
            An async iterator yielding one mono S16 ``PcmData`` per chunk,
            tagged with ``self.sample_rate``.
        """
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        chunks: List[bytes] = await loop.run_in_executor(
            None, lambda: list(self._generate_chunks(text))
        )

        async def _aiter():
            for chunk in chunks:
                yield PcmData.from_bytes(
                    chunk,
                    sample_rate=self.sample_rate,
                    channels=1,
                    format=AudioFormat.S16,
                )

        return _aiter()

    async def stop_audio(self) -> None:
        """Handle a stop request.

        This implementation only logs the request; it performs no
        cancellation and clears no buffers.
        """
        logger.info("🎤 Kokoro TTS stop requested (no-op)")

    def _generate_chunks(self, text: str) -> Iterator[bytes]:
        """Run the pipeline on ``text`` and yield raw 16-bit PCM byte chunks.

        The pipeline is called with the configured voice and speed and asked
        to split the input on runs of newlines. Only the third element of
        each yielded triple (the audio) is used.

        Args:
            text: The text to synthesize.

        Yields:
            ``bytes`` holding little-endian int16 samples for one segment.
        """
        for _gs, _ps, audio in self._pipeline(
            text, voice=self.voice, speed=self.speed, split_pattern=r"\n+"
        ):
            if not isinstance(audio, np.ndarray):
                audio = np.asarray(audio)
            # Samples are treated as floats in [-1.0, 1.0]; anything outside
            # that range is clipped before scaling to the int16 range.
            pcm16 = (np.clip(audio, -1.0, 1.0) * 32767.0).astype("<i2")
            yield pcm16.tobytes()
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-kokoro
3
+ Version: 0.2.1
4
+ Summary: Kokoro TTS integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: kokoro>=0.9.4
12
+ Requires-Dist: misaki[en]>=0.9.4
13
+ Requires-Dist: numpy<2.3,>=2.2.6
14
+ Requires-Dist: vision-agents
15
+ Description-Content-Type: text/markdown
16
+
17
+ # GetStream Kokoro Plugin
18
+
19
+ This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
20
+
21
+ It provides a drop-in `TTS` class (imported as `KokoroTTS` in the example below) that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
22
+
23
+ ```py
24
+ from vision_agents.plugins.kokoro import TTS as KokoroTTS
25
+ from getstream.video.rtc.audio_track import AudioStreamTrack
26
+
27
+ track = AudioStreamTrack(framerate=24_000)
28
+
29
+ tts = KokoroTTS(lang_code="a", voice="af_heart")
30
+ tts.set_output_track(track)
31
+
32
+ await tts.send("Hello from Kokoro!")
33
+ ```
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install vision-agents-plugins-kokoro
39
+ ```
40
+
41
+ This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
42
+
43
+ ```bash
44
+ brew install espeak-ng
45
+ ```
46
+
47
+ ## Configuration options
48
+
49
+ | Parameter | Default | Description |
50
+ |-----------|---------|-------------|
51
+ | `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
52
+ | `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
53
+ | `speed` | `1.0` | Playback speed multiplier. |
54
+ | `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
@@ -0,0 +1,9 @@
1
+ ./.gitignore,sha256=zrSq4X-Qh8j7QY0ukXt-RXj6StdhdbJdR3e8HoHbTTg,961
2
+ ./PKG-INFO,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
3
+ ./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
4
+ ./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
5
+ ./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
6
+ ./vision_agents/plugins/kokoro/tts.py,sha256=_vCCRgMOnYj5IlibxfOnvZf_3wYJcdTurswRtdSGRhs,2510
7
+ vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
8
+ vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any