vision-agents-plugins-kokoro 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .gitignore +90 -0
- PKG-INFO +54 -0
- README.md +38 -0
- pyproject.toml +42 -0
- vision_agents/plugins/kokoro/__init__.py +6 -0
- vision_agents/plugins/kokoro/tts.py +83 -0
- vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA +54 -0
- vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD +9 -0
- vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL +4 -0
.gitignore
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.cursor/*
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
.Python
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
downloads/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
eggs/
|
|
14
|
+
.eggs/
|
|
15
|
+
lib64/
|
|
16
|
+
parts/
|
|
17
|
+
sdist/
|
|
18
|
+
var/
|
|
19
|
+
wheels/
|
|
20
|
+
share/python-wheels/
|
|
21
|
+
pip-wheel-metadata/
|
|
22
|
+
MANIFEST
|
|
23
|
+
*.egg-info/
|
|
24
|
+
*.egg
|
|
25
|
+
|
|
26
|
+
# Installer logs
|
|
27
|
+
pip-log.txt
|
|
28
|
+
pip-delete-this-directory.txt
|
|
29
|
+
|
|
30
|
+
# Unit test / coverage reports
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
.coverage
|
|
35
|
+
.coverage.*
|
|
36
|
+
.cache
|
|
37
|
+
coverage.xml
|
|
38
|
+
nosetests.xml
|
|
39
|
+
*.cover
|
|
40
|
+
*.py,cover
|
|
41
|
+
.hypothesis/
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
|
|
44
|
+
# Type checker / lint caches
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.dmypy.json
|
|
47
|
+
dmypy.json
|
|
48
|
+
.pytype/
|
|
49
|
+
.pyre/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Environments
|
|
53
|
+
.venv
|
|
54
|
+
env/
|
|
55
|
+
venv/
|
|
56
|
+
ENV/
|
|
57
|
+
env.bak/
|
|
58
|
+
venv.bak/
|
|
59
|
+
.env
|
|
60
|
+
.env.local
|
|
61
|
+
.env.*.local
|
|
62
|
+
.env.bak
|
|
63
|
+
pyvenv.cfg
|
|
64
|
+
.python-version
|
|
65
|
+
|
|
66
|
+
# Editors / IDEs
|
|
67
|
+
.vscode/
|
|
68
|
+
.idea/
|
|
69
|
+
|
|
70
|
+
# Jupyter Notebook
|
|
71
|
+
.ipynb_checkpoints/
|
|
72
|
+
|
|
73
|
+
# OS / Misc
|
|
74
|
+
.DS_Store
|
|
75
|
+
*.log
|
|
76
|
+
|
|
77
|
+
# Tooling & repo-specific
|
|
78
|
+
pyrightconfig.json
|
|
79
|
+
shell.nix
|
|
80
|
+
bin/*
|
|
81
|
+
lib/*
|
|
82
|
+
stream-py/
|
|
83
|
+
|
|
84
|
+
# Artifacts / assets
|
|
85
|
+
*.pt
|
|
86
|
+
*.kef
|
|
87
|
+
*.onnx
|
|
88
|
+
profile.html
|
|
89
|
+
|
|
90
|
+
/opencode.json
|
PKG-INFO
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vision-agents-plugins-kokoro
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Kokoro TTS integration for Vision Agents
|
|
5
|
+
Project-URL: Documentation, https://visionagents.ai/
|
|
6
|
+
Project-URL: Website, https://visionagents.ai/
|
|
7
|
+
Project-URL: Source, https://github.com/GetStream/Vision-Agents
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: kokoro>=0.9.4
|
|
12
|
+
Requires-Dist: misaki[en]>=0.9.4
|
|
13
|
+
Requires-Dist: numpy<2.3,>=2.2.6
|
|
14
|
+
Requires-Dist: vision-agents
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# GetStream Kokoro Plugin
|
|
18
|
+
|
|
19
|
+
This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
|
|
20
|
+
|
|
21
|
+
It provides a drop-in `KokoroTTS` class that implements the common `getstream_common.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
|
|
22
|
+
|
|
23
|
+
```py
|
|
24
|
+
from getstream.plugins.kokoro import KokoroTTS
|
|
25
|
+
from getstream.video.rtc.audio_track import AudioStreamTrack
|
|
26
|
+
|
|
27
|
+
track = AudioStreamTrack(framerate=24_000)
|
|
28
|
+
|
|
29
|
+
tts = KokoroTTS(lang_code="a", voice="af_heart")
|
|
30
|
+
tts.set_output_track(track)
|
|
31
|
+
|
|
32
|
+
await tts.send("Hello from Kokoro!")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install getstream-plugins-kokoro
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
This will pull in the required `kokoro`, `numpy` and `getstream[webrtc]` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
brew install espeak-ng
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Configuration options
|
|
48
|
+
|
|
49
|
+
| Parameter | Default | Description |
|
|
50
|
+
|-----------|---------|-------------|
|
|
51
|
+
| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
|
|
52
|
+
| `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
|
|
53
|
+
| `speed` | `1.0` | Playback speed multiplier. |
|
|
54
|
+
| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
|
README.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# GetStream Kokoro Plugin
|
|
2
|
+
|
|
3
|
+
This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
|
|
4
|
+
|
|
5
|
+
It provides a drop-in `KokoroTTS` class that implements the common `getstream_common.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
|
|
6
|
+
|
|
7
|
+
```py
|
|
8
|
+
from getstream.plugins.kokoro import KokoroTTS
|
|
9
|
+
from getstream.video.rtc.audio_track import AudioStreamTrack
|
|
10
|
+
|
|
11
|
+
track = AudioStreamTrack(framerate=24_000)
|
|
12
|
+
|
|
13
|
+
tts = KokoroTTS(lang_code="a", voice="af_heart")
|
|
14
|
+
tts.set_output_track(track)
|
|
15
|
+
|
|
16
|
+
await tts.send("Hello from Kokoro!")
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install getstream-plugins-kokoro
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
This will pull in the required `kokoro`, `numpy` and `getstream[webrtc]` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
brew install espeak-ng
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Configuration options
|
|
32
|
+
|
|
33
|
+
| Parameter | Default | Description |
|
|
34
|
+
|-----------|---------|-------------|
|
|
35
|
+
| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
|
|
36
|
+
| `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
|
|
37
|
+
| `speed` | `1.0` | Playback speed multiplier. |
|
|
38
|
+
| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
|
pyproject.toml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "vision-agents-plugins-kokoro"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Kokoro TTS integration for Vision Agents"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
keywords = ["kokoro", "TTS", "text-to-speech", "AI", "voice agents", "agents"]
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
license = "MIT"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"vision-agents",
|
|
15
|
+
"kokoro>=0.9.4",
|
|
16
|
+
"misaki[en]>=0.9.4",
|
|
17
|
+
"numpy>=2.2.6,<2.3",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Documentation = "https://visionagents.ai/"
|
|
22
|
+
Website = "https://visionagents.ai/"
|
|
23
|
+
Source = "https://github.com/GetStream/Vision-Agents"
|
|
24
|
+
|
|
25
|
+
[tool.hatch.version]
|
|
26
|
+
source = "vcs"
|
|
27
|
+
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
|
|
28
|
+
|
|
29
|
+
[tool.hatch.build.targets.wheel]
|
|
30
|
+
packages = ["."]
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.sdist]
|
|
33
|
+
include = ["/vision_agents"]
|
|
34
|
+
|
|
35
|
+
[tool.uv.sources]
|
|
36
|
+
vision-agents = { workspace = true }
|
|
37
|
+
|
|
38
|
+
[dependency-groups]
|
|
39
|
+
dev = [
|
|
40
|
+
"pytest>=8.4.1",
|
|
41
|
+
"pytest-asyncio>=1.0.0",
|
|
42
|
+
]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
from typing import AsyncIterator, Iterator, List, Optional
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from vision_agents.core import tts
|
|
10
|
+
from getstream.video.rtc.track_util import PcmData, AudioFormat
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from kokoro import KPipeline # type: ignore
|
|
14
|
+
except ModuleNotFoundError: # pragma: no cover – mocked during CI
|
|
15
|
+
KPipeline = None # type: ignore # noqa: N816
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TTS(tts.TTS):
    """Text-to-Speech plugin backed by the Kokoro-82M model.

    Text is synthesised by a Kokoro pipeline and emitted as mono, signed
    16-bit PCM chunks tagged with ``sample_rate``.
    """

    def __init__(
        self,
        lang_code: str = "a",  # American English
        voice: str = "af_heart",
        speed: float = 1.0,
        sample_rate: int = 24_000,
        device: Optional[str] = None,
        client: Optional[KPipeline] = None,
    ) -> None:
        """Create the plugin and resolve the pipeline used for synthesis.

        Args:
            lang_code: Language code forwarded to ``KPipeline``.
            voice: Voice preset forwarded to the pipeline on every call.
            speed: Speed value forwarded to the pipeline on every call.
            sample_rate: Sample rate attached to the emitted ``PcmData``.
                This class does not resample; the value only labels the
                PCM bytes produced by the pipeline.
            device: Optional device forwarded to ``KPipeline``. When ``None``
                the argument is omitted so the pipeline picks its default.
            client: Optional ready-made pipeline. When given it is used for
                synthesis and no new ``KPipeline`` is constructed, so
                ``lang_code`` and ``device`` are not applied.

        Raises:
            ImportError: If no ``client`` is supplied and the ``kokoro``
                package could not be imported.
        """
        super().__init__()

        if client is not None:
            # Honour the injected pipeline instead of building (and loading)
            # a second one that would silently be used in its place.
            pipeline = client
        elif KPipeline is None:
            raise ImportError(
                "The 'kokoro' package is not installed. ``pip install kokoro`` first."
            )
        elif device is None:
            pipeline = KPipeline(lang_code=lang_code)
        else:
            pipeline = KPipeline(lang_code=lang_code, device=device)

        # Both attributes refer to the same object: ``_pipeline`` is what the
        # synthesis path calls, ``client`` is the public handle.
        self._pipeline = pipeline
        self.client = pipeline
        self.voice = voice
        self.speed = speed
        self.sample_rate = sample_rate

    async def stream_audio(
        self, text: str, *_, **__
    ) -> PcmData | Iterator[PcmData] | AsyncIterator[PcmData]:  # noqa: D401
        """Synthesise ``text`` and return an async iterator of PCM chunks.

        The whole utterance is generated in the default executor before the
        iterator is returned, so the event loop is not blocked during
        synthesis but the first chunk is only available once all chunks
        have been produced.

        Args:
            text: Text to synthesise. Extra positional and keyword arguments
                are accepted and ignored.

        Returns:
            An async iterator yielding one mono S16 ``PcmData`` per chunk.
        """
        # Inside a coroutine the running loop is the one we want;
        # get_running_loop() never creates a new loop implicitly.
        loop = asyncio.get_running_loop()
        chunks: List[bytes] = await loop.run_in_executor(
            None, lambda: list(self._generate_chunks(text))
        )

        async def _aiter():
            for chunk in chunks:
                yield PcmData.from_bytes(
                    chunk,
                    sample_rate=self.sample_rate,
                    channels=1,
                    format=AudioFormat.S16,
                )

        return _aiter()

    async def stop_audio(self) -> None:
        """Handle a stop request.

        This implementation keeps no playback queue of its own, so the call
        only logs that a stop was requested.
        """
        logger.info("🎤 Kokoro TTS stop requested (no-op)")

    def _generate_chunks(self, text: str) -> Iterator[bytes]:
        """Run the pipeline on ``text`` and yield little-endian int16 PCM bytes.

        The pipeline is asked to split the input on runs of newlines; one
        byte string is yielded per result it produces.
        """
        for _gs, _ps, audio in self._pipeline(
            text, voice=self.voice, speed=self.speed, split_pattern=r"\n+"
        ):
            if not isinstance(audio, np.ndarray):
                audio = np.asarray(audio)
            # Clamp to [-1, 1] before scaling so out-of-range samples saturate
            # instead of wrapping around when cast to 16-bit integers.
            pcm16 = (np.clip(audio, -1.0, 1.0) * 32767.0).astype("<i2")
            yield pcm16.tobytes()
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vision-agents-plugins-kokoro
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Kokoro TTS integration for Vision Agents
|
|
5
|
+
Project-URL: Documentation, https://visionagents.ai/
|
|
6
|
+
Project-URL: Website, https://visionagents.ai/
|
|
7
|
+
Project-URL: Source, https://github.com/GetStream/Vision-Agents
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: kokoro>=0.9.4
|
|
12
|
+
Requires-Dist: misaki[en]>=0.9.4
|
|
13
|
+
Requires-Dist: numpy<2.3,>=2.2.6
|
|
14
|
+
Requires-Dist: vision-agents
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# GetStream Kokoro Plugin
|
|
18
|
+
|
|
19
|
+
This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
|
|
20
|
+
|
|
21
|
+
It provides a drop-in `KokoroTTS` class that implements the common `getstream_common.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
|
|
22
|
+
|
|
23
|
+
```py
|
|
24
|
+
from getstream.plugins.kokoro import KokoroTTS
|
|
25
|
+
from getstream.video.rtc.audio_track import AudioStreamTrack
|
|
26
|
+
|
|
27
|
+
track = AudioStreamTrack(framerate=24_000)
|
|
28
|
+
|
|
29
|
+
tts = KokoroTTS(lang_code="a", voice="af_heart")
|
|
30
|
+
tts.set_output_track(track)
|
|
31
|
+
|
|
32
|
+
await tts.send("Hello from Kokoro!")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install getstream-plugins-kokoro
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
This will pull in the required `kokoro`, `numpy` and `getstream[webrtc]` dependencies. You also need `espeak-ng` **at runtime** for pronunciation fallback. On macOS you can install it with Homebrew:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
brew install espeak-ng
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Configuration options
|
|
48
|
+
|
|
49
|
+
| Parameter | Default | Description |
|
|
50
|
+
|-----------|---------|-------------|
|
|
51
|
+
| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
|
|
52
|
+
| `voice` | `"af_heart"` | Kokoro voice preset. See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
|
|
53
|
+
| `speed` | `1.0` | Playback speed multiplier. |
|
|
54
|
+
| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro). **The attached `AudioStreamTrack` must use the same value.** |
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
./.gitignore,sha256=zrSq4X-Qh8j7QY0ukXt-RXj6StdhdbJdR3e8HoHbTTg,961
|
|
2
|
+
./PKG-INFO,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
|
|
3
|
+
./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
|
|
4
|
+
./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
|
|
5
|
+
./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
|
|
6
|
+
./vision_agents/plugins/kokoro/tts.py,sha256=_vCCRgMOnYj5IlibxfOnvZf_3wYJcdTurswRtdSGRhs,2510
|
|
7
|
+
vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
|
|
8
|
+
vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
+
vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD,,
|