PyPI - vision-agents-plugins-kokoro - Versions diffs - 0.2.1__py3-none-any.whl - Mend

vision-agents-plugins-kokoro 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

.gitignore +90 -0
PKG-INFO +54 -0
README.md +38 -0
pyproject.toml +42 -0
vision_agents/plugins/kokoro/__init__.py +6 -0
vision_agents/plugins/kokoro/tts.py +83 -0
vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA +54 -0
vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD +9 -0
vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL +4 -0

.gitignore ADDED Viewed

@@ -0,0 +1,90 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.cursor/*
+# Distribution / packaging
+.Python
+build/
+dist/
+downloads/
+develop-eggs/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+pip-wheel-metadata/
+MANIFEST
+*.egg-info/
+*.egg
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+coverage.xml
+nosetests.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Type checker / lint caches
+.mypy_cache/
+.dmypy.json
+dmypy.json
+.pytype/
+.pyre/
+.ruff_cache/
+# Environments
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+.env
+.env.local
+.env.*.local
+.env.bak
+pyvenv.cfg
+.python-version
+# Editors / IDEs
+.vscode/
+.idea/
+# Jupyter Notebook
+.ipynb_checkpoints/
+# OS / Misc
+.DS_Store
+*.log
+# Tooling & repo-specific
+pyrightconfig.json
+shell.nix
+bin/*
+lib/*
+stream-py/
+# Artifacts / assets
+*.pt
+*.kef
+*.onnx
+profile.html
+/opencode.json

PKG-INFO ADDED Viewed

@@ -0,0 +1,54 @@
+Metadata-Version: 2.4
+Name: vision-agents-plugins-kokoro
+Version: 0.2.1
+Summary: Kokoro TTS integration for Vision Agents
+Project-URL: Documentation, https://visionagents.ai/
+Project-URL: Website, https://visionagents.ai/
+Project-URL: Source, https://github.com/GetStream/Vision-Agents
+License-Expression: MIT
+Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
+Requires-Python: >=3.10
+Requires-Dist: kokoro>=0.9.4
+Requires-Dist: misaki[en]>=0.9.4
+Requires-Dist: numpy<2.3,>=2.2.6
+Requires-Dist: vision-agents
+Description-Content-Type: text/markdown
+# GetStream Kokoro Plugin
+This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
+It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
+```py
+from vision_agents.plugins.kokoro import TTS
+from getstream.video.rtc.audio_track import AudioStreamTrack
+track = AudioStreamTrack(framerate=24_000)
+tts = TTS(lang_code="a", voice="af_heart")
+tts.set_output_track(track)
+await tts.send("Hello from Kokoro!")
+```
+## Installation
+```bash
+pip install vision-agents-plugins-kokoro
+```
+This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies.  You also need `espeak-ng` **at runtime** for pronunciation fallback.  On macOS you can install it with Homebrew:
+```bash
+brew install espeak-ng
+```
+## Configuration options
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
+| `voice` | `"af_heart"` | Kokoro voice preset.  See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
+| `speed` | `1.0` | Playback speed multiplier. |
+| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro).  **The attached `AudioStreamTrack` must use the same value.** |

README.md ADDED Viewed

@@ -0,0 +1,38 @@
+# GetStream Kokoro Plugin
+This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
+It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
+```py
+from vision_agents.plugins.kokoro import TTS
+from getstream.video.rtc.audio_track import AudioStreamTrack
+track = AudioStreamTrack(framerate=24_000)
+tts = TTS(lang_code="a", voice="af_heart")
+tts.set_output_track(track)
+await tts.send("Hello from Kokoro!")
+```
+## Installation
+```bash
+pip install vision-agents-plugins-kokoro
+```
+This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies.  You also need `espeak-ng` **at runtime** for pronunciation fallback.  On macOS you can install it with Homebrew:
+```bash
+brew install espeak-ng
+```
+## Configuration options
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
+| `voice` | `"af_heart"` | Kokoro voice preset.  See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
+| `speed` | `1.0` | Playback speed multiplier. |
+| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro).  **The attached `AudioStreamTrack` must use the same value.** |

pyproject.toml ADDED Viewed

@@ -0,0 +1,42 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+[project]
+name = "vision-agents-plugins-kokoro"
+dynamic = ["version"]
+description = "Kokoro TTS integration for Vision Agents"
+readme = "README.md"
+keywords = ["kokoro", "TTS", "text-to-speech", "AI", "voice agents", "agents"]
+requires-python = ">=3.10"
+license = "MIT"
+dependencies = [
+    "vision-agents",
+    "kokoro>=0.9.4",
+    "misaki[en]>=0.9.4",
+    "numpy>=2.2.6,<2.3",
+]
+[project.urls]
+Documentation = "https://visionagents.ai/"
+Website = "https://visionagents.ai/"
+Source = "https://github.com/GetStream/Vision-Agents"
+[tool.hatch.version]
+source = "vcs"
+raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+[tool.hatch.build.targets.wheel]
+packages = ["."]
+[tool.hatch.build.targets.sdist]
+include = ["/vision_agents"]
+[tool.uv.sources]
+vision-agents = { workspace = true }
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+]

vision_agents/plugins/kokoro/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+# Re-export the plugin class so it can be imported from the package itself.
+from .tts import TTS
+# Extend this package's search path via pkgutil so that same-named package
+# directories found elsewhere on sys.path are also searched for submodules.
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
+# Names exported by ``from <package> import *``.
+__all__ = ["TTS"]

vision_agents/plugins/kokoro/tts.py ADDED Viewed

@@ -0,0 +1,83 @@
+from __future__ import annotations
+import asyncio
+import logging
+from typing import AsyncIterator, Iterator, List, Optional
+import numpy as np
+from vision_agents.core import tts
+from getstream.video.rtc.track_util import PcmData, AudioFormat
+try:
+    from kokoro import KPipeline  # type: ignore
+except ModuleNotFoundError:  # pragma: no cover – mocked during CI
+    KPipeline = None  # type: ignore  # noqa: N816
+logger = logging.getLogger(__name__)
+class TTS(tts.TTS):
+    """Text-to-Speech plugin backed by the Kokoro-82M model."""
+    def __init__(
+        self,
+        lang_code: str = "a",  # American English
+        voice: str = "af_heart",
+        speed: float = 1.0,
+        sample_rate: int = 24_000,
+        device: Optional[str] = None,
+        client: Optional[KPipeline] = None,
+    ) -> None:
+        super().__init__()
+        if KPipeline is None:
+            raise ImportError(
+                "The 'kokoro' package is not installed. ``pip install kokoro`` first."
+            )
+        self._pipeline = (
+            KPipeline(lang_code=lang_code)
+            if device is None
+            else KPipeline(lang_code=lang_code, device=device)
+        )
+        self.voice = voice
+        self.speed = speed
+        self.sample_rate = sample_rate
+        self.client = client if client is not None else self._pipeline
+    async def stream_audio(
+        self, text: str, *_, **__
+    ) -> PcmData | Iterator[PcmData] | AsyncIterator[PcmData]:  # noqa: D401
+        loop = asyncio.get_event_loop()
+        chunks: List[bytes] = await loop.run_in_executor(
+            None, lambda: list(self._generate_chunks(text))
+        )
+        async def _aiter():
+            for chunk in chunks:
+                yield PcmData.from_bytes(
+                    chunk,
+                    sample_rate=self.sample_rate,
+                    channels=1,
+                    format=AudioFormat.S16,
+                )
+        return _aiter()
+    async def stop_audio(self) -> None:
+        """
+        Clears the queue and stops playing audio.
+        """
+        logger.info("🎤 Kokoro TTS stop requested (no-op)")
+    def _generate_chunks(self, text: str):
+        for _gs, _ps, audio in self._pipeline(
+            text, voice=self.voice, speed=self.speed, split_pattern=r"\n+"
+        ):
+            if not isinstance(audio, np.ndarray):
+                audio = np.asarray(audio)
+            pcm16 = (np.clip(audio, -1.0, 1.0) * 32767.0).astype("<i2")
+            yield pcm16.tobytes()

vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,54 @@
+Metadata-Version: 2.4
+Name: vision-agents-plugins-kokoro
+Version: 0.2.1
+Summary: Kokoro TTS integration for Vision Agents
+Project-URL: Documentation, https://visionagents.ai/
+Project-URL: Website, https://visionagents.ai/
+Project-URL: Source, https://github.com/GetStream/Vision-Agents
+License-Expression: MIT
+Keywords: AI,TTS,agents,kokoro,text-to-speech,voice agents
+Requires-Python: >=3.10
+Requires-Dist: kokoro>=0.9.4
+Requires-Dist: misaki[en]>=0.9.4
+Requires-Dist: numpy<2.3,>=2.2.6
+Requires-Dist: vision-agents
+Description-Content-Type: text/markdown
+# GetStream Kokoro Plugin
+This package integrates the open-weight [Kokoro-82M TTS model](https://github.com/hexgrad/kokoro) with the GetStream audio/video SDK.
+It provides a drop-in `TTS` class that implements the common `vision_agents.core.tts.TTS` interface, allowing you to stream PCM audio generated by Kokoro directly into a WebRTC `AudioStreamTrack`.
+```py
+from vision_agents.plugins.kokoro import TTS
+from getstream.video.rtc.audio_track import AudioStreamTrack
+track = AudioStreamTrack(framerate=24_000)
+tts = TTS(lang_code="a", voice="af_heart")
+tts.set_output_track(track)
+await tts.send("Hello from Kokoro!")
+```
+## Installation
+```bash
+pip install vision-agents-plugins-kokoro
+```
+This will pull in the required `kokoro`, `misaki[en]`, `numpy` and `vision-agents` dependencies.  You also need `espeak-ng` **at runtime** for pronunciation fallback.  On macOS you can install it with Homebrew:
+```bash
+brew install espeak-ng
+```
+## Configuration options
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `lang_code` | `"a"` | Language group passed to `KPipeline` (`"a"` = American English, etc.) |
+| `voice` | `"af_heart"` | Kokoro voice preset.  See the [model card](https://huggingface.co/NeuML/kokoro-int8-onnx#speaker-reference) for available options. |
+| `speed` | `1.0` | Playback speed multiplier. |
+| `sample_rate` | `24000` | Output sample-rate (fixed by Kokoro).  **The attached `AudioStreamTrack` must use the same value.** |

vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+./.gitignore,sha256=zrSq4X-Qh8j7QY0ukXt-RXj6StdhdbJdR3e8HoHbTTg,961
+./PKG-INFO,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
+./README.md,sha256=2olMbGoMYj07OyGlDj_AG2G42o61Tl-Ml4AMINXj4LM,1486
+./pyproject.toml,sha256=tKF8yoc19e21FLXZcomo17LEHGH929kFwX9EIXkRBQA,999
+./vision_agents/plugins/kokoro/__init__.py,sha256=hq4qcv6VHIM24dB_NZEhxogAKD04jX35ngeXhn2M-zQ,158
+./vision_agents/plugins/kokoro/tts.py,sha256=_vCCRgMOnYj5IlibxfOnvZf_3wYJcdTurswRtdSGRhs,2510
+vision_agents_plugins_kokoro-0.2.1.dist-info/METADATA,sha256=SLNoXAoj0EuwjnWGVGSharZYR6KpKnknjjgW2Pg007k,2044
+vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+vision_agents_plugins_kokoro-0.2.1.dist-info/RECORD,,

vision_agents_plugins_kokoro-0.2.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.27.0
+Root-Is-Purelib: true
+Tag: py3-none-any