vision-agents-plugins-liveavatar 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agents_plugins_liveavatar-0.6.0/.gitignore +102 -0
- vision_agents_plugins_liveavatar-0.6.0/PKG-INFO +14 -0
- vision_agents_plugins_liveavatar-0.6.0/pyproject.toml +41 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/__init__.py +3 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/exceptions.py +16 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_avatar.py +216 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_client.py +123 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_rtc_manager.py +132 -0
- vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_websocket.py +102 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.cursor/*
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
.Python
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
downloads/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
eggs/
|
|
14
|
+
.eggs/
|
|
15
|
+
lib64/
|
|
16
|
+
parts/
|
|
17
|
+
sdist/
|
|
18
|
+
var/
|
|
19
|
+
wheels/
|
|
20
|
+
share/python-wheels/
|
|
21
|
+
pip-wheel-metadata/
|
|
22
|
+
MANIFEST
|
|
23
|
+
*.egg-info/
|
|
24
|
+
*.egg
|
|
25
|
+
|
|
26
|
+
# Installer logs
|
|
27
|
+
pip-log.txt
|
|
28
|
+
pip-delete-this-directory.txt
|
|
29
|
+
|
|
30
|
+
# Unit test / coverage reports
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
.coverage
|
|
35
|
+
.coverage.*
|
|
36
|
+
.cache
|
|
37
|
+
coverage.xml
|
|
38
|
+
nosetests.xml
|
|
39
|
+
*.cover
|
|
40
|
+
*.py,cover
|
|
41
|
+
.hypothesis/
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
|
|
44
|
+
# Type checker / lint caches
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.dmypy.json
|
|
47
|
+
dmypy.json
|
|
48
|
+
.pytype/
|
|
49
|
+
.pyre/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Environments
|
|
53
|
+
.venv
|
|
54
|
+
env/
|
|
55
|
+
venv/
|
|
56
|
+
ENV/
|
|
57
|
+
env.bak/
|
|
58
|
+
venv.bak/
|
|
59
|
+
.env
|
|
60
|
+
.env.local
|
|
61
|
+
.env.*.local
|
|
62
|
+
.env.bak
|
|
63
|
+
pyvenv.cfg
|
|
64
|
+
.python-version
|
|
65
|
+
|
|
66
|
+
# Editors / IDEs
|
|
67
|
+
.vscode/
|
|
68
|
+
.idea/
|
|
69
|
+
|
|
70
|
+
# Jupyter Notebook
|
|
71
|
+
.ipynb_checkpoints/
|
|
72
|
+
|
|
73
|
+
# OS / Misc
|
|
74
|
+
.DS_Store
|
|
75
|
+
*.log
|
|
76
|
+
|
|
77
|
+
# Tooling & repo-specific
|
|
78
|
+
pyrightconfig.json
|
|
79
|
+
shell.nix
|
|
80
|
+
bin/*
|
|
81
|
+
lib/*
|
|
82
|
+
stream-py/
|
|
83
|
+
|
|
84
|
+
# Example lock files (regenerated by uv sync)
|
|
85
|
+
examples/*/uv.lock
|
|
86
|
+
plugins/*/example/uv.lock
|
|
87
|
+
|
|
88
|
+
# Artifacts / assets
|
|
89
|
+
*.pt
|
|
90
|
+
*.kef
|
|
91
|
+
*.onnx
|
|
92
|
+
profile.html
|
|
93
|
+
|
|
94
|
+
/opencode.json
|
|
95
|
+
.ralph-tui/
|
|
96
|
+
.claude/*
|
|
97
|
+
!.claude/skills/
|
|
98
|
+
|
|
99
|
+
.uv-cache/
|
|
100
|
+
|
|
101
|
+
# pytest json report
|
|
102
|
+
.report.json
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vision-agents-plugins-liveavatar
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: LiveAvatar plugin for Vision Agents
|
|
5
|
+
Project-URL: Documentation, https://visionagents.ai/
|
|
6
|
+
Project-URL: Website, https://visionagents.ai/
|
|
7
|
+
Project-URL: Source, https://github.com/GetStream/Vision-Agents
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: AI,agents,avatars,heygen,liveavatar,voice agents
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: httpx<1,>=0.28.1
|
|
12
|
+
Requires-Dist: livekit<2,>=1.1.2
|
|
13
|
+
Requires-Dist: vision-agents
|
|
14
|
+
Requires-Dist: websockets<16,>=13.0
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "vision-agents-plugins-liveavatar"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "LiveAvatar plugin for Vision Agents"
|
|
9
|
+
keywords = ["heygen", "liveavatar", "avatars", "AI", "voice agents", "agents"]
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"vision-agents",
|
|
14
|
+
"livekit>=1.1.2,<2",
|
|
15
|
+
"httpx>=0.28.1,<1",
|
|
16
|
+
"websockets>=13.0,<16",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Documentation = "https://visionagents.ai/"
|
|
21
|
+
Website = "https://visionagents.ai/"
|
|
22
|
+
Source = "https://github.com/GetStream/Vision-Agents"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.version]
|
|
25
|
+
source = "vcs"
|
|
26
|
+
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.wheel]
|
|
29
|
+
packages = [".", "vision_agents"]
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.sdist]
|
|
32
|
+
include = ["/vision_agents"]
|
|
33
|
+
|
|
34
|
+
[tool.uv.sources]
|
|
35
|
+
vision-agents = { workspace = true }
|
|
36
|
+
|
|
37
|
+
[dependency-groups]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest>=8.4.1",
|
|
40
|
+
"pytest-asyncio>=1.0.0",
|
|
41
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class LiveAvatarError(Exception):
|
|
2
|
+
"""Base exception for LiveAvatar API errors."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LiveAvatarAPIError(LiveAvatarError):
|
|
6
|
+
"""Raised when an HTTP request to the LiveAvatar API fails."""
|
|
7
|
+
|
|
8
|
+
def __init__(
|
|
9
|
+
self,
|
|
10
|
+
message: str,
|
|
11
|
+
status_code: int | None = None,
|
|
12
|
+
body: str | None = None,
|
|
13
|
+
) -> None:
|
|
14
|
+
self.status_code = status_code
|
|
15
|
+
self.body = body
|
|
16
|
+
super().__init__(message)
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import av
|
|
6
|
+
from getstream.video.rtc.track_util import PcmData
|
|
7
|
+
from vision_agents.core.agents.inference import (
|
|
8
|
+
AudioOutputChunk,
|
|
9
|
+
AudioOutputFlush,
|
|
10
|
+
AudioOutputStream,
|
|
11
|
+
)
|
|
12
|
+
from vision_agents.core.avatars import Avatar, AVSynchronizer
|
|
13
|
+
from vision_agents.core.utils.utils import cancel_and_wait
|
|
14
|
+
from vision_agents.core.utils.video_track import QueuedVideoTrack
|
|
15
|
+
|
|
16
|
+
from .liveavatar_client import LiveAvatarClient, Session
|
|
17
|
+
from .liveavatar_rtc_manager import LiveAvatarRTCManager
|
|
18
|
+
from .liveavatar_websocket import LiveAvatarWebSocket
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _task_done_callback(task: asyncio.Task[None]) -> None:
    """Log the exception of a finished background task, if it has one.

    Intended for ``Task.add_done_callback``. Cancelled tasks and tasks that
    completed without an exception produce no log record.

    Args:
        task: The finished task to inspect.
    """
    if task.cancelled():
        return
    # Fetch the exception once and reuse it for both the check and the record.
    exc = task.exception()
    if exc is None:
        return
    # logger.error with an explicit exc_info: this callback does not run
    # inside an ``except`` block, so there is no "current" exception for
    # logger.exception to pick up on its own.
    logger.error(
        "LiveAvatar background task %s failed",
        task.get_name(),
        exc_info=exc,
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class LiveAvatar(Avatar):
    """LiveAvatar plugin (LITE mode, custom-agent integration path).

    References:
        - https://docs.liveavatar.com
        - https://docs.liveavatar.com/docs/lite-mode/integration-paths

    Sends TTS audio via the LiveAvatar media-server WebSocket and
    receives synchronized lip-synced video and audio from the room
    LiveAvatar provisions for the session.
    """

    provider_name = "liveavatar"

    def __init__(
        self,
        avatar_id: str | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
        is_sandbox: bool = True,
        max_session_duration: int | None = None,
        video_quality: str = "high",
        video_encoding: str = "H264",
        width: int = 1280,
        height: int = 720,
        fps: int = 30,
        buffer_seconds: float = 1.0,
    ) -> None:
        """Initialize the LiveAvatar plugin.

        Args:
            avatar_id: LiveAvatar avatar UUID. Falls back to LIVEAVATAR_AVATAR_ID.
            api_key: LiveAvatar API key. Falls back to LIVEAVATAR_API_KEY.
            base_url: Override the LiveAvatar API base URL.
            is_sandbox: Sandbox sessions don't burn credits but are duration-capped.
            max_session_duration: Session length cap in seconds; None for the API default.
            video_quality: One of "low", "medium", "high", "very_high".
                Default - `"high"`.
            video_encoding: One of "H264", "VP8".
            width: Output video width in pixels.
            height: Output video height in pixels.
            fps: Output video frame rate. Must be > 0.
            buffer_seconds: Max video buffer depth in seconds. Caps how many frames
                can be queued ahead of audio playback. Must be > 0.

        Raises:
            ValueError: If ``buffer_seconds`` or ``fps`` is not positive, or if
                no API key / avatar ID is supplied or found in the environment.
        """
        super().__init__()
        if buffer_seconds <= 0:
            raise ValueError("buffer_seconds must be > 0")
        if fps <= 0:
            raise ValueError("fps must be > 0")

        api_key = api_key or os.getenv("LIVEAVATAR_API_KEY")
        if not api_key:
            raise ValueError(
                "LiveAvatar API key required. Set LIVEAVATAR_API_KEY or pass api_key."
            )
        avatar_id = avatar_id or os.getenv("LIVEAVATAR_AVATAR_ID")
        if not avatar_id:
            raise ValueError(
                "Avatar ID required. Set LIVEAVATAR_AVATAR_ID or pass avatar_id."
            )

        self._client = LiveAvatarClient(api_key=api_key, base_url=base_url)
        self._rtc_manager = LiveAvatarRTCManager(
            on_video=self._on_video_frame,
            on_audio=self._on_audio_frame,
            on_disconnect=self._on_disconnect,
        )
        self._sync = AVSynchronizer(
            width=width,
            height=height,
            fps=fps,
            # int() truncates, so e.g. fps=1 with buffer_seconds=0.5 would
            # yield 0; clamp to at least one frame.
            # NOTE(review): how AVSynchronizer treats a size of 0 is not
            # visible from this file - confirm.
            max_queue_size=max(1, int(fps * buffer_seconds)),
        )

        self._avatar_id = avatar_id
        self._is_sandbox = is_sandbox
        self._max_session_duration = max_session_duration
        self._video_quality = video_quality
        self._video_encoding = video_encoding

        self._session: Session | None = None
        self._websocket: LiveAvatarWebSocket | None = None
        self._audio_input_task: asyncio.Task[None] | None = None
        self._connected = False

        logger.debug("LiveAvatar initialized (%dx%d)", width, height)

    def video_output(self) -> QueuedVideoTrack:
        """Return the video track that receives avatar video frames."""
        return self._sync.video_output

    def audio_output(self) -> AudioOutputStream:
        """Return the audio stream that receives avatar audio frames."""
        return self._sync.audio_output

    async def start(self) -> None:
        """Connect to LiveAvatar. Called by the Agent during ``join()``."""
        await self._connect()

    async def close(self) -> None:
        """Stop the session and release all resources.

        Failures of the websocket, RTC manager and session-stop steps are
        logged as warnings and do not stop the teardown. Handles that were
        released are reset to ``None`` so a repeated ``close()`` does not
        attempt to release them again.
        """
        if self._audio_input_task is not None:
            await cancel_and_wait(self._audio_input_task)
            self._audio_input_task = None

        if self._websocket is not None:
            try:
                await self._websocket.close()
            except Exception as exc:
                logger.warning("Failed to close LiveAvatar websocket: %s", exc)
            self._websocket = None

        try:
            await self._rtc_manager.close()
        except Exception as exc:
            logger.warning("Failed to close LiveAvatar RTC manager: %s", exc)

        # Close sync AFTER rtc_manager so its receive tasks can't write to a
        # closed stream during teardown.
        self._sync.close()

        if self._session is not None:
            try:
                await self._client.stop_session(session_id=self._session.session_id)
            except Exception as exc:
                # Keep the session handle so a later close() can retry.
                logger.warning("Failed to stop LiveAvatar session: %s", exc)
            else:
                self._session = None

        try:
            await self._client.close()
        finally:
            self._connected = False
        logger.debug("LiveAvatar closed")

    async def _process_audio_input(self) -> None:
        """Forward items from ``input_audio_stream`` to the websocket.

        Audio chunks are sent as audio frames, a chunk marked ``final`` ends
        the turn, and a flush item interrupts the avatar and flushes the
        local synchronizer.
        """
        async for item in self.input_audio_stream:
            if isinstance(item, AudioOutputChunk):
                if item.data is not None and self._websocket is not None:
                    await self._websocket.send_audio_frame(item.data)
                if item.final and self._websocket is not None:
                    await self._websocket.end_turn()
            elif isinstance(item, AudioOutputFlush):
                if self._websocket is not None:
                    await self._websocket.interrupt()
                await self._sync.flush()

    async def _connect(self) -> None:
        """Create and start a session, then join its room and websocket.

        If joining the room or opening the websocket fails (or the coroutine
        is cancelled while doing so), everything opened so far is torn down
        via ``_abort_connect`` and the original exception is re-raised.
        """
        token = await self._client.create_session_token(
            self._avatar_id,
            is_sandbox=self._is_sandbox,
            max_session_duration=self._max_session_duration,
            video_quality=self._video_quality,
            video_encoding=self._video_encoding,
        )
        session = await self._client.start_session(token.session_token)
        self._session = session

        try:
            await self._rtc_manager.connect(
                session.livekit_url, session.livekit_agent_token
            )
            self._websocket = LiveAvatarWebSocket(session.ws_url)
            await self._websocket.connect()
        except (Exception, asyncio.CancelledError):
            # CancelledError is a BaseException; include it so a cancelled
            # connect does not leave the started session running remotely.
            await self._abort_connect(session)
            raise

        self._connected = True
        logger.info(
            "LiveAvatar connection established session_id=%s", session.session_id
        )

        if self._audio_input_task is None:
            self._audio_input_task = asyncio.create_task(self._process_audio_input())
            self._audio_input_task.add_done_callback(_task_done_callback)

    async def _abort_connect(self, session: Session) -> None:
        """Best-effort teardown after a failed ``_connect``.

        Each step is guarded separately so that one failing cleanup call can
        neither hide the error that triggered the abort nor prevent the
        remaining steps (in particular stopping the remote session).
        """
        try:
            await self._rtc_manager.close()
        except Exception as exc:
            logger.warning("Failed to close LiveAvatar RTC manager: %s", exc)

        if self._websocket is not None:
            try:
                await self._websocket.close()
            except Exception as exc:
                logger.warning("Failed to close LiveAvatar websocket: %s", exc)
            self._websocket = None

        try:
            await self._client.stop_session(session_id=session.session_id)
        except Exception as exc:
            # Leave self._session set so close() can retry the stop.
            logger.warning("Failed to stop LiveAvatar session: %s", exc)
        else:
            self._session = None

    async def _on_video_frame(self, frame: av.VideoFrame) -> None:
        """Hand a received video frame to the synchronizer."""
        await self._sync.write_video(frame)

    async def _on_audio_frame(self, pcm: PcmData) -> None:
        """Hand received PCM audio to the synchronizer."""
        await self._sync.write_audio(pcm)

    async def _on_disconnect(self) -> None:
        """Record that the RTC manager reported a disconnect."""
        logger.info("LiveAvatar disconnected")
        self._connected = False
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Self
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from .exceptions import LiveAvatarAPIError
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
DEFAULT_BASE_URL = "https://api.liveavatar.com"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class SessionToken:
    """Result of ``LiveAvatarClient.create_session_token``."""

    # Identifier of the session the token was issued for.
    session_id: str
    # Token sent as the Bearer credential when starting the session.
    session_token: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
|
|
21
|
+
class Session:
|
|
22
|
+
session_id: str
|
|
23
|
+
livekit_url: str
|
|
24
|
+
livekit_agent_token: str
|
|
25
|
+
livekit_client_token: str
|
|
26
|
+
ws_url: str
|
|
27
|
+
max_session_duration: int | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LiveAvatarClient:
    """Async HTTP client for the LiveAvatar session endpoints.

    Wraps one ``httpx.AsyncClient`` that sends the API key in the
    ``X-API-KEY`` header. Usable as an async context manager; leaving the
    context closes the underlying HTTP client.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Create the client.

        Args:
            api_key: LiveAvatar API key; must be non-empty.
            base_url: API base URL; ``DEFAULT_BASE_URL`` when falsy. Trailing
                slashes are stripped.
            timeout: Timeout passed to ``httpx.AsyncClient``.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("LiveAvatar API key required")
        chosen_url = base_url or DEFAULT_BASE_URL
        self._api_key = api_key
        self._base_url = chosen_url.rstrip("/")
        self._http = httpx.AsyncClient(
            base_url=self._base_url,
            timeout=timeout,
            headers={"X-API-KEY": api_key},
        )

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._http.aclose()

    # NOTE(review): ``typing.Self`` is only importable on Python 3.11+, while
    # the package metadata declares ``requires-python = ">=3.10"`` - confirm
    # which floor is intended.
    async def __aenter__(self) -> Self:
        return self

    async def __aexit__(self, *exc: object) -> None:
        await self.close()

    async def create_session_token(
        self,
        avatar_id: str,
        *,
        is_sandbox: bool = True,
        max_session_duration: int | None = None,
        video_quality: str = "high",
        video_encoding: str = "H264",
    ) -> SessionToken:
        """Request a LITE-mode session token via ``POST /v1/sessions/token``.

        Args:
            avatar_id: Avatar to create the session for.
            is_sandbox: Sent as ``is_sandbox`` in the request body.
            max_session_duration: Included in the body only when not None.
            video_quality: Sent as ``video_settings.quality``.
            video_encoding: Sent as ``video_settings.encoding``.

        Returns:
            The session id and token read from the response's ``data`` object.

        Raises:
            LiveAvatarAPIError: If the response has an error status.
        """
        video_settings = {
            "quality": video_quality,
            "encoding": video_encoding,
        }
        body: dict[str, object] = {
            "mode": "LITE",
            "avatar_id": avatar_id,
            "is_sandbox": is_sandbox,
            "video_settings": video_settings,
        }
        if max_session_duration is not None:
            body["max_session_duration"] = max_session_duration

        response = await self._http.post("/v1/sessions/token", json=body)
        self._raise_for_status(response)
        payload = response.json()["data"]
        return SessionToken(
            session_id=payload["session_id"],
            session_token=payload["session_token"],
        )

    async def start_session(self, session_token: str) -> Session:
        """Start a session via ``POST /v1/sessions/start``.

        Args:
            session_token: Token sent as the ``Authorization: Bearer`` header.

        Returns:
            The session details read from the response's ``data`` object.

        Raises:
            LiveAvatarAPIError: If the response has an error status.
        """
        auth_header = {"Authorization": f"Bearer {session_token}"}
        response = await self._http.post(
            "/v1/sessions/start",
            headers=auth_header,
            json={},
        )
        self._raise_for_status(response)
        payload = response.json()["data"]
        return Session(
            session_id=payload["session_id"],
            livekit_url=payload["livekit_url"],
            livekit_agent_token=payload["livekit_agent_token"],
            livekit_client_token=payload["livekit_client_token"],
            ws_url=payload["ws_url"],
            # Optional in the response; None when the key is absent.
            max_session_duration=payload.get("max_session_duration"),
        )

    async def stop_session(
        self,
        *,
        session_id: str,
        reason: str = "USER_CLOSED",
    ) -> None:
        """Stop a session via ``POST /v1/sessions/stop``.

        Args:
            session_id: Session to stop.
            reason: Sent as ``reason`` in the request body.

        Raises:
            LiveAvatarAPIError: If the response has an error status.
        """
        stop_request = {"session_id": session_id, "reason": reason}
        response = await self._http.post("/v1/sessions/stop", json=stop_request)
        self._raise_for_status(response)

    @staticmethod
    def _raise_for_status(resp: httpx.Response) -> None:
        """Convert an HTTP error status into ``LiveAvatarAPIError``.

        The raised error carries the status code and response text and is
        chained to the original ``httpx.HTTPStatusError``.
        """
        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError as e:
            request = e.request
            status = resp.status_code
            text = resp.text
            raise LiveAvatarAPIError(
                f"{request.method} {request.url} -> {status}: {text}",
                status_code=status,
                body=text,
            ) from e
|
vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_rtc_manager.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Callable, Coroutine
|
|
4
|
+
|
|
5
|
+
import av
|
|
6
|
+
import numpy as np
|
|
7
|
+
from getstream.video.rtc.track_util import AudioFormat, PcmData
|
|
8
|
+
from livekit import rtc
|
|
9
|
+
from vision_agents.core.utils.utils import cancel_and_wait
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# All three callbacks are callables that return a coroutine resolving to None.
_AsyncNone = Coroutine[None, None, None]

# Called with each received video frame.
VideoCallback = Callable[[av.VideoFrame], _AsyncNone]
# Called with each received chunk of PCM audio.
AudioCallback = Callable[[PcmData], _AsyncNone]
# Called with no arguments on disconnect.
DisconnectCallback = Callable[[], _AsyncNone]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _task_done_callback(task: asyncio.Task[None]) -> None:
    """Log the exception of a finished background task, if it has one.

    Cancelled tasks and tasks that finished without an exception are ignored.
    """
    if task.cancelled():
        return
    failure = task.exception()
    if failure is None:
        return
    logger.error("Background task %s failed", task.get_name(), exc_info=failure)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LiveAvatarRTCManager:
    """Joins HeyGen's avatar room and forwards remote video/audio to callbacks."""

    def __init__(
        self,
        on_video: VideoCallback,
        on_audio: AudioCallback,
        on_disconnect: DisconnectCallback,
    ) -> None:
        """Store the callbacks; no connection is made until ``connect()``.

        Args:
            on_video: Awaited with each received video frame.
            on_audio: Awaited with each received chunk of PCM audio.
            on_disconnect: Scheduled when a participant leaves or the room
                disconnects.
        """
        self._on_video = on_video
        self._on_audio = on_audio
        self._on_disconnect = on_disconnect

        self._room: rtc.Room | None = None
        self._connected = False
        # Strong references to in-flight background tasks, so close() can
        # cancel them.
        self._tasks: set[asyncio.Task[None]] = set()

    @property
    def is_connected(self) -> bool:
        """Whether the manager currently considers itself connected."""
        return self._connected

    async def connect(self, room_url: str, token: str) -> None:
        """Register room event handlers and connect to the room.

        Args:
            room_url: URL passed to ``rtc.Room.connect``.
            token: Access token passed to ``rtc.Room.connect``.
        """
        room = rtc.Room()

        @room.on("participant_connected")
        def on_participant_connected(participant: rtc.RemoteParticipant) -> None:
            logger.info("LiveAvatar participant joined: %s", participant.identity)

        @room.on("track_subscribed")
        def on_track_subscribed(
            track: rtc.Track,
            publication: rtc.RemoteTrackPublication,
            participant: rtc.RemoteParticipant,
        ) -> None:
            # Start one consumer task per subscribed video/audio track.
            if track.kind == rtc.TrackKind.KIND_VIDEO:
                logger.info("Subscribed video track from %s", participant.identity)
                video_stream = rtc.VideoStream(track)
                self._create_task(self._consume_video(video_stream))
            elif track.kind == rtc.TrackKind.KIND_AUDIO:
                logger.info("Subscribed audio track from %s", participant.identity)
                audio_stream = rtc.AudioStream(track, sample_rate=48000, num_channels=2)
                self._create_task(self._consume_audio(audio_stream))

        @room.on("participant_disconnected")
        def on_participant_disconnected(participant: rtc.RemoteParticipant) -> None:
            logger.info(
                "LiveAvatar participant disconnected: %s; reason: %s",
                participant.identity,
                participant.disconnect_reason,
            )
            # Flip the flag first so the "disconnected" handler below does
            # not report the same disconnect a second time.
            self._connected = False
            self._create_task(self._on_disconnect())
            if self._room is not None:
                self._create_task(self._room.disconnect())

        @room.on("disconnected")
        def on_disconnected(reason: str) -> None:
            # The "disconnected" callback may fire multiple times because we
            # also disconnect ourselves when the avatar leaves.
            if self._connected:
                logger.info("LiveAvatar room disconnected; reason: %s", reason)
                self._connected = False
                self._create_task(self._on_disconnect())

        logger.info("Connecting to LiveAvatar room url=%s", room_url)
        await room.connect(room_url, token)
        logger.info("Connected to LiveAvatar room")

        self._room = room
        self._connected = True

    async def close(self) -> None:
        """Disconnect from the room and cancel all background tasks."""
        try:
            if self._room is not None:
                await self._room.disconnect()
            # Loop because tasks may be added to the set while the current
            # batch is being cancelled.
            while self._tasks:
                tasks = tuple(self._tasks)
                self._tasks.clear()
                await cancel_and_wait(*tasks)
        finally:
            self._room = None
            self._connected = False

    async def _consume_video(self, video_stream: rtc.VideoStream) -> None:
        """Convert each frame to an RGBA ``av.VideoFrame`` and forward it.

        The stream is closed when iteration ends for any reason, including
        cancellation by ``close()``.
        """
        try:
            async for event in video_stream:
                lk_frame = event.frame.convert(rtc.VideoBufferType.RGBA)
                # 4 bytes per pixel (RGBA), laid out as height x width x 4.
                arr = np.frombuffer(lk_frame.data, dtype=np.uint8).reshape(
                    lk_frame.height, lk_frame.width, 4
                )
                await self._on_video(av.VideoFrame.from_ndarray(arr, format="rgba"))
        finally:
            await self._close_stream(video_stream)

    async def _consume_audio(self, audio_stream: rtc.AudioStream) -> None:
        """Wrap each audio frame as S16 ``PcmData`` and forward it.

        The stream is closed when iteration ends for any reason, including
        cancellation by ``close()``.
        """
        try:
            async for event in audio_stream:
                frame = event.frame
                pcm = PcmData.from_bytes(
                    frame.data,  # type: ignore[arg-type]
                    sample_rate=frame.sample_rate,
                    format=AudioFormat.S16,
                    channels=frame.num_channels,
                )
                await self._on_audio(pcm)
        finally:
            await self._close_stream(audio_stream)

    @staticmethod
    async def _close_stream(stream: "rtc.VideoStream | rtc.AudioStream") -> None:
        """Close a media stream, logging instead of raising on failure."""
        try:
            await stream.aclose()
        except Exception as exc:
            logger.debug("Failed to close LiveAvatar media stream: %s", exc)

    def _create_task(self, coro: Coroutine[None, None, None]) -> None:
        """Schedule ``coro`` as a tracked background task.

        The task is removed from the tracking set when it finishes, and any
        exception it raised is logged by ``_task_done_callback``.
        """
        task: asyncio.Task[None] = asyncio.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        task.add_done_callback(_task_done_callback)
|
vision_agents_plugins_liveavatar-0.6.0/vision_agents/plugins/liveavatar/liveavatar_websocket.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import base64
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
import websockets
|
|
8
|
+
from getstream.video.rtc.track_util import PcmData
|
|
9
|
+
from websockets.asyncio.client import ClientConnection
|
|
10
|
+
from websockets.exceptions import ConnectionClosed
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LiveAvatarWebSocket:
    """Audio bridge to the LiveAvatar media server (LITE-mode events)."""

    def __init__(
        self,
        ws_url: str,
        sample_rate: int = 24000,
        num_channels: int = 1,
    ) -> None:
        """Store connection settings; no socket is opened until ``connect()``.

        Args:
            ws_url: WebSocket URL to connect to.
            sample_rate: Sample rate announced in the ``start`` message and
                used as the resample target for outgoing audio.
            num_channels: Channel count announced in the ``start`` message
                and used as the resample target for outgoing audio.
        """
        self._ws_url = ws_url
        self._sample_rate = sample_rate
        self._num_channels = num_channels
        self._ws: ClientConnection | None = None
        self._closed = False
        self._reconnect_lock = asyncio.Lock()

    @property
    def connected(self) -> bool:
        """True while a websocket connection object is held."""
        return self._ws is not None

    async def connect(self) -> None:
        """Open the websocket and send the ``start`` message.

        Does nothing if a connection is already held.

        Raises:
            ConnectionClosed: If the connection drops while sending the
                ``start`` message; the connection handle is cleared first.
        """
        if self.connected:
            return
        # ping_interval=None: continuous audio frames are themselves a
        # liveness signal; the LiveAvatar media server stalls pong responses
        # under load and the client tears down the conn (1011) otherwise.
        self._ws = await websockets.connect(self._ws_url, ping_interval=None)
        try:
            await self._ws.send(
                json.dumps(
                    {
                        "type": "start",
                        "encoding": "pcm_s16le",
                        "sample_rate": self._sample_rate,
                        "channels": self._num_channels,
                    }
                )
            )
        except ConnectionClosed:
            self._ws = None
            raise
        logger.info("liveavatar_ws connected url=%s", self._ws_url)

    async def close(self) -> None:
        """Mark the bridge closed and close the websocket if one is open."""
        self._closed = True
        if self._ws is not None:
            try:
                await self._ws.close()
            except ConnectionClosed:
                pass
            self._ws = None

    async def send_audio_frame(self, pcm: PcmData) -> None:
        """Resample ``pcm`` to the configured format and send it base64-encoded."""
        pcm = pcm.resample(
            target_sample_rate=self._sample_rate,
            target_channels=self._num_channels,
        )
        b64 = base64.b64encode(pcm.to_bytes()).decode("ascii")
        await self._send_json({"type": "agent.speak", "audio": b64})

    async def end_turn(self) -> None:
        """Send the ``agent.speak_end`` event."""
        await self._send_json({"type": "agent.speak_end"})

    async def interrupt(self) -> None:
        """Send an ``agent.interrupt`` event with a fresh random event id."""
        await self._send_json(
            {"type": "agent.interrupt", "event_id": str(uuid.uuid4())}
        )

    def _require_ws(self) -> ClientConnection:
        """Return the live connection or raise if there is none.

        An explicit check instead of ``assert``: asserts are stripped under
        ``python -O``, and the connection can legitimately be missing here
        when ``close()`` ran while a reconnect was in progress.
        """
        ws = self._ws
        if ws is None:
            raise RuntimeError("liveavatar_ws is closed")
        return ws

    async def _send_json(self, msg: dict[str, object]) -> None:
        """Serialize ``msg`` and send it, reconnecting once if the link dropped.

        Raises:
            RuntimeError: If the bridge has been closed.
        """
        if self._closed:
            raise RuntimeError("liveavatar_ws is closed")
        if not self.connected:
            await self.connect()
        try:
            await self._require_ws().send(json.dumps(msg))
        except ConnectionClosed:
            logger.warning("liveavatar_ws connection closed during send; reconnecting")
            self._ws = None
            await self._reconnect()
            # _reconnect() returns without connecting when the bridge was
            # closed in the meantime; _require_ws() turns that into the
            # same "closed" error as the check at the top.
            await self._require_ws().send(json.dumps(msg))

    async def _reconnect(self) -> None:
        """Reconnect under a lock, unless already connected or closed."""
        async with self._reconnect_lock:
            if self.connected or self._closed:
                return
            await self.connect()
|