vision-agents-plugins-lemonslice 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .cursor/*
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ dist/
11
+ downloads/
12
+ develop-eggs/
13
+ eggs/
14
+ .eggs/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ pip-wheel-metadata/
22
+ MANIFEST
23
+ *.egg-info/
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ coverage.xml
38
+ nosetests.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+
44
+ # Type checker / lint caches
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .pytype/
49
+ .pyre/
50
+ .ruff_cache/
51
+
52
+ # Environments
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+ .env
60
+ .env.local
61
+ .env.*.local
62
+ .env.bak
63
+ pyvenv.cfg
64
+ .python-version
65
+
66
+ # Editors / IDEs
67
+ .vscode/
68
+ .idea/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints/
72
+
73
+ # OS / Misc
74
+ .DS_Store
75
+ *.log
76
+
77
+ # Tooling & repo-specific
78
+ pyrightconfig.json
79
+ shell.nix
80
+ bin/*
81
+ lib/*
82
+ stream-py/
83
+
84
+ # Example lock files (regenerated by uv sync)
85
+ examples/*/uv.lock
86
+ plugins/*/example/uv.lock
87
+
88
+ # Artifacts / assets
89
+ *.pt
90
+ *.kef
91
+ *.onnx
92
+ profile.html
93
+
94
+ /opencode.json
95
+ .ralph-tui/
96
+ .claude/
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-lemonslice
3
+ Version: 0.3.7
4
+ Summary: LemonSlice avatar plugin for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,agents,avatars,lemonslice,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: httpx>=0.27.0
12
+ Requires-Dist: livekit-api>=1.0.0
13
+ Requires-Dist: livekit>=1.0.0
14
+ Requires-Dist: vision-agents
15
+ Description-Content-Type: text/markdown
16
+
17
+ # LemonSlice Avatar Plugin for Vision Agents
18
+
19
+ Add real-time interactive avatar video to your AI agents using LemonSlice's self-managed API.
20
+
21
+ ## Features
22
+
23
+ - Real-time avatar video synchronized with TTS audio
24
+ - Works with any TTS provider (Cartesia, ElevenLabs, etc.)
25
+ - Supports both standard and Realtime LLMs
26
+ - Customizable avatar expressions via agent prompts
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install vision-agents[lemonslice]
32
+ ```
33
+
34
+ Or with uv:
35
+
36
+ ```bash
37
+ uv pip install vision-agents[lemonslice]
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ import asyncio
44
+ from uuid import uuid4
45
+ from dotenv import load_dotenv
46
+
47
+ from vision_agents.core import User, Agent
48
+ from vision_agents.plugins import cartesia, deepgram, getstream, gemini, lemonslice
49
+
50
+ load_dotenv()
51
+
52
+
53
+ async def start_avatar_agent():
54
+ agent = Agent(
55
+ edge=getstream.Edge(),
56
+ agent_user=User(name="AI Assistant with Avatar", id="agent"),
57
+ instructions="You're a friendly AI assistant.",
58
+
59
+ llm=gemini.LLM(),
60
+ tts=cartesia.TTS(),
61
+ stt=deepgram.STT(),
62
+
63
+ processors=[
64
+ lemonslice.LemonSliceAvatarPublisher(
65
+ agent_id="your-avatar-id",
66
+ )
67
+ ]
68
+ )
69
+
70
+ call = agent.edge.client.video.call("default", str(uuid4()))
71
+
72
+ async with agent.join(call):
73
+ await agent.simple_response("Hello! I'm your AI assistant with an avatar.")
74
+ await agent.finish()
75
+
76
+
77
+ if __name__ == "__main__":
78
+ asyncio.run(start_avatar_agent())
79
+ ```
80
+
81
+ ## Configuration
82
+
83
+ ### Environment Variables
84
+
85
+ ```bash
86
+ LEMONSLICE_API_KEY=your_lemonslice_api_key
87
+ # LemonSlice uses LiveKit as a transport for audio and video
88
+ LIVEKIT_URL=wss://your-livekit-server.com
89
+ LIVEKIT_API_KEY=your_livekit_api_key
90
+ LIVEKIT_API_SECRET=your_livekit_api_secret
91
+ ```
92
+
93
+ ### AvatarPublisher Options
94
+
95
+ ```python
96
+ lemonslice.LemonSliceAvatarPublisher(
97
+ agent_id="your-avatar-id", # LemonSlice agent ID
98
+ agent_image_url=None, # Or provide a custom image URL (368x560px)
99
+ agent_prompt=None, # Prompt to influence avatar expressions/movements
100
+ api_key=None, # Optional: override LEMONSLICE_API_KEY env var
101
+ idle_timeout=None, # Session timeout in seconds
102
+ livekit_url=None, # Optional: override LIVEKIT_URL env var
103
+ livekit_api_key=None, # Optional: override LIVEKIT_API_KEY env var
104
+ livekit_api_secret=None, # Optional: override LIVEKIT_API_SECRET env var
105
+ width=1920, # Output video width in pixels
106
+ height=1080, # Output video height in pixels
107
+ )
108
+ ```
109
+
110
+ ## How It Works
111
+
112
+ 1. **LemonSlice Session**: Creates a session via the LemonSlice API and joins the LiveKit room as a participant
113
+ 2. **Audio Forwarding**: TTS audio is captured and sent to LemonSlice via the room
114
+ 3. **Avatar Generation**: LemonSlice generates synchronized avatar video and audio
115
+ 4. **Video Streaming**: Avatar video is streamed to call participants via GetStream Edge
116
+
117
+ ## Requirements
118
+
119
+ - Python 3.10+
120
+ - LemonSlice API key (get one at [lemonslice.com](https://lemonslice.com))
121
+ - LiveKit server (cloud or self-hosted)
122
+ - GetStream account for video calls
123
+ - TTS provider (Cartesia, ElevenLabs, etc.) or Realtime LLM
124
+
125
+ ## License
126
+
127
+ MIT
128
+
129
+ ## Links
130
+
131
+ - [Documentation](https://visionagents.ai/)
132
+ - [GitHub](https://github.com/GetStream/Vision-Agents)
133
+ - [LemonSlice Docs](https://lemonslice.com/docs/self-managed/overview)
@@ -0,0 +1,117 @@
1
+ # LemonSlice Avatar Plugin for Vision Agents
2
+
3
+ Add real-time interactive avatar video to your AI agents using LemonSlice's self-managed API.
4
+
5
+ ## Features
6
+
7
+ - Real-time avatar video synchronized with TTS audio
8
+ - Works with any TTS provider (Cartesia, ElevenLabs, etc.)
9
+ - Supports both standard and Realtime LLMs
10
+ - Customizable avatar expressions via agent prompts
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pip install vision-agents[lemonslice]
16
+ ```
17
+
18
+ Or with uv:
19
+
20
+ ```bash
21
+ uv pip install vision-agents[lemonslice]
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ```python
27
+ import asyncio
28
+ from uuid import uuid4
29
+ from dotenv import load_dotenv
30
+
31
+ from vision_agents.core import User, Agent
32
+ from vision_agents.plugins import cartesia, deepgram, getstream, gemini, lemonslice
33
+
34
+ load_dotenv()
35
+
36
+
37
+ async def start_avatar_agent():
38
+ agent = Agent(
39
+ edge=getstream.Edge(),
40
+ agent_user=User(name="AI Assistant with Avatar", id="agent"),
41
+ instructions="You're a friendly AI assistant.",
42
+
43
+ llm=gemini.LLM(),
44
+ tts=cartesia.TTS(),
45
+ stt=deepgram.STT(),
46
+
47
+ processors=[
48
+ lemonslice.LemonSliceAvatarPublisher(
49
+ agent_id="your-avatar-id",
50
+ )
51
+ ]
52
+ )
53
+
54
+ call = agent.edge.client.video.call("default", str(uuid4()))
55
+
56
+ async with agent.join(call):
57
+ await agent.simple_response("Hello! I'm your AI assistant with an avatar.")
58
+ await agent.finish()
59
+
60
+
61
+ if __name__ == "__main__":
62
+ asyncio.run(start_avatar_agent())
63
+ ```
64
+
65
+ ## Configuration
66
+
67
+ ### Environment Variables
68
+
69
+ ```bash
70
+ LEMONSLICE_API_KEY=your_lemonslice_api_key
71
+ # LemonSlice uses LiveKit as a transport for audio and video
72
+ LIVEKIT_URL=wss://your-livekit-server.com
73
+ LIVEKIT_API_KEY=your_livekit_api_key
74
+ LIVEKIT_API_SECRET=your_livekit_api_secret
75
+ ```
76
+
77
+ ### AvatarPublisher Options
78
+
79
+ ```python
80
+ lemonslice.LemonSliceAvatarPublisher(
81
+ agent_id="your-avatar-id", # LemonSlice agent ID
82
+ agent_image_url=None, # Or provide a custom image URL (368x560px)
83
+ agent_prompt=None, # Prompt to influence avatar expressions/movements
84
+ api_key=None, # Optional: override LEMONSLICE_API_KEY env var
85
+ idle_timeout=None, # Session timeout in seconds
86
+ livekit_url=None, # Optional: override LIVEKIT_URL env var
87
+ livekit_api_key=None, # Optional: override LIVEKIT_API_KEY env var
88
+ livekit_api_secret=None, # Optional: override LIVEKIT_API_SECRET env var
89
+ width=1920, # Output video width in pixels
90
+ height=1080, # Output video height in pixels
91
+ )
92
+ ```
93
+
94
+ ## How It Works
95
+
96
+ 1. **LemonSlice Session**: Creates a session via the LemonSlice API and joins the LiveKit room as a participant
97
+ 2. **Audio Forwarding**: TTS audio is captured and sent to LemonSlice via the room
98
+ 3. **Avatar Generation**: LemonSlice generates synchronized avatar video and audio
99
+ 4. **Video Streaming**: Avatar video is streamed to call participants via GetStream Edge
100
+
101
+ ## Requirements
102
+
103
+ - Python 3.10+
104
+ - LemonSlice API key (get one at [lemonslice.com](https://lemonslice.com))
105
+ - LiveKit server (cloud or self-hosted)
106
+ - GetStream account for video calls
107
+ - TTS provider (Cartesia, ElevenLabs, etc.) or Realtime LLM
108
+
109
+ ## License
110
+
111
+ MIT
112
+
113
+ ## Links
114
+
115
+ - [Documentation](https://visionagents.ai/)
116
+ - [GitHub](https://github.com/GetStream/Vision-Agents)
117
+ - [LemonSlice Docs](https://lemonslice.com/docs/self-managed/overview)
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-lemonslice"
7
+ dynamic = ["version"]
8
+ description = "LemonSlice avatar plugin for Vision Agents"
9
+ readme = "README.md"
10
+ keywords = ["lemonslice", "avatars", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "livekit>=1.0.0",
16
+ "livekit-api>=1.0.0",
17
+ "httpx>=0.27.0",
18
+ ]
19
+
20
+ [project.urls]
21
+ Documentation = "https://visionagents.ai/"
22
+ Website = "https://visionagents.ai/"
23
+ Source = "https://github.com/GetStream/Vision-Agents"
24
+
25
+ [tool.hatch.version]
26
+ source = "vcs"
27
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = [".", "vision_agents"]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = ["/vision_agents"]
34
+
35
+ [tool.uv.sources]
36
+ vision-agents = { workspace = true }
37
+
38
+ [dependency-groups]
39
+ dev = [
40
+ "pytest>=8.4.1",
41
+ "pytest-asyncio>=1.0.0",
42
+ ]
@@ -0,0 +1,5 @@
1
+ from .lemonslice_avatar_publisher import LemonSliceAvatarPublisher
2
+
3
+ __all__ = [
4
+ "LemonSliceAvatarPublisher",
5
+ ]
@@ -0,0 +1,10 @@
1
+ class LemonSliceError(Exception):
2
+ """Base exception for LemonSlice API errors."""
3
+
4
+
5
+ class LemonSliceSessionError(LemonSliceError):
6
+ """Raised when session creation or management fails."""
7
+
8
+ def __init__(self, message: str, status_code: int | None = None):
9
+ self.status_code = status_code
10
+ super().__init__(message)
@@ -0,0 +1,160 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import Any
4
+
5
+ import av
6
+ from getstream.video.rtc import audio_track
7
+ from getstream.video.rtc.track_util import PcmData
8
+ from vision_agents.core.llm.events import (
9
+ RealtimeAudioOutputDoneEvent,
10
+ RealtimeAudioOutputEvent,
11
+ )
12
+ from vision_agents.core.processors.base_processor import AudioPublisher, VideoPublisher
13
+ from vision_agents.core.tts.events import TTSAudioEvent
14
+ from vision_agents.core.utils.video_track import QueuedVideoTrack
15
+
16
+ from .lemonslice_client import LemonSliceClient
17
+ from .lemonslice_rtc_manager import LemonSliceRTCManager
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class LemonSliceAvatarPublisher(AudioPublisher, VideoPublisher):
    """Publishes a LemonSlice avatar's video and audio tracks.

    Agent speech audio is forwarded to LemonSlice over LiveKit, and the
    synchronized avatar video and audio coming back are written to the
    tracks this publisher exposes.

    For standard LLMs: LemonSlice provides both video and audio.
    For Realtime LLMs: LemonSlice provides video only; LLM provides audio.
    """

    name = "lemonslice_avatar"

    def __init__(
        self,
        agent_id: str | None = None,
        agent_image_url: str | None = None,
        agent_prompt: str | None = None,
        idle_timeout: int | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
        livekit_url: str | None = None,
        livekit_api_key: str | None = None,
        livekit_api_secret: str | None = None,
        width: int = 1920,
        height: int = 1080,
    ):
        """Set up the REST client, the LiveKit manager and the output tracks.

        No network connection is made here; see ``start()``.

        Args:
            agent_id: LemonSlice agent ID.
            agent_image_url: URL of the agent's avatar image.
            agent_prompt: Prompt describing the agent's persona.
            idle_timeout: Seconds before an idle session is closed.
            api_key: LemonSlice API key. Uses LEMONSLICE_API_KEY env var if not provided.
            base_url: LemonSlice API base URL override.
            livekit_url: LiveKit server URL. Uses LIVEKIT_URL env var if not provided.
            livekit_api_key: LiveKit API key. Uses LIVEKIT_API_KEY env var if not provided.
            livekit_api_secret: LiveKit API secret. Uses LIVEKIT_API_SECRET env var if not provided.
            width: Output video width in pixels.
            height: Output video height in pixels.
        """
        # base_url is forwarded only when given, so the client's own default
        # applies otherwise.
        optional_client_args: dict[str, Any] = {}
        if base_url is not None:
            optional_client_args["base_url"] = base_url

        self._client = LemonSliceClient(
            agent_id=agent_id,
            agent_image_url=agent_image_url,
            agent_prompt=agent_prompt,
            idle_timeout=idle_timeout,
            api_key=api_key,
            **optional_client_args,
        )
        self._rtc_manager = LemonSliceRTCManager(
            livekit_url=livekit_url,
            livekit_api_key=livekit_api_key,
            livekit_api_secret=livekit_api_secret,
            on_video=self._on_video_frame,
            on_audio=self._on_audio_frame,
            on_disconnect=self._on_disconnect,
        )
        self._video_track = QueuedVideoTrack(width=width, height=height)
        # Same rate/channel layout the RTC manager requests for avatar audio.
        self._audio_track = audio_track.AudioStreamTrack(
            sample_rate=48000, channels=2, format="s16"
        )

        self._agent: Any = None
        self._connected = False
        self._send_lock = asyncio.Lock()

        logger.debug(f"LemonSlice AvatarPublisher initialized ({width}x{height})")

    def publish_video_track(self) -> QueuedVideoTrack:
        """Return the track that receives avatar video frames."""
        return self._video_track

    def publish_audio_track(self) -> audio_track.AudioStreamTrack:
        """Return the track that receives avatar audio."""
        return self._audio_track

    def attach_agent(self, agent: Any) -> None:
        """Remember the agent and subscribe to its audio output events."""
        self._agent = agent
        self._subscribe_to_audio_events()

    async def start(self) -> None:
        """Connect to LemonSlice. Called by Agent via _apply("start") during join()."""
        await self._connect()

    async def close(self) -> None:
        """Stop the video track, then tear down the RTC manager and client.

        A failure while closing the RTC manager is logged as a warning and
        does not prevent the REST client from being closed.
        """
        self._video_track.stop()
        try:
            await self._rtc_manager.close()
        except Exception as err:
            logger.warning(f"Failed to close LemonSlice RTC manager: {err}")
        finally:
            await self._client.close()
            self._connected = False
            logger.debug("LemonSlice avatar publisher closed")

    def _subscribe_to_audio_events(self) -> None:
        """Register handlers that forward agent audio to LemonSlice.

        NOTE(review): the subscribe decorator presumably selects events from
        each handler's annotated parameter type - handler signatures are kept
        exactly as they were for that reason.
        """
        rtc_manager = self._rtc_manager
        send_lock = self._send_lock
        subscribe = self._agent.events.subscribe

        @subscribe
        async def on_tts_audio(event: TTSAudioEvent):
            # Use the lock because TTS events arrive asynchronously
            async with send_lock:
                if event.data is not None:
                    await rtc_manager.send_audio(event.data)
                if event.is_final_chunk:
                    await rtc_manager.flush()

        @subscribe
        async def on_realtime_audio(event: RealtimeAudioOutputEvent):
            async with send_lock:
                if event.data is not None:
                    await rtc_manager.send_audio(event.data)

        @subscribe
        async def on_realtime_audio_done(_: RealtimeAudioOutputDoneEvent):
            async with send_lock:
                await rtc_manager.flush()

    async def _connect(self) -> None:
        """Join the LiveKit room, then ask LemonSlice to start a session.

        If session creation fails the error is logged, the RTC manager is
        closed, and the method returns without raising.
        """
        creds = self._rtc_manager.generate_credentials()
        await self._rtc_manager.connect(creds)
        try:
            await self._client.create_session(creds.livekit_url, creds.livekit_token)
        except Exception:
            logger.exception("Failed to create a LemonSlice session")
            await self._rtc_manager.close()
        else:
            self._connected = True
            logger.info("LemonSlice avatar connection established")

    async def _on_video_frame(self, frame: av.VideoFrame) -> None:
        """Queue an avatar video frame on the published video track."""
        await self._video_track.add_frame(frame)

    async def _on_audio_frame(self, pcm: PcmData) -> None:
        """Write avatar audio to the published audio track."""
        await self._audio_track.write(pcm)

    async def _on_disconnect(self) -> None:
        """Record that the LemonSlice connection has gone away."""
        logger.info("LemonSlice disconnected")
        self._connected = False
@@ -0,0 +1,118 @@
1
+ import logging
2
+ from os import getenv
3
+
4
+ import httpx
5
+
6
+ from .exceptions import LemonSliceSessionError
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ DEFAULT_BASE_URL = "https://lemonslice.com/api/liveai"
11
+
12
+
13
class LemonSliceClient:
    """REST API client for LemonSlice session management.

    Handles authentication and session creation with LemonSlice's
    self-managed API for real-time avatar generation.
    """

    def __init__(
        self,
        agent_id: str | None = None,
        agent_image_url: str | None = None,
        agent_prompt: str | None = None,
        idle_timeout: int | None = None,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
    ):
        """Initialize the LemonSlice client.

        Args:
            agent_id: LemonSlice agent ID.
            agent_image_url: Custom agent image URL (368x560px recommended).
            agent_prompt: Prompt influencing avatar expressions and movements.
            idle_timeout: Session timeout in seconds.
            api_key: LemonSlice API key. Uses LEMONSLICE_API_KEY env var if not provided.
            base_url: LemonSlice API base URL.

        Raises:
            ValueError: If neither ``agent_id`` nor ``agent_image_url`` is
                given, or if no API key is available.
        """
        if not agent_id and not agent_image_url:
            raise ValueError("Either agent_id or agent_image_url must be provided.")

        self._api_key: str = api_key or getenv("LEMONSLICE_API_KEY") or ""
        if not self._api_key:
            raise ValueError(
                "LemonSlice API key required. Set LEMONSLICE_API_KEY environment "
                "variable or pass api_key parameter."
            )

        self._agent_id = agent_id
        self._agent_image_url = agent_image_url
        self._agent_prompt = agent_prompt
        self._idle_timeout = idle_timeout
        self._session_id: str | None = None
        # One shared async client; the API key travels in a header on every request.
        self._http_client = httpx.AsyncClient(
            base_url=base_url,
            headers={
                "X-API-Key": self._api_key,
                "Content-Type": "application/json",
            },
        )

    @property
    def session_id(self) -> str | None:
        """ID of the most recently created session, or None if there is none."""
        return self._session_id

    async def create_session(self, livekit_url: str, livekit_token: str) -> str:
        """Create a new LemonSlice avatar session.

        Args:
            livekit_url: LiveKit server URL for the avatar to connect to.
            livekit_token: LiveKit access token for the avatar participant.

        Returns:
            The created session ID.

        Raises:
            LemonSliceSessionError: If the API does not answer with HTTP 201,
                if the response body is not valid JSON, or if it carries no
                ``session_id``.
        """
        payload: dict[str, object] = {
            "transport_type": "livekit",
            "properties": {
                "livekit_url": livekit_url,
                "livekit_token": livekit_token,
            },
        }

        # Optional settings are only sent when they were configured.
        if self._agent_id:
            payload["agent_id"] = self._agent_id
        if self._agent_image_url:
            payload["agent_image_url"] = self._agent_image_url
        if self._agent_prompt:
            payload["agent_prompt"] = self._agent_prompt
        if self._idle_timeout is not None:
            payload["idle_timeout"] = self._idle_timeout

        response = await self._http_client.post("/sessions", json=payload)

        if response.status_code != 201:
            raise LemonSliceSessionError(
                f"Failed to create session: {response.status_code} - {response.text}",
                status_code=response.status_code,
            )

        # A 201 with an unparsable body is still a failed session creation;
        # report it through the plugin's own exception type.
        try:
            data = response.json()
        except ValueError as exc:
            raise LemonSliceSessionError(
                f"Session creation returned an invalid JSON body: {response.text}",
                status_code=response.status_code,
            ) from exc

        # Guard against a JSON body that is not an object (list, string, ...).
        session_id = data.get("session_id") if isinstance(data, dict) else None
        self._session_id = session_id or None

        if self._session_id is None:
            raise LemonSliceSessionError(
                f"Session creation returned no session_id: {data}"
            )

        logger.info(f"LemonSlice session created: {self._session_id}")
        return self._session_id

    async def close(self) -> None:
        """Clean up client resources."""
        try:
            await self._http_client.aclose()
        finally:
            # Forget the session even if closing the HTTP client failed.
            self._session_id = None
            logger.debug("LemonSlice client closed")
@@ -0,0 +1,268 @@
1
+ import asyncio
2
+ import logging
3
+ from dataclasses import dataclass
4
+ from os import getenv
5
+ from typing import Callable, Coroutine
6
+ from uuid import uuid4
7
+
8
+ import av
9
+ from getstream.video.rtc.track_util import AudioFormat, PcmData
10
+ from livekit import api, rtc
11
+ from PIL import Image
12
+ from vision_agents.core.utils.utils import cancel_and_wait
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ _AUDIO_STREAM_TOPIC = "lk.audio_stream"
17
+ _AVATAR_IDENTITY = "avatar"
18
+ _PLUGIN_IDENTITY = "plugin"
19
+ _SAMPLE_RATE = 16000
20
+ _NUM_CHANNELS = 1
21
+
22
+
23
@dataclass(frozen=True)
class ConnectionCredentials:
    """Immutable bundle of the values one LemonSlice LiveKit session needs.

    Attributes:
        room_name: Name of the LiveKit room used for the session.
        agent_token: LiveKit access token for the plugin's own participant.
        livekit_url: URL of the LiveKit server.
        livekit_token: LiveKit access token for the LemonSlice avatar participant.
    """

    # Room shared by the plugin and the avatar.
    room_name: str
    # Token the plugin itself joins the room with.
    agent_token: str
    # Server URL passed on to LemonSlice together with livekit_token.
    livekit_url: str
    # Token handed to LemonSlice so the avatar can join the same room.
    livekit_token: str
31
+
32
+
33
class LemonSliceRTCManager:
    """Manages a LiveKit room connection for LemonSlice avatar streaming.

    Creates a LiveKit room, sends TTS audio to LemonSlice via data streams,
    and receives synchronized avatar video and audio tracks.
    """

    def __init__(
        self,
        on_video: Callable[[av.VideoFrame], Coroutine[None, None, None]],
        on_audio: Callable[[PcmData], Coroutine[None, None, None]],
        on_disconnect: Callable[[], Coroutine[None, None, None]],
        livekit_url: str | None = None,
        livekit_api_key: str | None = None,
        livekit_api_secret: str | None = None,
    ):
        """Validate the LiveKit configuration and store the callbacks.

        Args:
            on_video: Awaited with every avatar video frame received.
            on_audio: Awaited with every avatar audio chunk received.
            on_disconnect: Awaited when a participant or the room disconnects.
            livekit_url: LiveKit server URL. Falls back to LIVEKIT_URL.
            livekit_api_key: LiveKit API key. Falls back to LIVEKIT_API_KEY.
            livekit_api_secret: LiveKit API secret. Falls back to
                LIVEKIT_API_SECRET.

        Raises:
            ValueError: If the URL, or the API key/secret pair, is missing.
        """
        self._livekit_url = livekit_url or getenv("LIVEKIT_URL") or ""
        if not self._livekit_url:
            raise ValueError(
                "LiveKit URL required. Set LIVEKIT_URL environment variable "
                "or pass livekit_url parameter."
            )

        self._livekit_api_key = livekit_api_key or getenv("LIVEKIT_API_KEY") or ""
        self._livekit_api_secret = (
            livekit_api_secret or getenv("LIVEKIT_API_SECRET") or ""
        )
        if not self._livekit_api_key or not self._livekit_api_secret:
            raise ValueError(
                "LiveKit API key and secret required. Set LIVEKIT_API_KEY and "
                "LIVEKIT_API_SECRET environment variables or pass them as parameters."
            )

        self._on_video = on_video
        self._on_audio = on_audio
        self._on_disconnect = on_disconnect

        self._room: rtc.Room | None = None
        self._stream_writer: rtc.ByteStreamWriter | None = None
        self._connected = False
        # Strong references to background tasks so they are not garbage
        # collected while running and can be cancelled in close().
        self._tasks: set[asyncio.Task[None]] = set()

    @property
    def is_connected(self) -> bool:
        """Whether the manager currently considers the room connected."""
        return self._connected

    def generate_credentials(self) -> ConnectionCredentials:
        """Generate credentials for a new LiveKit room session.

        A fresh, uniquely named room is used for every call, with one token
        for the plugin identity and one for the avatar identity.

        Returns:
            Credentials for both the agent and the LemonSlice participant.
        """
        room_name = f"lemonslice-{uuid4()}"
        agent_token = self._generate_token(room_name, _PLUGIN_IDENTITY, kind="agent")
        lemonslice_token = self._generate_token(
            room_name, _AVATAR_IDENTITY, kind="agent"
        )
        return ConnectionCredentials(
            room_name=room_name,
            agent_token=agent_token,
            livekit_url=self._livekit_url,
            livekit_token=lemonslice_token,
        )

    async def connect(self, credentials: ConnectionCredentials) -> None:
        """Connect to a LiveKit room.

        Registers the room event handlers, joins the room with the agent
        token and registers the ``lk.playback_finished`` RPC method.

        Args:
            credentials: Connection credentials from generate_credentials().
        """
        room = rtc.Room()

        @room.on("connected")
        def on_connected():
            logger.info("Room connected")

        @room.on("participant_connected")
        def on_participant_connected(participant: rtc.RemoteParticipant):
            if participant.identity == _AVATAR_IDENTITY:
                logger.info("LemonSlice avatar entered the room")

        @room.on("track_subscribed")
        def on_track_subscribed(
            track: rtc.Track,
            publication: rtc.RemoteTrackPublication,
            participant: rtc.RemoteParticipant,
        ) -> None:
            # Only the avatar's tracks are consumed; each one gets its own
            # background task that pumps frames into the matching callback.
            if participant.identity == _AVATAR_IDENTITY:
                if track.kind == rtc.TrackKind.KIND_VIDEO:
                    logger.info("Received video track from LemonSlice")
                    video_stream = rtc.VideoStream(track)
                    self._create_task(self._consume_video(video_stream))
                elif track.kind == rtc.TrackKind.KIND_AUDIO:
                    logger.info("Received audio track from LemonSlice")
                    audio_stream = rtc.AudioStream(
                        track, sample_rate=48000, num_channels=2
                    )
                    self._create_task(self._consume_audio(audio_stream))

        @room.on("participant_disconnected")
        def on_participant_disconnected(participant: rtc.RemoteParticipant) -> None:
            logger.info(
                f"Participant disconnected: {participant.identity}; "
                f"reason: {participant.disconnect_reason}"
            )
            # Clearing the flag first keeps the "disconnected" handler below
            # from notifying the owner a second time.
            self._connected = False
            self._create_task(self._on_disconnect())
            if self._room is not None:
                self._create_task(self._room.disconnect())

        @room.on("disconnected")
        def on_disconnected(reason: str) -> None:
            # The "disconnected" callback may be triggered multiple times
            # because we disconnect ourselves when the avatar leaves the call.
            if self._connected:
                logger.info(f"Room disconnected; reason: {reason}")
                self._connected = False
                self._create_task(self._on_disconnect())

        logger.info(f"Connecting to LiveKit room {credentials.room_name}")
        await room.connect(self._livekit_url, credentials.agent_token)
        logger.info(f"Connected to LiveKit room {credentials.room_name}")

        room.local_participant.register_rpc_method(
            "lk.playback_finished", self._rpc_on_playback_finished
        )

        self._room = room
        self._connected = True

    async def send_audio(self, pcm: PcmData) -> None:
        """Send a PCM audio chunk to LemonSlice via a LiveKit byte stream.

        Does nothing when no room is connected. The byte stream is opened
        lazily on the first chunk and reused until flush() or close().

        Args:
            pcm: Audio data to send. Resampled to 16 kHz mono automatically.
        """
        if self._room is None or not self._room.isconnected():
            return

        if pcm.sample_rate != _SAMPLE_RATE or pcm.channels != _NUM_CHANNELS:
            pcm = pcm.resample(
                target_sample_rate=_SAMPLE_RATE,
                target_channels=_NUM_CHANNELS,
            )

        if self._stream_writer is None:
            # The stream is addressed to the avatar only and advertises the
            # audio format through its attributes.
            self._stream_writer = await self._room.local_participant.stream_bytes(
                name=f"AUDIO_{uuid4()}",
                topic=_AUDIO_STREAM_TOPIC,
                destination_identities=[_AVATAR_IDENTITY],
                attributes={
                    "sample_rate": str(pcm.sample_rate),
                    "num_channels": str(pcm.channels),
                },
            )
            logger.debug("Opened audio byte stream to LemonSlice")

        await self._stream_writer.write(pcm.to_bytes())

    async def flush(self) -> None:
        """Close the current byte stream, signalling end of a TTS segment."""
        writer = self._stream_writer
        if writer is None:
            return
        # Drop the reference before closing: if aclose() fails, the next
        # send_audio() must open a fresh stream instead of reusing a dead one.
        self._stream_writer = None
        await writer.aclose()
        logger.debug("Closed audio byte stream (segment end)")

    async def close(self) -> None:
        """Disconnect from the LiveKit room and clean up resources.

        Every cleanup step is attempted even if an earlier one raises, so a
        failing stream close cannot leave background tasks running or the
        room connected. An error from any step still propagates to the
        caller once the remaining steps have run.
        """
        try:
            try:
                if self._stream_writer is not None:
                    await self._stream_writer.aclose()
            finally:
                try:
                    await cancel_and_wait(*self._tasks)
                    self._tasks.clear()
                finally:
                    if self._room is not None:
                        await self._room.disconnect()
        finally:
            self._room = None
            self._stream_writer = None
            self._connected = False
            logger.debug("LemonSlice RTC manager closed")

    async def _consume_video(self, video_stream: rtc.VideoStream) -> None:
        """Convert incoming LiveKit video frames and pass them to on_video."""
        async for event in video_stream:
            # LiveKit frame -> RGBA buffer -> PIL image -> PyAV frame.
            lk_frame = event.frame.convert(rtc.VideoBufferType.RGBA)
            img = Image.frombuffer(
                "RGBA", (lk_frame.width, lk_frame.height), lk_frame.data
            )
            frame = av.VideoFrame.from_image(img)
            await self._on_video(frame)

    async def _consume_audio(self, audio_stream: rtc.AudioStream) -> None:
        """Wrap incoming LiveKit audio frames as PcmData and pass them to on_audio."""
        async for event in audio_stream:
            frame = event.frame
            pcm = PcmData.from_bytes(
                frame.data,  # type: ignore[arg-type]
                sample_rate=frame.sample_rate,
                format=AudioFormat.S16,
                channels=frame.num_channels,
            )
            await self._on_audio(pcm)

    def _rpc_on_playback_finished(self, data: rtc.RpcInvocationData) -> str:
        """Handle the ``lk.playback_finished`` RPC by logging it and replying "ok"."""
        logger.info(
            "playback finished event received",
            extra={"caller_identity": data.caller_identity},
        )
        return "ok"

    def _generate_token(
        self,
        room_name: str,
        identity: str,
        kind: api.AccessToken.ParticipantKind,
    ) -> str:
        """Create a JWT allowing ``identity`` to join, publish and subscribe in ``room_name``."""
        token = (
            api.AccessToken(self._livekit_api_key, self._livekit_api_secret)
            .with_kind(kind)
            .with_identity(identity)
            .with_name(identity)
            .with_grants(
                api.VideoGrants(
                    room_join=True,
                    room=room_name,
                    can_publish=True,
                    can_subscribe=True,
                )
            )
        )
        return token.to_jwt()

    def _create_task(self, coro: Coroutine[None, None, None]) -> None:
        """Schedule ``coro`` as a task tracked until it finishes."""
        task: asyncio.Task[None] = asyncio.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)