vision-agents-plugins-getstream 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ */__pycache__
2
+ */chat/__pycache__
3
+ */video/__pycache__
4
+ */chat/sync/__pycache__
5
+ */chat/async_/__pycache__
6
+ */sync/__pycache__
7
+ */async_/__pycache__
8
+ */video/sync/__pycache__
9
+ */model/__pycache__/
10
+ */cli/__pycache__
11
+ */cli/__pycache__
12
+ .env
13
+ .venv
14
+ .vscode/settings.json
15
+ *.pyc
16
+ dist/*
17
+ dist/*
18
+ *.log
19
+ .python-version
20
+ pyvenv.cfg
21
+ .idea*
22
+ bin/*
23
+ lib/*
24
+ shell.nix
25
+ pyrightconfig.json
26
+ .DS_Store
27
+
28
+ *.egg-info/
29
+ *.egg
30
+ *.pt
31
+ *.kef
32
+ .env.bak
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-getstream
3
+ Version: 0.0.17
4
+ Summary: GetStream video/voice integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,agents,getstream,realtime,streaming,video,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: getstream[telemetry,webrtc]>=2.5.0
12
+ Requires-Dist: vision-agents
13
+ Description-Content-Type: text/markdown
14
+
15
+ # GetStream Plugin
16
+
17
+ A plugin for Vision Agents that provides GetStream integration.
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install vision-agents-plugins-getstream
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ ```python
28
+ from vision_agents.plugins import getstream
29
+
30
+ # Use the plugin
31
+ ```
32
+
33
+ ## Development
34
+
35
+ This plugin follows the standard Vision Agents plugin structure.
@@ -0,0 +1,21 @@
1
+ # GetStream Plugin
2
+
3
+ A plugin for Vision Agents that provides GetStream integration.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install vision-agents-plugins-getstream
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from vision_agents.plugins import getstream
15
+
16
+ # Use the plugin
17
+ ```
18
+
19
+ ## Development
20
+
21
+ This plugin follows the standard Vision Agents plugin structure.
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-getstream"
7
+ dynamic = ["version"]
8
+ description = "GetStream video/voice integration for Vision Agents"
9
+ readme = "README.md"
10
+ keywords = ["getstream", "video", "realtime", "streaming", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "getstream[webrtc,telemetry]>=2.5.0",
16
+ ]
17
+
18
+ [project.urls]
19
+ Documentation = "https://visionagents.ai/"
20
+ Website = "https://visionagents.ai/"
21
+ Source = "https://github.com/GetStream/Vision-Agents"
22
+
23
+ [tool.hatch.version]
24
+ source = "vcs"
25
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
26
+
27
+ [tool.hatch.build.targets.wheel]
28
+ packages = [".", "vision_agents"]
29
+
30
+ [tool.hatch.build.targets.sdist]
31
+ include = ["/vision_agents"]
32
+
33
+ [tool.uv.sources]
34
+ vision-agents = { workspace = true }
35
+
36
+ [dependency-groups]
37
+ dev = [
38
+ "pytest>=8.4.1",
39
+ "pytest-asyncio>=1.0.0",
40
+ ]
@@ -0,0 +1,9 @@
1
+ # GetStream plugin for Vision Agents
2
+ from .stream_conversation import StreamConversation as Conversation
3
+
4
+ from .stream_edge_transport import StreamEdge as Edge
5
+
6
+ from getstream import Stream as Client
7
+
8
+ __all__ = ["Conversation", "Edge", "Client"]
9
+
@@ -0,0 +1,236 @@
1
+ import logging
2
+ import threading
3
+ import queue
4
+ import time
5
+ from typing import List, Dict
6
+
7
+ from getstream.chat.client import ChatClient
8
+ from getstream.models import MessageRequest, ChannelResponse
9
+
10
+ from vision_agents.core.agents.conversation import InMemoryConversation, Message
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class StreamConversation(InMemoryConversation):
    """
    Persists the message history to a stream channel & messages.

    Messages are kept locally through ``InMemoryConversation`` and mirrored to
    the Stream chat channel by a single background worker thread that drains
    an unbounded queue of API operations, so the public methods only enqueue
    work and return.
    """
    messages: List[Message]

    # maps internal ids to stream message ids
    internal_ids_to_stream_ids: Dict[str, str]

    channel: ChannelResponse
    chat_client: ChatClient

    def __init__(self, instructions: str, messages: List[Message], channel: ChannelResponse, chat_client: ChatClient):
        """Create the conversation and start its API worker thread.

        Args:
            instructions: Passed through to ``InMemoryConversation``.
            messages: Initial message list; also stored on ``self.messages``.
            channel: Stream channel whose ``type`` and ``id`` address every
                ``send_message`` call.
            chat_client: Client used by the worker for all Stream API calls.
        """
        super().__init__(instructions, messages)
        self.messages = messages
        self.channel = channel
        self.chat_client = chat_client
        self.internal_ids_to_stream_ids = {}

        # All state the worker touches must exist *before* the thread starts.
        self._api_queue: queue.Queue = queue.Queue()
        self._shutdown = False
        self._pending_operations = 0
        self._operations_lock = threading.Lock()

        # NOTE: the thread target is a bound method, so the running worker
        # keeps this object alive; __del__ cannot run until shutdown() has
        # been called explicitly (or the process exits, as it is a daemon).
        self._worker_thread = threading.Thread(target=self._api_worker, daemon=True, name="StreamConversation-APIWorker")
        self._worker_thread.start()
        logger.info(f"Started API worker thread for channel {channel.id}")

    def _enqueue(self, operation: dict) -> None:
        """Count ``operation`` as pending and hand it to the worker thread."""
        # Increment before put() so the counter can never be observed at zero
        # while an operation is sitting in the queue.
        with self._operations_lock:
            self._pending_operations += 1
        self._api_queue.put(operation)

    @staticmethod
    def _build_update_op(stream_id: str, message: Message, completed: bool) -> dict:
        """Build the queue entry that syncs ``message`` text and generating flag."""
        return {
            "type": "update_message_partial" if completed else "ephemeral_message_update",
            "stream_id": stream_id,
            "user_id": message.user_id,
            "set_data": {"text": message.content, "generating": not completed},
        }

    def _process_operation(self, operation: dict) -> None:
        """Execute one queued operation against the Stream chat client."""
        op_type = operation["type"]
        logger.debug(f"Processing API operation: {op_type}")

        if op_type == "send_message":
            response = self.chat_client.send_message(
                operation["channel_type"],
                operation["channel_id"],
                operation["request"],
            )
            stream_id = response.data.message.id
            operation["stream_id"] = stream_id

            # Queue the follow-up update *before* publishing the id mapping:
            # update_message() only enqueues once it can see the mapping, so
            # this guarantees the follow-up is ordered ahead of any later
            # update and cannot overwrite a newer "generating" flag.
            follow_up = operation.get("follow_up")
            if follow_up is not None and stream_id:
                message, completed = follow_up
                self._enqueue(self._build_update_op(stream_id, message, completed))

            # Store the mapping
            self.internal_ids_to_stream_ids[operation["internal_id"]] = stream_id

        elif op_type == "update_message_partial":
            self.chat_client.update_message_partial(
                operation["stream_id"],
                user_id=operation["user_id"],
                set=operation["set_data"],
            )

        elif op_type == "ephemeral_message_update":
            self.chat_client.ephemeral_message_update(
                operation["stream_id"],
                user_id=operation["user_id"],
                set=operation["set_data"],
            )

        logger.debug(f"Successfully processed API operation: {op_type}")

    def _api_worker(self):
        """Worker thread that processes Stream API calls."""
        logger.debug("API worker thread started")
        while not self._shutdown:
            try:
                # Get operation from queue with timeout to check shutdown periodically
                operation = self._api_queue.get(timeout=0.1)

                try:
                    self._process_operation(operation)
                except Exception as e:
                    logger.error(f"Error processing API operation {operation.get('type', 'unknown')}: {e}")
                    # Continue processing other operations even if one fails
                finally:
                    # Decrement pending operations counter
                    with self._operations_lock:
                        self._pending_operations -= 1

            except queue.Empty:
                # Timeout reached, loop back to check shutdown flag
                continue
            except Exception as e:
                logger.error(f"Unexpected error in API worker thread: {e}")
                time.sleep(0.1)  # Brief pause before continuing

        logger.debug("API worker thread shutting down")

    def wait_for_pending_operations(self, timeout: float = 5.0) -> bool:
        """Wait for all pending API operations to complete.

        Args:
            timeout: Maximum time to wait in seconds.

        Returns:
            True if all operations completed, False if timeout reached.
        """
        # Monotonic clock: a wall-clock adjustment must not stretch or cut
        # short the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            with self._operations_lock:
                if self._pending_operations == 0:
                    return True
            time.sleep(0.01)  # Small sleep to avoid busy waiting

        # Final check so a non-positive timeout, or work that finished right
        # at the deadline, is still reported accurately.
        with self._operations_lock:
            remaining = self._pending_operations
        if remaining > 0:
            logger.warning(f"Timeout waiting for {remaining} pending operations")
        return remaining == 0

    def shutdown(self):
        """Shutdown the worker thread gracefully.

        The worker exits its loop once it sees the shutdown flag; operations
        still sitting in the queue at that point are not processed, so call
        ``wait_for_pending_operations`` first if they must be flushed.
        """
        logger.info("Shutting down API worker thread")
        self._shutdown = True
        if self._worker_thread.is_alive():
            self._worker_thread.join(timeout=2.0)
            if self._worker_thread.is_alive():
                logger.warning("API worker thread did not shut down cleanly")

    def add_message(self, message: Message, completed: bool = True):
        """Add a message to the Stream conversation.

        The message is appended locally and a ``send_message`` operation is
        queued. Once the worker has sent it and knows the Stream message id,
        the worker itself queues the follow-up update that sets the
        ``generating`` flag, using the message content at that moment.

        Args:
            message: The Message object to add
            completed: If True, mark the message as completed using update_message_partial.
                If False, mark as still generating using ephemeral_message_update.

        Returns:
            None (operations are processed asynchronously)
        """
        self.messages.append(message)

        request = MessageRequest(text=message.content, user_id=message.user_id)
        self._enqueue({
            "type": "send_message",
            "channel_type": self.channel.type,
            "channel_id": self.channel.id,
            "request": request,
            "internal_id": message.id,
            # Consumed by the worker after the send succeeds.
            "follow_up": (message, completed),
        })

    def update_message(self, message_id: str, input_text: str, user_id: str, replace_content: bool, completed: bool):
        """Update a message in the Stream conversation.

        This method updates both the local message content and queues the Stream API sync.
        If the message doesn't exist, it creates a new one.

        Args:
            message_id: The ID of the message to update
            input_text: The text content to set or append
            user_id: The ID of the user who owns the message
            replace_content: If True, replace the entire message content. If False, append to existing content.
            completed: If True, mark the message as completed using update_message_partial.
                If False, mark as still generating using ephemeral_message_update.

        Returns:
            None (operations are processed asynchronously)
        """
        # First, update the local message using the superclass logic
        super().update_message(message_id, input_text, user_id, replace_content, completed)

        # Get the updated message for Stream API sync
        message = self.lookup(message_id)
        if message is None:
            # This shouldn't happen after super().update_message, but handle gracefully
            logger.warning(f"message {message_id} not found after update")
            return None

        stream_id = self.internal_ids_to_stream_ids.get(message_id)
        if stream_id is None:
            # The send is still in flight (or failed); the follow-up queued by
            # the worker after a successful send picks up the latest content.
            logger.warning(f"stream_id for message {message_id} not found, skipping Stream API update")
            return None

        self._enqueue(self._build_update_op(stream_id, message, completed))
        return None

    def __del__(self):
        """Cleanup when the conversation is destroyed."""
        try:
            self.shutdown()
        except Exception as e:
            logger.error(f"Error during StreamConversation cleanup: {e}")
@@ -0,0 +1,237 @@
1
+ import logging
2
+ import os
3
+ import webbrowser
4
+ from typing import Optional, TYPE_CHECKING
5
+ from urllib.parse import urlencode
6
+ from uuid import uuid4
7
+
8
+ import aiortc
9
+ from getstream import AsyncStream
10
+ from getstream.chat.async_client import ChatClient
11
+ from getstream.models import ChannelInput
12
+ from getstream.video import rtc
13
+ from getstream.chat.async_channel import Channel
14
+ from getstream.video.async_call import Call
15
+ from getstream.video.rtc import audio_track, ConnectionManager
16
+ from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import TrackType, Participant
17
+ from getstream.video.rtc.track_util import PcmData
18
+ from getstream.video.rtc.tracks import TrackSubscriptionConfig, SubscriptionConfig
19
+
20
+ from vision_agents.plugins.getstream.stream_conversation import StreamConversation
21
+ from vision_agents.core.edge import EdgeTransport
22
+ from vision_agents.core.edge.types import Connection, User
23
+ from vision_agents.core.events.manager import EventManager
24
+ from vision_agents.core.edge import events
25
+ from vision_agents.core.utils import get_vision_agents_version
26
+
27
+ if TYPE_CHECKING:
28
+ from vision_agents.core.agents.agents import Agent
29
+
30
+
31
class StreamConnection(Connection):
    """Wraps a Stream ``ConnectionManager`` behind the ``Connection`` interface."""

    def __init__(self, connection: ConnectionManager):
        """Remember the provider-native connection object for later teardown."""
        super().__init__()
        # store the native connection object
        self._connection = connection

    async def close(self):
        """Await ``leave()`` on the wrapped connection."""
        native_connection = self._connection
        await native_connection.leave()
39
+
40
class StreamEdge(EdgeTransport):
    """
    EdgeTransport implementation that uses getstream.io's edge network.

    It joins calls over WebRTC, re-emits the connection's audio/track/call
    events through ``self.events``, and creates the chat-backed conversation
    for a call.
    """
    client: AsyncStream

    def __init__(self, **kwargs):
        """Set up the Stream client, logger and event manager.

        ``**kwargs`` is accepted but not used by this implementation.
        """
        # Initialize Stream client
        super().__init__()
        version = get_vision_agents_version()
        self.client = AsyncStream(user_agent=f"vision-agents-{version}")
        self.logger = logging.getLogger(self.__class__.__name__)
        self.events = EventManager()
        self.events.register_events_from_module(events)
        self.channel: Optional[Channel] = None
        self.conversation: Optional[StreamConversation] = None
        self.channel_type = "videocall"

    async def create_conversation(self, call: Call, user, instructions):
        """Get or create the chat channel for ``call`` and wrap it in a StreamConversation.

        The channel uses ``self.channel_type`` and the call id, and is created
        on behalf of ``user.id``. The conversation is stored on
        ``self.conversation`` and returned.
        """
        # NOTE(review): this is annotated with the async ChatClient and is
        # awaited below, while StreamConversation annotates the sync
        # getstream.chat.client.ChatClient and calls it without awaiting from
        # its worker thread. Confirm which client call.client.stream.chat is.
        chat_client: ChatClient = call.client.stream.chat
        self.channel = await chat_client.get_or_create_channel(
            self.channel_type,
            call.id,
            data=ChannelInput(created_by_id=user.id),
        )
        self.conversation = StreamConversation(
            instructions, [], self.channel.data.channel, chat_client
        )
        return self.conversation

    async def create_user(self, user: User):
        """Create ``user`` on Stream using its name and id."""
        return await self.client.create_user(name=user.name, id=user.id)

    async def join(self, agent: "Agent", call: Call) -> StreamConnection:
        """
        The logic for joining a call is different for each edge network/realtime audio/video provider

        This function
        - has the agent.agent_user join the call
        - forwards incoming audio, added/ended tracks and call-ended
          notifications as events on ``self.events``

        Returns:
            A StreamConnection wrapping the RTC connection; closing it leaves
            the call.

        TODO:
        - process track flow

        """
        # Traditional mode - use WebRTC connection
        # Configure subscription for audio and video
        subscription_config = SubscriptionConfig(
            default=self._get_subscription_config()
        )

        # Open RTC connection and keep it alive until the returned connection
        # is closed. Errors propagate to the caller unchanged.
        connection = await rtc.join(
            call, agent.agent_user.id, subscription_config=subscription_config
        )
        await connection.__aenter__()  # TODO: weird API? there should be a manual version

        self._connection = connection

        @self._connection.on("audio")
        async def on_audio_received(pcm: PcmData, participant: Participant):
            self.events.send(events.AudioReceivedEvent(
                plugin_name="getstream",
                pcm_data=pcm,
                participant=participant,
                user_metadata=participant
            ))

        @self._connection.on("track_added")
        async def on_track(track_id, track_type, user):
            # TODO: maybe make it easy to subscribe only to video tracks?
            self.events.send(events.TrackAddedEvent(
                plugin_name="getstream",
                track_id=track_id,
                track_type=track_type,
                user=user,
                user_metadata=user
            ))

            # Emit a matching TrackEndedEvent when the underlying track ends.
            _, track = self._connection.subscriber_pc.track_map[track_id]
            track.on("ended", lambda: self.events.send(events.TrackEndedEvent(
                plugin_name="getstream",
                track_id=track_id,
                track_type=track_type,
                user=user,
                user_metadata=user
            )))

        @self._connection.on("call_ended")
        async def call_ended(*args, **kwargs):
            self.events.send(events.CallEndedEvent(
                plugin_name="getstream",
                args=args,
                kwargs=kwargs
            ))

        standardize_connection = StreamConnection(connection)

        return standardize_connection

    def create_audio_track(self, framerate: int = 48000, stereo: bool = True):
        """Create an outgoing audio track (defaults to the WebRTC framerate)."""
        return audio_track.AudioStreamTrack(framerate=framerate, stereo=stereo)  # default to webrtc framerate

    def create_video_track(self):
        """Create an outgoing aiortc video track."""
        return aiortc.VideoStreamTrack()

    def add_track_subscriber(self, track_id: str) -> Optional[aiortc.mediastreams.MediaStreamTrack]:
        """Subscribe to ``track_id`` on the current connection (requires a prior ``join``)."""
        return self._connection.subscriber_pc.add_track_subscriber(track_id)

    async def publish_tracks(self, audio_track, video_track):
        """
        Add the tracks to publish audio and video
        """
        await self._connection.add_tracks(audio=audio_track, video=video_track)
        if audio_track:
            self.logger.info("🤖 Agent ready to speak")
        if video_track:
            self.logger.info("🎥 Agent ready to publish video")
        # In Realtime mode we directly publish the provider's output track; no extra forwarding needed

    def _get_subscription_config(self):
        """Return the default subscription: both video and audio tracks."""
        return TrackSubscriptionConfig(
            track_types=[
                TrackType.TRACK_TYPE_VIDEO,
                TrackType.TRACK_TYPE_AUDIO,
            ]
        )

    def close(self):
        """No-op; the RTC connection is closed through the StreamConnection returned by ``join``."""
        # Note: Not calling super().close() as it's an abstract method with trivial body
        pass

    async def open_demo(self, call: Call) -> str:
        """Helper function to open browser with Stream call link.

        Generates an id and a one-hour token for a throwaway human user,
        builds the demo join URL for ``call`` and tries to open it in the
        default browser.

        Returns:
            The join URL (also printed, so it can be opened manually).
        """
        client = call.client.stream

        # Create a human user for testing
        human_id = f"user-{uuid4()}"
        name = "Human User"

        # Create user token for browser access
        token = client.create_token(human_id, expiration=3600)

        base_url = (
            f"{os.getenv('EXAMPLE_BASE_URL', 'https://getstream.io/video/demos')}/join/"
        )
        params = {
            "api_key": client.api_key,
            "token": token,
            "skip_lobby": "true",
            "user_name": name,
            "video_encoder": "vp8",
            "bitrate": 12000000,
            "w": 1920,
            "h": 1080,
            # TODO: FPS..., aim at 60fps
        }

        # NOTE: the URL embeds the user token and is printed below.
        url = f"{base_url}{call.id}?{urlencode(params)}"
        print(f"🌐 Opening browser to: {url}")

        try:
            # webbrowser.open() reports most failures by returning False
            # rather than raising, so the return value must be checked too.
            opened = webbrowser.open(url)
        except Exception as e:
            print(f"❌ Failed to open browser: {e}")
            print(f"Please manually open this URL: {url}")
        else:
            if opened:
                print("✅ Browser opened successfully!")
            else:
                print("❌ Failed to open browser: no usable browser found")
                print(f"Please manually open this URL: {url}")

        return url

    def open_pronto(self, api_key: str, token: str, call_id: str):
        """Open browser with the video call URL."""
        # Use the same URL pattern as the working workout assistant example
        base_url = (
            f"{os.getenv('EXAMPLE_BASE_URL', 'https://pronto-staging.getstream.io')}/join/"
        )
        params = {
            "api_key": api_key,
            "token": token,
            "skip_lobby": "true",
            "video_encoder": "vp8",
        }

        # NOTE: the URL embeds the token and is written to the log below.
        url = f"{base_url}{call_id}?{urlencode(params)}"
        self.logger.info(f"🌐 Opening browser: {url}")

        try:
            # webbrowser.open() reports most failures by returning False
            # rather than raising, so the return value must be checked too.
            opened = webbrowser.open(url)
        except Exception as e:
            self.logger.error(f"❌ Failed to open browser: {e}")
            self.logger.info(f"Please manually open this URL: {url}")
        else:
            if opened:
                self.logger.info("✅ Browser opened successfully!")
            else:
                self.logger.error("❌ Failed to open browser: no usable browser found")
                self.logger.info(f"Please manually open this URL: {url}")