PyPI - vision-agents-plugins-getstream - Versions diffs - 0.0.17__tar.gz - Mend

vision-agents-plugins-getstream 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vision_agents_plugins_getstream-0.0.17/.gitignore ADDED Viewed

@@ -0,0 +1,32 @@
+*/__pycache__
+*/chat/__pycache__
+*/video/__pycache__
+*/chat/sync/__pycache__
+*/chat/async_/__pycache__
+*/sync/__pycache__
+*/async_/__pycache__
+*/video/sync/__pycache__
+*/model/__pycache__/
+*/cli/__pycache__
+*/cli/__pycache__
+.env
+.venv
+.vscode/settings.json
+*.pyc
+dist/*
+dist/*
+*.log
+.python-version
+pyvenv.cfg
+.idea*
+bin/*
+lib/*
+shell.nix
+pyrightconfig.json
+.DS_Store
+*.egg-info/
+*.egg
+*.pt
+*.kef
+.env.bak

vision_agents_plugins_getstream-0.0.17/PKG-INFO ADDED Viewed

@@ -0,0 +1,35 @@
+Metadata-Version: 2.4
+Name: vision-agents-plugins-getstream
+Version: 0.0.17
+Summary: GetStream video/voice integration for Vision Agents
+Project-URL: Documentation, https://visionagents.ai/
+Project-URL: Website, https://visionagents.ai/
+Project-URL: Source, https://github.com/GetStream/Vision-Agents
+License-Expression: MIT
+Keywords: AI,agents,getstream,realtime,streaming,video,voice agents
+Requires-Python: >=3.10
+Requires-Dist: getstream[telemetry,webrtc]>=2.5.0
+Requires-Dist: vision-agents
+Description-Content-Type: text/markdown
+# GetStream Plugin
+A plugin for Vision Agents that provides GetStream video/voice integration.
+## Installation
+```bash
+pip install vision-agents-plugins-getstream
+```
+## Usage
+```python
+from vision_agents.plugins import getstream
+# The plugin exports getstream.Edge (edge transport), getstream.Conversation and getstream.Client
+```
+## Development
+This plugin follows the standard Vision Agents plugin structure.

vision_agents_plugins_getstream-0.0.17/README.md ADDED Viewed

@@ -0,0 +1,21 @@
+# GetStream Plugin
+A plugin for Vision Agents that provides GetStream video/voice integration.
+## Installation
+```bash
+pip install vision-agents-plugins-getstream
+```
+## Usage
+```python
+from vision_agents.plugins import getstream
+# The plugin exports getstream.Edge (edge transport), getstream.Conversation and getstream.Client
+```
+## Development
+This plugin follows the standard Vision Agents plugin structure.

vision_agents_plugins_getstream-0.0.17/pyproject.toml ADDED Viewed

@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+[project]
+name = "vision-agents-plugins-getstream"
+dynamic = ["version"]
+description = "GetStream video/voice integration for Vision Agents"
+readme = "README.md"
+keywords = ["getstream", "video", "realtime", "streaming", "AI", "voice agents", "agents"]
+requires-python = ">=3.10"
+license = "MIT"
+dependencies = [
+    "vision-agents",
+    "getstream[webrtc,telemetry]>=2.5.0",
+]
+[project.urls]
+Documentation = "https://visionagents.ai/"
+Website = "https://visionagents.ai/"
+Source = "https://github.com/GetStream/Vision-Agents"
+[tool.hatch.version]
+source = "vcs"
+raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+[tool.hatch.build.targets.wheel]
+packages = [".", "vision_agents"]
+[tool.hatch.build.targets.sdist]
+include = ["/vision_agents"]
+[tool.uv.sources]
+vision-agents = { workspace = true }
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+]

vision_agents_plugins_getstream-0.0.17/vision_agents/plugins/getstream/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+# GetStream plugin for Vision Agents: re-exports the conversation, edge transport and Stream client
+from .stream_conversation import StreamConversation as Conversation
+from .stream_edge_transport import StreamEdge as Edge
+from getstream import Stream as Client
+__all__ = ["Conversation", "Edge", "Client"]

vision_agents_plugins_getstream-0.0.17/vision_agents/plugins/getstream/stream_conversation.py ADDED Viewed

@@ -0,0 +1,236 @@
+import logging
+import threading
+import queue
+import time
+from typing import List, Dict
+from getstream.chat.client import ChatClient
+from getstream.models import MessageRequest, ChannelResponse
+from vision_agents.core.agents.conversation import InMemoryConversation, Message
+logger = logging.getLogger(__name__)
+class StreamConversation(InMemoryConversation):
+    """
+    Persists the message history to a stream channel & messages
+    """
+    messages: List[Message]
+    # maps internal ids to stream message ids
+    internal_ids_to_stream_ids: Dict[str, str]
+    channel: ChannelResponse
+    chat_client: ChatClient
+    def __init__(self, instructions: str, messages: List[Message], channel: ChannelResponse, chat_client: ChatClient):
+        super().__init__(instructions, messages)
+        self.messages = messages
+        self.channel = channel
+        self.chat_client = chat_client
+        self.internal_ids_to_stream_ids = {}
+        # Initialize the worker thread for API calls
+        self._api_queue: queue.Queue = queue.Queue()
+        self._shutdown = False
+        self._worker_thread = threading.Thread(target=self._api_worker, daemon=True, name="StreamConversation-APIWorker")
+        self._worker_thread.start()
+        self._pending_operations = 0
+        self._operations_lock = threading.Lock()
+        logger.info(f"Started API worker thread for channel {channel.id}")
+    def _api_worker(self):
+        """Worker thread that processes Stream API calls."""
+        logger.debug("API worker thread started")
+        while not self._shutdown:
+            try:
+                # Get operation from queue with timeout to check shutdown periodically
+                operation = self._api_queue.get(timeout=0.1)
+                try:
+                    op_type = operation["type"]
+                    logger.debug(f"Processing API operation: {op_type}")
+                    if op_type == "send_message":
+                        response = self.chat_client.send_message(
+                            operation["channel_type"],
+                            operation["channel_id"],
+                            operation["request"]
+                        )
+                        # Store the mapping
+                        self.internal_ids_to_stream_ids[operation["internal_id"]] = response.data.message.id
+                        operation["stream_id"] = response.data.message.id
+                    elif op_type == "update_message_partial":
+                        self.chat_client.update_message_partial(
+                            operation["stream_id"],
+                            user_id=operation["user_id"],
+                            set=operation["set_data"]
+                        )
+                    elif op_type == "ephemeral_message_update":
+                        self.chat_client.ephemeral_message_update(
+                            operation["stream_id"],
+                            user_id=operation["user_id"],
+                            set=operation["set_data"]
+                        )
+                    logger.debug(f"Successfully processed API operation: {op_type}")
+                except Exception as e:
+                    logger.error(f"Error processing API operation {operation.get('type', 'unknown')}: {e}")
+                    # Continue processing other operations even if one fails
+                finally:
+                    # Decrement pending operations counter
+                    with self._operations_lock:
+                        self._pending_operations -= 1
+            except queue.Empty:
+                # Timeout reached, loop back to check shutdown flag
+                continue
+            except Exception as e:
+                logger.error(f"Unexpected error in API worker thread: {e}")
+                time.sleep(0.1)  # Brief pause before continuing
+        logger.debug("API worker thread shutting down")
+    def wait_for_pending_operations(self, timeout: float = 5.0) -> bool:
+        """Wait for all pending API operations to complete.
+        Args:
+            timeout: Maximum time to wait in seconds.
+        Returns:
+            True if all operations completed, False if timeout reached.
+        """
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            with self._operations_lock:
+                if self._pending_operations == 0:
+                    return True
+            time.sleep(0.01)  # Small sleep to avoid busy waiting
+        with self._operations_lock:
+            remaining = self._pending_operations
+        if remaining > 0:
+            logger.warning(f"Timeout waiting for {remaining} pending operations")
+        return False
+    def shutdown(self):
+        """Shutdown the worker thread gracefully."""
+        logger.info("Shutting down API worker thread")
+        self._shutdown = True
+        if self._worker_thread.is_alive():
+            self._worker_thread.join(timeout=2.0)
+            if self._worker_thread.is_alive():
+                logger.warning("API worker thread did not shut down cleanly")
+    def add_message(self, message: Message, completed: bool = True):
+        """Add a message to the Stream conversation.
+        Args:
+            message: The Message object to add
+            completed: If True, mark the message as completed using update_message_partial.
+                      If False, mark as still generating using ephemeral_message_update.
+        Returns:
+            None (operations are processed asynchronously)
+        """
+        self.messages.append(message)
+        # Queue the send_message operation
+        request = MessageRequest(text=message.content, user_id=message.user_id)
+        send_op = {
+            "type": "send_message",
+            "channel_type": self.channel.type,
+            "channel_id": self.channel.id,
+            "request": request,
+            "internal_id": message.id,
+        }
+        # Increment pending operations counter
+        with self._operations_lock:
+            self._pending_operations += 1
+        self._api_queue.put(send_op)
+        # Queue the update operation (will use the stream_id once send_message completes)
+        # We need to wait for the send operation to complete first
+        # So we'll handle this in a second operation that waits for the stream_id
+        def queue_update_operation():
+            # Wait for the stream_id to be available
+            max_wait = 5.0
+            start_time = time.time()
+            while time.time() - start_time < max_wait:
+                stream_id = self.internal_ids_to_stream_ids.get(message.id if message.id else "")
+                if stream_id:
+                    update_op = {
+                        "type": "update_message_partial" if completed else "ephemeral_message_update",
+                        "stream_id": stream_id,
+                        "user_id": message.user_id,
+                        "set_data": {"text": message.content, "generating": not completed},
+                    }
+                    with self._operations_lock:
+                        self._pending_operations += 1
+                    self._api_queue.put(update_op)
+                    return
+                time.sleep(0.01)
+            logger.error(f"Timeout waiting for stream_id for message {message.id}")
+        # Queue the update in a separate thread to avoid blocking
+        threading.Thread(target=queue_update_operation, daemon=True).start()
+    def update_message(self, message_id: str, input_text: str, user_id: str, replace_content: bool, completed: bool):
+        """Update a message in the Stream conversation.
+        This method updates both the local message content and queues the Stream API sync.
+        If the message doesn't exist, it creates a new one.
+        Args:
+            message_id: The ID of the message to update
+            input_text: The text content to set or append
+            user_id: The ID of the user who owns the message
+            replace_content: If True, replace the entire message content. If False, append to existing content.
+            completed: If True, mark the message as completed using update_message_partial.
+                      If False, mark as still generating using ephemeral_message_update.
+        Returns:
+            None (operations are processed asynchronously)
+        """
+        # First, update the local message using the superclass logic
+        super().update_message(message_id, input_text, user_id, replace_content, completed)
+        # Get the updated message for Stream API sync
+        message = self.lookup(message_id)
+        if message is None:
+            # This shouldn't happen after super().update_message, but handle gracefully
+            logger.warning(f"message {message_id} not found after update")
+            return None
+        stream_id = self.internal_ids_to_stream_ids.get(message_id)
+        if stream_id is None:
+            logger.warning(f"stream_id for message {message_id} not found, skipping Stream API update")
+            return None
+        # Queue the update operation
+        update_op = {
+            "type": "update_message_partial" if completed else "ephemeral_message_update",
+            "stream_id": stream_id,
+            "user_id": message.user_id,
+            "set_data": {"text": message.content, "generating": not completed},
+        }
+        with self._operations_lock:
+            self._pending_operations += 1
+        return self._api_queue.put(update_op)
+    def __del__(self):
+        """Cleanup when the conversation is destroyed."""
+        try:
+            self.shutdown()
+        except Exception as e:
+            logger.error(f"Error during StreamConversation cleanup: {e}")

vision_agents_plugins_getstream-0.0.17/vision_agents/plugins/getstream/stream_edge_transport.py ADDED Viewed

@@ -0,0 +1,237 @@
+import logging
+import os
+import webbrowser
+from typing import Optional, TYPE_CHECKING
+from urllib.parse import urlencode
+from uuid import uuid4
+import aiortc
+from getstream import AsyncStream
+from getstream.chat.async_client import ChatClient
+from getstream.models import ChannelInput
+from getstream.video import rtc
+from getstream.chat.async_channel import Channel
+from getstream.video.async_call import Call
+from getstream.video.rtc import audio_track, ConnectionManager
+from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import TrackType, Participant
+from getstream.video.rtc.track_util import PcmData
+from getstream.video.rtc.tracks import TrackSubscriptionConfig, SubscriptionConfig
+from vision_agents.plugins.getstream.stream_conversation import StreamConversation
+from vision_agents.core.edge import EdgeTransport
+from vision_agents.core.edge.types import Connection, User
+from vision_agents.core.events.manager import EventManager
+from vision_agents.core.edge import events
+from vision_agents.core.utils import get_vision_agents_version
+if TYPE_CHECKING:
+    from vision_agents.core.agents.agents import Agent
+class StreamConnection(Connection):
+    def __init__(self, connection: ConnectionManager):
+        super().__init__()
+        # store the native connection object
+        self._connection = connection
+    async def close(self):
+        await self._connection.leave()
+class StreamEdge(EdgeTransport):
+    """
+    StreamEdge uses getstream.io's edge network. To support multiple vendors, this means we expose
+    """
+    client: AsyncStream
+    def __init__(self, **kwargs):
+        # Initialize Stream client
+        super().__init__()
+        version = get_vision_agents_version()
+        self.client = AsyncStream(user_agent=f"vision-agents-{version}")
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.events = EventManager()
+        self.events.register_events_from_module(events)
+        self.channel: Optional[Channel] = None
+        self.conversation: Optional[StreamConversation] = None
+        self.channel_type = "videocall"
+    async def create_conversation(self, call: Call, user, instructions):
+        chat_client: ChatClient = call.client.stream.chat
+        self.channel = await chat_client.get_or_create_channel(
+            self.channel_type,
+            call.id,
+            data=ChannelInput(created_by_id=user.id),
+        )
+        self.conversation = StreamConversation(
+            instructions, [], self.channel.data.channel, chat_client
+        )
+        return self.conversation
+    async def create_user(self, user: User):
+        return await self.client.create_user(name=user.name, id=user.id)
+    async def join(self, agent: "Agent", call: Call) -> StreamConnection:
+        """
+        The logic for joining a call is different for each edge network/realtime audio/video provider
+        This function
+        - initializes the chat channel
+        - has the agent.agent_user join the call
+        - connect incoming audio/video to the agent
+        - connecting agent's outgoing audio/video to the call
+        TODO:
+        - process track flow
+        """
+        # Traditional mode - use WebRTC connection
+        # Configure subscription for audio and video
+        subscription_config = SubscriptionConfig(
+            default=self._get_subscription_config()
+        )
+        try:
+            # Open RTC connection and keep it alive for the duration of the returned context manager
+            connection = await rtc.join(
+                call, agent.agent_user.id, subscription_config=subscription_config
+            )
+            await connection.__aenter__() # TODO: weird API? there should be a manual version
+        except Exception:
+            raise
+        self._connection = connection
+        @self._connection.on("audio")
+        async def on_audio_received(pcm: PcmData, participant: Participant):
+            self.events.send(events.AudioReceivedEvent(
+                plugin_name="getstream",
+                pcm_data=pcm,
+                participant=participant,
+                user_metadata=participant
+            ))
+        @self._connection.on("track_added")
+        async def on_track(track_id, track_type, user):
+            # TODO: maybe make it easy to subscribe only to video tracks?
+            self.events.send(events.TrackAddedEvent(
+                plugin_name="getstream",
+                track_id=track_id,
+                track_type=track_type,
+                user=user,
+                user_metadata=user
+            ))
+            _, track = self._connection.subscriber_pc.track_map[track_id]
+            track.on("ended", lambda: self.events.send(events.TrackEndedEvent(
+                plugin_name="getstream",
+                track_id=track_id,
+                track_type=track_type,
+                user=user,
+                user_metadata=user
+            )))
+        @self._connection.on("call_ended")
+        async def call_ended(*args, **kwargs):
+            self.events.send(events.CallEndedEvent(
+                plugin_name="getstream",
+                args=args,
+                kwargs=kwargs
+            ))
+        standardize_connection = StreamConnection(connection)
+        return standardize_connection
+    def create_audio_track(self, framerate: int = 48000, stereo: bool = True):
+        return audio_track.AudioStreamTrack(framerate=framerate, stereo=stereo) # default to webrtc framerate
+    def create_video_track(self):
+        return aiortc.VideoStreamTrack()
+    def add_track_subscriber(self, track_id: str) -> Optional[aiortc.mediastreams.MediaStreamTrack]:
+        return self._connection.subscriber_pc.add_track_subscriber(track_id)
+    async def publish_tracks(self, audio_track, video_track):
+        """
+        Add the tracks to publish audio and video
+        """
+        await self._connection.add_tracks(audio=audio_track, video=video_track)
+        if audio_track:
+            self.logger.info("🤖 Agent ready to speak")
+        if video_track:
+            self.logger.info("🎥 Agent ready to publish video")
+        # In Realtime mode we directly publish the provider's output track; no extra forwarding needed
+    def _get_subscription_config(self):
+        return TrackSubscriptionConfig(
+            track_types=[
+                TrackType.TRACK_TYPE_VIDEO,
+                TrackType.TRACK_TYPE_AUDIO,
+            ]
+        )
+    def close(self):
+        # Note: Not calling super().close() as it's an abstract method with trivial body
+        pass
+    async def open_demo(self, call: Call) -> str:
+        client = call.client.stream
+        # Create a human user for testing
+        human_id = f"user-{uuid4()}"
+        name = "Human User"
+        # Create user token for browser access
+        token = client.create_token(human_id, expiration=3600)
+        """Helper function to open browser with Stream call link."""
+        base_url = (
+            f"{os.getenv('EXAMPLE_BASE_URL', 'https://getstream.io/video/demos')}/join/"
+        )
+        params = {
+            "api_key": client.api_key,
+            "token": token,
+            "skip_lobby": "true",
+            "user_name": name,
+            "video_encoder": "vp8",
+            "bitrate": 12000000,
+            "w": 1920,
+            "h": 1080,
+            # TODO: FPS..., aim at 60fps
+        }
+        url = f"{base_url}{call.id}?{urlencode(params)}"
+        print(f"🌐 Opening browser to: {url}")
+        try:
+            webbrowser.open(url)
+            print("✅ Browser opened successfully!")
+        except Exception as e:
+            print(f"❌ Failed to open browser: {e}")
+            print(f"Please manually open this URL: {url}")
+        return url
+    def open_pronto(self, api_key: str, token: str, call_id: str):
+        """Open browser with the video call URL."""
+        # Use the same URL pattern as the working workout assistant example
+        base_url = (
+            f"{os.getenv('EXAMPLE_BASE_URL', 'https://pronto-staging.getstream.io')}/join/"
+        )
+        params = {
+            "api_key": api_key,
+            "token": token,
+            "skip_lobby": "true",
+            "video_encoder": "vp8",
+        }
+        url = f"{base_url}{call_id}?{urlencode(params)}"
+        self.logger.info(f"🌐 Opening browser: {url}")
+        try:
+            webbrowser.open(url)
+            self.logger.info("✅ Browser opened successfully!")
+        except Exception as e:
+            self.logger.error(f"❌ Failed to open browser: {e}")
+            self.logger.info(f"Please manually open this URL: {url}")