PyPI - vision-agents-plugins-openai - Versions diffs - 0.0.17__tar.gz - Mend

vision-agents-plugins-openai 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

vision_agents_plugins_openai-0.0.17/.gitignore ADDED Viewed

@@ -0,0 +1,32 @@
+*/__pycache__
+*/chat/__pycache__
+*/video/__pycache__
+*/chat/sync/__pycache__
+*/chat/async_/__pycache__
+*/sync/__pycache__
+*/async_/__pycache__
+*/video/sync/__pycache__
+*/model/__pycache__/
+*/cli/__pycache__
+*/cli/__pycache__
+.env
+.venv
+.vscode/settings.json
+*.pyc
+dist/*
+dist/*
+*.log
+.python-version
+pyvenv.cfg
+.idea*
+bin/*
+lib/*
+shell.nix
+pyrightconfig.json
+.DS_Store
+*.egg-info/
+*.egg
+*.pt
+*.kef
+.env.bak

vision_agents_plugins_openai-0.0.17/PKG-INFO ADDED Viewed

@@ -0,0 +1,117 @@
+Metadata-Version: 2.4
+Name: vision-agents-plugins-openai
+Version: 0.0.17
+Summary: OpenAI plugin for vision agents
+Project-URL: Documentation, https://visionagents.ai/
+Project-URL: Website, https://visionagents.ai/
+Project-URL: Source, https://github.com/GetStream/Vision-Agents
+License-Expression: MIT
+Requires-Python: >=3.10
+Requires-Dist: openai[realtime]>=2.2.0
+Requires-Dist: vision-agents
+Description-Content-Type: text/markdown
+# OpenAI Plugin for GetStream
+This package provides OpenAI integration for the GetStream plugin ecosystem.
+It enables features such as:
+- Real-time transcription and language processing using OpenAI models
+- Easy integration with other GetStream plugins and services
+- Function calling capabilities for dynamic interactions
+## Installation
+```bash
+pip install vision-agents-plugins-openai
+```
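+## Usage
+> **Note:** the snippets below use the `getstream.plugins.openai` import path and the `OpenAIRealtime` name. In this distribution the module is `vision_agents.plugins.openai`, and it exports `Realtime` and `LLM`.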
+```python
+from getstream.plugins.openai import OpenAIRealtime
+# Initialize with API key
+sts = OpenAIRealtime(api_key="your_openai_api_key", voice="alloy")
+# Connect to a call
+async with await sts.connect(call, agent_user_id="assistant") as connection:
+    # Send user message
+    await sts.send_user_message("Hello, how can you help me?")
+    # Request assistant response
+    await sts.request_assistant_response()
+```
+## Function Calling
+The OpenAI Realtime API supports function calling, allowing the assistant to invoke custom functions you define. This enables dynamic interactions like:
+- Database queries
+- API calls to external services
+- File operations
+- Custom business logic
+### Example with Function Calling
+```python
+from getstream.plugins.openai import OpenAIRealtime
+# Define your functions
+def get_weather(location: str) -> str:
+    """Get current weather for a location"""
+    # Your weather API logic here
+    return f"Weather in {location}: Sunny, 72°F"
+def send_email(to: str, subject: str, body: str) -> str:
+    """Send an email"""
+    # Your email sending logic here
+    return f"Email sent to {to} with subject: {subject}"
+# Initialize with functions
+sts = OpenAIRealtime(
+    api_key="your_openai_api_key",
+    voice="alloy",
+    functions=[
+        {
+            "name": "get_weather",
+            "description": "Get current weather information",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {"type": "string", "description": "City name"}
+                },
+                "required": ["location"]
+            }
+        },
+        {
+            "name": "send_email",
+            "description": "Send an email to someone",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "to": {"type": "string", "description": "Recipient email"},
+                    "subject": {"type": "string", "description": "Email subject"},
+                    "body": {"type": "string", "description": "Email body"}
+                },
+                "required": ["to", "subject", "body"]
+            }
+        }
+    ]
+)
+async with await sts.connect(call, agent_user_id="assistant") as connection:
+    await sts.send_user_message("What's the weather like in San Francisco?")
+    await sts.request_assistant_response()
+    # The assistant can now call your functions and you can respond with results
+    # await sts.send_function_call_output("call_id", "function_result")
+```
+## Requirements
+- Python 3.10+
+- openai[realtime] api
+- GetStream SDK
+## License
+MIT

vision_agents_plugins_openai-0.0.17/README.md ADDED Viewed

@@ -0,0 +1,104 @@
+# OpenAI Plugin for GetStream
+This package provides OpenAI integration for the GetStream plugin ecosystem.
+It enables features such as:
+- Real-time transcription and language processing using OpenAI models
+- Easy integration with other GetStream plugins and services
+- Function calling capabilities for dynamic interactions
+## Installation
+```bash
+pip install vision-agents-plugins-openai
+```
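+## Usage
+> **Note:** the snippets below use the `getstream.plugins.openai` import path and the `OpenAIRealtime` name. In this distribution the module is `vision_agents.plugins.openai`, and it exports `Realtime` and `LLM`.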
+```python
+from getstream.plugins.openai import OpenAIRealtime
+# Initialize with API key
+sts = OpenAIRealtime(api_key="your_openai_api_key", voice="alloy")
+# Connect to a call
+async with await sts.connect(call, agent_user_id="assistant") as connection:
+    # Send user message
+    await sts.send_user_message("Hello, how can you help me?")
+    # Request assistant response
+    await sts.request_assistant_response()
+```
+## Function Calling
+The OpenAI Realtime API supports function calling, allowing the assistant to invoke custom functions you define. This enables dynamic interactions like:
+- Database queries
+- API calls to external services
+- File operations
+- Custom business logic
+### Example with Function Calling
+```python
+from getstream.plugins.openai import OpenAIRealtime
+# Define your functions
+def get_weather(location: str) -> str:
+    """Get current weather for a location"""
+    # Your weather API logic here
+    return f"Weather in {location}: Sunny, 72°F"
+def send_email(to: str, subject: str, body: str) -> str:
+    """Send an email"""
+    # Your email sending logic here
+    return f"Email sent to {to} with subject: {subject}"
+# Initialize with functions
+sts = OpenAIRealtime(
+    api_key="your_openai_api_key",
+    voice="alloy",
+    functions=[
+        {
+            "name": "get_weather",
+            "description": "Get current weather information",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {"type": "string", "description": "City name"}
+                },
+                "required": ["location"]
+            }
+        },
+        {
+            "name": "send_email",
+            "description": "Send an email to someone",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "to": {"type": "string", "description": "Recipient email"},
+                    "subject": {"type": "string", "description": "Email subject"},
+                    "body": {"type": "string", "description": "Email body"}
+                },
+                "required": ["to", "subject", "body"]
+            }
+        }
+    ]
+)
+async with await sts.connect(call, agent_user_id="assistant") as connection:
+    await sts.send_user_message("What's the weather like in San Francisco?")
+    await sts.request_assistant_response()
+    # The assistant can now call your functions and you can respond with results
+    # await sts.send_function_call_output("call_id", "function_result")
+```
+## Requirements
+- Python 3.10+
+- openai[realtime] api
+- GetStream SDK
+## License
+MIT

vision_agents_plugins_openai-0.0.17/py.typed ADDED Viewed

File without changes

vision_agents_plugins_openai-0.0.17/pyproject.toml ADDED Viewed

@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+[project]
+name = "vision-agents-plugins-openai"
+dynamic = ["version"]
+description = "OpenAI plugin for vision agents"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+dependencies = [
+    "vision-agents",
+    "openai[realtime]>=2.2.0",
+]
+[project.urls]
+Documentation = "https://visionagents.ai/"
+Website = "https://visionagents.ai/"
+Source = "https://github.com/GetStream/Vision-Agents"
+[tool.hatch.version]
+source = "vcs"
+raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+[tool.hatch.build.targets.wheel]
+packages = ["."]
+[tool.uv.sources]
+vision-agents = { workspace = true }
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+]

vision_agents_plugins_openai-0.0.17/tests/test_openai_llm.py ADDED Viewed

@@ -0,0 +1,99 @@
+import pytest
+from dotenv import load_dotenv
+from vision_agents.core.agents.conversation import Message
+from vision_agents.plugins.openai.openai_llm import OpenAILLM
+from vision_agents.core.llm.events import LLMResponseChunkEvent
+load_dotenv()
+class TestOpenAILLM:
+    """Tests for OpenAILLM: message-normalization checks plus integration tests.
+    Tests marked ``integration`` call ``simple_response`` / ``create_response`` on an
+    ``OpenAILLM(model="gpt-4o")`` instance; no mocking is set up in this class.
+    """
+    def test_message(self):
+        """A plain string normalizes to a sequence whose first item is a Message."""
+        messages = OpenAILLM._normalize_message("say hi")
+        assert isinstance(messages[0], Message)
+        message = messages[0]
+        assert message.original is not None
+        assert message.content == "say hi"
+    def test_advanced_message(self):
+        """A list of role/content dicts (text + image parts) normalizes with a non-None ``original``."""
+        img_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/2023_06_08_Raccoon1.jpg/1599px-2023_06_08_Raccoon1.jpg"
+        advanced = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "what do you see in this image?"},
+                    {"type": "input_image", "image_url": img_url},
+                ],
+            }
+        ]
+        messages2 = OpenAILLM._normalize_message(advanced)
+        assert messages2[0].original is not None
+    @pytest.fixture
+    async def llm(self) -> OpenAILLM:
+        """Provide a fresh OpenAILLM configured for the ``gpt-4o`` model."""
+        llm = OpenAILLM(model="gpt-4o")
+        return llm
+    @pytest.mark.integration
+    async def test_simple(self, llm: OpenAILLM):
+        """``simple_response`` returns a response with non-empty text."""
+        response = await llm.simple_response(
+            "Explain quantum computing in 1 paragraph",
+        )
+        assert response.text
+    @pytest.mark.integration
+    async def test_native_api(self, llm: OpenAILLM):
+        """``create_response`` returns text and exposes the underlying response via ``original``."""
+        response = await llm.create_response(
+            input="say hi", instructions="You are a helpful assistant."
+        )
+        # Assertions
+        assert response.text
+        assert hasattr(response.original, 'id')  # OpenAI response has id
+    @pytest.mark.integration
+    async def test_streaming(self, llm: OpenAILLM):
+        """At least one LLMResponseChunkEvent is delivered while a response is produced."""
+        streaming_works = False
+        @llm.events.subscribe
+        async def passed(event: LLMResponseChunkEvent):
+            nonlocal streaming_works
+            streaming_works = True
+        response = await llm.simple_response(
+            "Explain quantum computing in 1 paragraph",
+        )
+        # Let pending event handlers run before checking the flag.
+        await llm.events.wait()
+        assert response.text
+        assert streaming_works
+    @pytest.mark.integration
+    async def test_memory(self, llm: OpenAILLM):
+        """A second ``simple_response`` call can use a fact stated in the first one."""
+        await llm.simple_response(
+            text="There are 2 dogs in the room",
+        )
+        response = await llm.simple_response(
+            text="How many paws are there in the room?",
+        )
+        # Compare case-insensitively so an answer such as "Eight paws" also passes.
+        answer = response.text.lower()
+        assert "8" in answer or "eight" in answer
+    @pytest.mark.integration
+    async def test_native_memory(self, llm: OpenAILLM):
+        """A second ``create_response`` call can use a fact stated in the first one."""
+        await llm.create_response(
+            input="There are 2 dogs in the room",
+        )
+        response = await llm.create_response(
+            input="How many paws are there in the room?",
+        )
+        # Compare case-insensitively so an answer such as "Eight paws" also passes.
+        answer = response.text.lower()
+        assert "8" in answer or "eight" in answer

vision_agents_plugins_openai-0.0.17/tests/test_openai_realtime.py ADDED Viewed

@@ -0,0 +1,110 @@
+import asyncio
+import pytest
+from dotenv import load_dotenv
+from vision_agents.plugins.openai import Realtime
+from vision_agents.core.llm.events import RealtimeAudioOutputEvent
+# Load environment variables
+load_dotenv()
+class TestOpenAIRealtime:
+    """Integration tests for OpenAI Realtime API"""
+    @pytest.fixture
+    async def realtime(self):
+        """Create and manage Realtime connection lifecycle"""
+        realtime = Realtime(
+            model="gpt-realtime",
+            voice="alloy",
+        )
+        try:
+            yield realtime
+        finally:
+            # Always close, even when the test body raised.
+            await realtime.close()
+    @pytest.mark.integration
+    async def test_simple_response_flow(self, realtime):
+        """Send a simple text message and expect at least one audio output event."""
+        # Collect every audio output event emitted during the test
+        events = []
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+        # Send a simple message
+        await realtime.simple_response("Hello, can you hear me?")
+        # Wait for response
+        await asyncio.sleep(3.0)
+        assert len(events) > 0
+    @pytest.mark.integration
+    async def test_audio_sending_flow(self, realtime, mia_audio_16khz):
+        """Send real audio data (resampled to 48kHz) and expect at least one audio output event."""
+        events = []
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+        # Wait for connection to be fully established
+        await asyncio.sleep(2.0)
+        # Convert 16kHz audio to 48kHz for OpenAI realtime
+        # OpenAI expects 48kHz PCM audio
+        import numpy as np
+        from scipy import signal
+        from vision_agents.core.edge.types import PcmData
+        # Resample from 16kHz to 48kHz
+        # NOTE(review): assumes the fixture's samples are int16-scaled PCM — confirm against the fixture.
+        samples_16k = mia_audio_16khz.samples
+        num_samples_48k = int(len(samples_16k) * 48000 / 16000)
+        resampled = signal.resample(samples_16k, num_samples_48k)
+        # Fourier resampling can overshoot the int16 range; clip before casting so
+        # peaks saturate instead of wrapping around to the opposite sign.
+        int16_range = np.iinfo(np.int16)
+        samples_48k = np.clip(resampled, int16_range.min, int16_range.max).astype(np.int16)
+        # Create new PcmData with 48kHz
+        audio_48khz = PcmData(
+            samples=samples_48k,
+            sample_rate=48000,
+            format="s16"
+        )
+        await realtime.simple_response("Listen to the following audio and tell me what you hear")
+        await asyncio.sleep(5.0)
+        # Send the resampled audio
+        await realtime.simple_audio_response(audio_48khz)
+        # Wait for response
+        await asyncio.sleep(10.0)
+        assert len(events) > 0
+    @pytest.mark.integration
+    async def test_video_sending_flow(self, realtime, bunny_video_track):
+        """Send real video data and expect at least one audio output event."""
+        events = []
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+        await realtime.simple_response("Describe what you see in this video please")
+        await asyncio.sleep(10.0)
+        # Start watching the video track (no explicit FPS is passed here)
+        await realtime._watch_video_track(bunny_video_track)
+        # Let it run for a few seconds
+        await asyncio.sleep(10.0)
+        # Stop video sender
+        await realtime._stop_watching_video_track()
+        assert len(events) > 0

vision_agents_plugins_openai-0.0.17/vision_agents/plugins/openai/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .openai_llm import OpenAILLM as LLM
+from .openai_realtime import Realtime
+__all__ = ["Realtime", "LLM"]

vision_agents_plugins_openai-0.0.17/vision_agents/plugins/openai/events.py ADDED Viewed

@@ -0,0 +1,19 @@
+from dataclasses import dataclass, field
+from vision_agents.core.events import PluginBaseEvent
+from typing import Optional, Any
+@dataclass
+class OpenAIStreamEvent(PluginBaseEvent):
+    """Plugin event published for a stream event received from OpenAI."""
+    # Fixed event identifier; init=False keeps it out of the generated __init__.
+    type: str = field(init=False, default="plugin.openai.stream")
+    # Presumably the name of the underlying OpenAI stream event — confirm against the emitter.
+    event_type: Optional[str] = None
+    # Untyped payload attached to the event; its shape is not visible from this file.
+    event_data: Optional[Any] = None
+@dataclass
+class LLMErrorEvent(PluginBaseEvent):
+    """Plugin event published when an LLM encounters an error."""
+    # Fixed event identifier; init=False keeps it out of the generated __init__.
+    type: str = field(init=False, default="plugin.llm.error")
+    # Human-readable description of the failure, when one is available.
+    error_message: Optional[str] = None
+    # Untyped payload attached to the event; its shape is not visible from this file.
+    event_data: Optional[Any] = None