vision-agents-plugins-openai 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ */__pycache__
2
+ */chat/__pycache__
3
+ */video/__pycache__
4
+ */chat/sync/__pycache__
5
+ */chat/async_/__pycache__
6
+ */sync/__pycache__
7
+ */async_/__pycache__
8
+ */video/sync/__pycache__
9
+ */model/__pycache__/
10
+ */cli/__pycache__
11
+ */cli/__pycache__
12
+ .env
13
+ .venv
14
+ .vscode/settings.json
15
+ *.pyc
16
+ dist/*
17
+ dist/*
18
+ *.log
19
+ .python-version
20
+ pyvenv.cfg
21
+ .idea*
22
+ bin/*
23
+ lib/*
24
+ shell.nix
25
+ pyrightconfig.json
26
+ .DS_Store
27
+
28
+ *.egg-info/
29
+ *.egg
30
+ *.pt
31
+ *.kef
32
+ .env.bak
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-openai
3
+ Version: 0.0.17
4
+ Summary: OpenAI plugin for vision agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: openai[realtime]>=2.2.0
11
+ Requires-Dist: vision-agents
12
+ Description-Content-Type: text/markdown
13
+
14
+ # OpenAI Plugin for GetStream
15
+
16
+ This package provides OpenAI integration for the GetStream plugin ecosystem.
17
+
18
+ It enables features such as:
19
+ - Real-time transcription and language processing using OpenAI models
20
+ - Easy integration with other GetStream plugins and services
21
+ - Function calling capabilities for dynamic interactions
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install vision-agents-plugins-openai
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from getstream.plugins.openai import OpenAIRealtime
33
+
34
+ # Initialize with API key
35
+ sts = OpenAIRealtime(api_key="your_openai_api_key", voice="alloy")
36
+
37
+ # Connect to a call
38
+ async with await sts.connect(call, agent_user_id="assistant") as connection:
39
+ # Send user message
40
+ await sts.send_user_message("Hello, how can you help me?")
41
+
42
+ # Request assistant response
43
+ await sts.request_assistant_response()
44
+ ```
45
+
46
+ ## Function Calling
47
+
48
+ The OpenAI Realtime API supports function calling, allowing the assistant to invoke custom functions you define. This enables dynamic interactions like:
49
+
50
+ - Database queries
51
+ - API calls to external services
52
+ - File operations
53
+ - Custom business logic
54
+
55
+ ### Example with Function Calling
56
+
57
+ ```python
58
+ from getstream.plugins.openai import OpenAIRealtime
59
+
60
+ # Define your functions
61
+ def get_weather(location: str) -> str:
62
+ """Get current weather for a location"""
63
+ # Your weather API logic here
64
+ return f"Weather in {location}: Sunny, 72°F"
65
+
66
+ def send_email(to: str, subject: str, body: str) -> str:
67
+ """Send an email"""
68
+ # Your email sending logic here
69
+ return f"Email sent to {to} with subject: {subject}"
70
+
71
+ # Initialize with functions
72
+ sts = OpenAIRealtime(
73
+ api_key="your_openai_api_key",
74
+ voice="alloy",
75
+ functions=[
76
+ {
77
+ "name": "get_weather",
78
+ "description": "Get current weather information",
79
+ "parameters": {
80
+ "type": "object",
81
+ "properties": {
82
+ "location": {"type": "string", "description": "City name"}
83
+ },
84
+ "required": ["location"]
85
+ }
86
+ },
87
+ {
88
+ "name": "send_email",
89
+ "description": "Send an email to someone",
90
+ "parameters": {
91
+ "type": "object",
92
+ "properties": {
93
+ "to": {"type": "string", "description": "Recipient email"},
94
+ "subject": {"type": "string", "description": "Email subject"},
95
+ "body": {"type": "string", "description": "Email body"}
96
+ },
97
+ "required": ["to", "subject", "body"]
98
+ }
99
+ }
100
+ ]
101
+ )
102
+
103
+ async with await sts.connect(call, agent_user_id="assistant") as connection:
104
+ await sts.send_user_message("What's the weather like in San Francisco?")
105
+ await sts.request_assistant_response()
106
+
107
+ # The assistant can now call your functions and you can respond with results
108
+ # await sts.send_function_call_output("call_id", "function_result")
109
+ ```
110
+
111
+ ## Requirements
112
+ - Python 3.10+
113
+ - openai[realtime] api
114
+ - GetStream SDK
115
+
116
+ ## License
117
+ MIT
@@ -0,0 +1,104 @@
1
+ # OpenAI Plugin for GetStream
2
+
3
+ This package provides OpenAI integration for the GetStream plugin ecosystem.
4
+
5
+ It enables features such as:
6
+ - Real-time transcription and language processing using OpenAI models
7
+ - Easy integration with other GetStream plugins and services
8
+ - Function calling capabilities for dynamic interactions
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install vision-agents-plugins-openai
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ```python
19
+ from getstream.plugins.openai import OpenAIRealtime
20
+
21
+ # Initialize with API key
22
+ sts = OpenAIRealtime(api_key="your_openai_api_key", voice="alloy")
23
+
24
+ # Connect to a call
25
+ async with await sts.connect(call, agent_user_id="assistant") as connection:
26
+ # Send user message
27
+ await sts.send_user_message("Hello, how can you help me?")
28
+
29
+ # Request assistant response
30
+ await sts.request_assistant_response()
31
+ ```
32
+
33
+ ## Function Calling
34
+
35
+ The OpenAI Realtime API supports function calling, allowing the assistant to invoke custom functions you define. This enables dynamic interactions like:
36
+
37
+ - Database queries
38
+ - API calls to external services
39
+ - File operations
40
+ - Custom business logic
41
+
42
+ ### Example with Function Calling
43
+
44
+ ```python
45
+ from getstream.plugins.openai import OpenAIRealtime
46
+
47
+ # Define your functions
48
+ def get_weather(location: str) -> str:
49
+ """Get current weather for a location"""
50
+ # Your weather API logic here
51
+ return f"Weather in {location}: Sunny, 72°F"
52
+
53
+ def send_email(to: str, subject: str, body: str) -> str:
54
+ """Send an email"""
55
+ # Your email sending logic here
56
+ return f"Email sent to {to} with subject: {subject}"
57
+
58
+ # Initialize with functions
59
+ sts = OpenAIRealtime(
60
+ api_key="your_openai_api_key",
61
+ voice="alloy",
62
+ functions=[
63
+ {
64
+ "name": "get_weather",
65
+ "description": "Get current weather information",
66
+ "parameters": {
67
+ "type": "object",
68
+ "properties": {
69
+ "location": {"type": "string", "description": "City name"}
70
+ },
71
+ "required": ["location"]
72
+ }
73
+ },
74
+ {
75
+ "name": "send_email",
76
+ "description": "Send an email to someone",
77
+ "parameters": {
78
+ "type": "object",
79
+ "properties": {
80
+ "to": {"type": "string", "description": "Recipient email"},
81
+ "subject": {"type": "string", "description": "Email subject"},
82
+ "body": {"type": "string", "description": "Email body"}
83
+ },
84
+ "required": ["to", "subject", "body"]
85
+ }
86
+ }
87
+ ]
88
+ )
89
+
90
+ async with await sts.connect(call, agent_user_id="assistant") as connection:
91
+ await sts.send_user_message("What's the weather like in San Francisco?")
92
+ await sts.request_assistant_response()
93
+
94
+ # The assistant can now call your functions and you can respond with results
95
+ # await sts.send_function_call_output("call_id", "function_result")
96
+ ```
97
+
98
+ ## Requirements
99
+ - Python 3.10+
100
+ - openai[realtime] api
101
+ - GetStream SDK
102
+
103
+ ## License
104
+ MIT
File without changes
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-openai"
7
+ dynamic = ["version"]
8
+ description = "OpenAI plugin for vision agents"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ dependencies = [
13
+ "vision-agents",
14
+ "openai[realtime]>=2.2.0",
15
+ ]
16
+
17
+ [project.urls]
18
+ Documentation = "https://visionagents.ai/"
19
+ Website = "https://visionagents.ai/"
20
+ Source = "https://github.com/GetStream/Vision-Agents"
21
+
22
+ [tool.hatch.version]
23
+ source = "vcs"
24
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
25
+
26
+ [tool.hatch.build.targets.wheel]
27
+ packages = ["."]
28
+
29
+ [tool.uv.sources]
30
+ vision-agents = { workspace = true }
31
+
32
+ [dependency-groups]
33
+ dev = [
34
+ "pytest>=8.4.1",
35
+ "pytest-asyncio>=1.0.0",
36
+ ]
@@ -0,0 +1,99 @@
1
+ import pytest
2
+ from dotenv import load_dotenv
3
+
4
+ from vision_agents.core.agents.conversation import Message
5
+ from vision_agents.plugins.openai.openai_llm import OpenAILLM
6
+ from vision_agents.core.llm.events import LLMResponseChunkEvent
7
+
8
+ load_dotenv()
9
+
10
+
11
class TestOpenAILLM:
    """Tests for OpenAILLM message normalization and response calls.

    Tests marked ``integration`` drive an ``OpenAILLM`` built by the ``llm``
    fixture; no mocking is set up in this class.
    """

    def test_message(self):
        """A plain string normalizes to a Message that keeps the original."""
        normalized = OpenAILLM._normalize_message("say hi")
        first = normalized[0]
        assert isinstance(first, Message)
        assert first.original is not None
        assert first.content == "say hi"

    def test_advanced_message(self):
        """A structured text + image input keeps its original payload."""
        img_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/2023_06_08_Raccoon1.jpg/1599px-2023_06_08_Raccoon1.jpg"
        user_turn = {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "what do you see in this image?"},
                {"type": "input_image", "image_url": img_url},
            ],
        }

        normalized = OpenAILLM._normalize_message([user_turn])
        assert normalized[0].original is not None

    @pytest.fixture
    async def llm(self) -> OpenAILLM:
        """Provide a fresh OpenAILLM configured for the gpt-4o model."""
        return OpenAILLM(model="gpt-4o")

    @pytest.mark.integration
    async def test_simple(self, llm: OpenAILLM):
        """simple_response returns non-empty text for a basic prompt."""
        reply = await llm.simple_response("Explain quantum computing in 1 paragraph")
        assert reply.text

    @pytest.mark.integration
    async def test_native_api(self, llm: OpenAILLM):
        """create_response returns text and exposes the original response object."""
        reply = await llm.create_response(
            input="say hi",
            instructions="You are a helpful assistant.",
        )

        assert reply.text
        # The wrapped original response is expected to carry an ``id`` attribute.
        assert hasattr(reply.original, "id")

    @pytest.mark.integration
    async def test_streaming(self, llm: OpenAILLM):
        """At least one LLMResponseChunkEvent is delivered during a response."""
        chunk_seen = False

        @llm.events.subscribe
        async def on_chunk(event: LLMResponseChunkEvent):
            # Flip the flag as soon as any chunk event reaches the subscriber.
            nonlocal chunk_seen
            chunk_seen = True

        reply = await llm.simple_response("Explain quantum computing in 1 paragraph")

        # Wait on the event manager before checking the flag.
        await llm.events.wait()

        assert reply.text
        assert chunk_seen

    @pytest.mark.integration
    async def test_memory(self, llm: OpenAILLM):
        """A follow-up simple_response can use a fact from the previous turn."""
        await llm.simple_response(text="There are 2 dogs in the room")
        reply = await llm.simple_response(text="How many paws are there in the room?")

        answer = reply.text
        assert "8" in answer or "eight" in answer

    @pytest.mark.integration
    async def test_native_memory(self, llm: OpenAILLM):
        """A follow-up create_response can use a fact from the previous turn."""
        await llm.create_response(input="There are 2 dogs in the room")
        reply = await llm.create_response(input="How many paws are there in the room?")

        answer = reply.text
        assert "8" in answer or "eight" in answer
@@ -0,0 +1,110 @@
1
+ import asyncio
2
+ import pytest
3
+ from dotenv import load_dotenv
4
+
5
+ from vision_agents.plugins.openai import Realtime
6
+ from vision_agents.core.llm.events import RealtimeAudioOutputEvent
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+
12
class TestOpenAIRealtime:
    """Integration tests for the OpenAI ``Realtime`` plugin class."""

    @pytest.fixture
    async def realtime(self):
        """Yield a Realtime instance and close it once the test finishes."""
        session = Realtime(model="gpt-realtime", voice="alloy")
        try:
            yield session
        finally:
            # Always close, even when the test body raised.
            await session.close()

    @pytest.mark.integration
    async def test_simple_response_flow(self, realtime):
        """A text prompt yields at least one RealtimeAudioOutputEvent."""
        received = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            received.append(event)

        await asyncio.sleep(0.01)
        await realtime.connect()
        await realtime.simple_response("Hello, can you hear me?")

        # Give the session a few seconds to produce audio output.
        await asyncio.sleep(3.0)
        assert len(received) > 0

    @pytest.mark.integration
    async def test_audio_sending_flow(self, realtime, mia_audio_16khz):
        """Sending resampled audio yields at least one RealtimeAudioOutputEvent."""
        received = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            received.append(event)

        await asyncio.sleep(0.01)
        await realtime.connect()

        # Pause so the connection has time to settle before sending anything.
        await asyncio.sleep(2.0)

        # Imported here so only this test needs numpy/scipy.
        import numpy as np
        from scipy import signal
        from vision_agents.core.edge.types import PcmData

        # Upsample the 16 kHz fixture to 48 kHz before handing it to the session.
        source_samples = mia_audio_16khz.samples
        target_len = int(len(source_samples) * 48000 / 16000)
        upsampled = signal.resample(source_samples, target_len).astype(np.int16)

        audio_48khz = PcmData(samples=upsampled, sample_rate=48000, format="s16")

        await realtime.simple_response("Listen to the following audio and tell me what you hear")
        await asyncio.sleep(5.0)

        # Send the resampled clip.
        await realtime.simple_audio_response(audio_48khz)

        # Allow time for a response before checking the collected events.
        await asyncio.sleep(10.0)
        assert len(received) > 0

    @pytest.mark.integration
    async def test_video_sending_flow(self, realtime, bunny_video_track):
        """Watching a video track yields at least one RealtimeAudioOutputEvent."""
        received = []

        @realtime.events.subscribe
        async def on_audio(event: RealtimeAudioOutputEvent):
            received.append(event)

        await asyncio.sleep(0.01)
        await realtime.connect()
        await realtime.simple_response("Describe what you see in this video please")
        await asyncio.sleep(10.0)

        # Begin forwarding frames from the fixture track.
        await realtime._watch_video_track(bunny_video_track)

        # Keep the video running for a while.
        await asyncio.sleep(10.0)

        # Stop forwarding frames, then check that audio output arrived.
        await realtime._stop_watching_video_track()
        assert len(received) > 0
110
+
@@ -0,0 +1,6 @@
1
+
2
+ from .openai_llm import OpenAILLM as LLM
3
+ from .openai_realtime import Realtime
4
+
5
+ __all__ = ["Realtime", "LLM"]
6
+
@@ -0,0 +1,19 @@
1
+ from dataclasses import dataclass, field
2
+ from vision_agents.core.events import PluginBaseEvent
3
+ from typing import Optional, Any
4
+
5
+
6
@dataclass
class OpenAIStreamEvent(PluginBaseEvent):
    """Plugin event that carries a single stream event from OpenAI."""

    # Fixed event identifier; excluded from __init__ so callers cannot override it.
    type: str = field(init=False, default="plugin.openai.stream")
    # Presumably the OpenAI-side event type name - confirm against the emitter.
    event_type: Optional[str] = None
    # Payload accompanying the stream event; its shape is not constrained here.
    event_data: Optional[Any] = None
12
+
13
+
14
@dataclass
class LLMErrorEvent(PluginBaseEvent):
    """Plugin event that reports an error encountered by an LLM."""

    # Fixed event identifier; excluded from __init__ so callers cannot override it.
    type: str = field(init=False, default="plugin.llm.error")
    # Human-readable description of the failure, when one is available.
    error_message: Optional[str] = None
    # Extra data attached to the error; its shape is not constrained here.
    event_data: Optional[Any] = None