vision-agents-plugins-xai 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .cursor/*
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ dist/
11
+ downloads/
12
+ develop-eggs/
13
+ eggs/
14
+ .eggs/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ pip-wheel-metadata/
22
+ MANIFEST
23
+ *.egg-info/
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ coverage.xml
38
+ nosetests.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+
44
+ # Type checker / lint caches
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .pytype/
49
+ .pyre/
50
+ .ruff_cache/
51
+
52
+ # Environments
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+ .env
60
+ .env.local
61
+ .env.*.local
62
+ .env.bak
63
+ pyvenv.cfg
64
+ .python-version
65
+
66
+ # Editors / IDEs
67
+ .vscode/
68
+ .idea/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints/
72
+
73
+ # OS / Misc
74
+ .DS_Store
75
+ *.log
76
+
77
+ # Tooling & repo-specific
78
+ pyrightconfig.json
79
+ shell.nix
80
+ bin/*
81
+ lib/*
82
+ stream-py/
83
+
84
+ # Artifacts / assets
85
+ *.pt
86
+ *.kef
87
+ *.onnx
88
+ profile.html
89
+
90
+ /opencode.json
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-xai
3
+ Version: 0.2.8
4
+ Summary: XAI for stream agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: Apache-2.0
9
+ Requires-Python: >=3.10.0
10
+ Requires-Dist: xai-sdk
11
+ Description-Content-Type: text/markdown
12
+
13
+ # xAI Plugin for Stream Agents
14
+
15
+ This package provides xAI (Grok) integration for the Stream Agents ecosystem, enabling you to use xAI's powerful language models in your conversational AI applications.
16
+
17
+ ## Features
18
+
19
+ - **Native xAI SDK Integration**: Full access to xAI's chat completion and streaming APIs
20
+ - **Conversation Memory**: Automatic conversation history management
21
+ - **Streaming Support**: Real-time response streaming with standardized events
22
+ - **Multimodal Support**: Handle text and image inputs
23
+ - **Event System**: Subscribe to response events for custom handling (see *Subscribing to Streaming Events* under Advanced Usage)
24
+ - **Easy Integration**: Drop-in replacement for other LLM providers
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install vision-agents-plugins-xai
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ```python
35
+ import asyncio
36
+ from vision_agents.plugins import xai
37
+
38
+ async def main():
39
+     # Initialize with your xAI API key
40
+     llm = xai.LLM(
41
+         model="grok-4",
42
+         api_key="your_xai_api_key" # or set XAI_API_KEY environment variable
43
+     )
44
+
45
+     # Simple response
46
+     response = await llm.simple_response("Explain quantum computing in simple terms")
47
+
48
+     print(f"\n\nComplete response: {response.text}")
49
+
50
+ if __name__ == "__main__":
51
+     asyncio.run(main())
52
+ ```
53
+
54
+ ## Advanced Usage
55
+
56
+ ### Conversation with Memory
57
+
58
+ ```python
59
+ from vision_agents.plugins import xai
60
+
61
+ llm = xai.LLM(model="grok-4", api_key="your_api_key")
62
+
63
+ # First message
64
+ await llm.simple_response("My name is Alice and I have 2 cats")
65
+
66
+ # Second message - the LLM remembers the context
67
+ response = await llm.simple_response("How many pets do I have?")
68
+ print(response.text) # Will mention the 2 cats
69
+ ```
70
+
71
+ ### Using Instructions
72
+
73
+ ```python
74
+ llm = xai.LLM(
75
+     model="grok-4",
76
+     api_key="your_api_key"
77
+ )
78
+
79
+ # Create a response with system instructions
80
+ response = await llm.create_response(
81
+     input="Tell me about the weather",
82
+     instructions="You are a helpful weather assistant. Always be cheerful and optimistic.",
83
+     stream=True
84
+ )
85
+ ```
86
+
87
+ ### Multimodal Input
88
+
89
+ ```python
90
+ # Handle complex multimodal messages
91
+ advanced_message = [
92
+     {
93
+         "role": "user",
94
+         "content": [
95
+             {"type": "input_text", "text": "What do you see in this image?"},
96
+             {"type": "input_image", "image_url": "https://example.com/image.jpg"},
97
+         ],
98
+     }
99
+ ]
100
+
101
+ messages = xai.LLM._normalize_message(advanced_message)
102
+ # Use with your conversation system
103
+ ```
104
+
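+ ### Subscribing to Streaming Events
+
+ Streaming responses are emitted as standardized chunk events that you can subscribe to. A minimal sketch of a subscriber, following the pattern used in this plugin's test suite (the handler name is illustrative):
+
+ ```python
+ from vision_agents.core.llm.events import LLMResponseChunkEvent
+ from vision_agents.plugins import xai
+
+ llm = xai.LLM(model="grok-4", api_key="your_api_key")
+
+ @llm.events.subscribe
+ async def on_chunk(event: LLMResponseChunkEvent):
+     # Each chunk event carries the newly generated text in `delta`
+     print(event.delta, end="", flush=True)
+
+ response = await llm.simple_response("Explain quantum computing in 1 paragraph")
+ await llm.events.wait()  # let any queued events finish delivering
+ ```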
105
+
106
+ ## API Reference
107
+
108
+ ### XAILLM Class
109
+
110
+ #### Constructor
111
+
112
+ ```python
113
+ LLM(
114
+     model: str = "grok-4-latest",
115
+     api_key: Optional[str] = None,
116
+     client: Optional[AsyncClient] = None
117
+ )
118
+ ```
119
+
120
+ **Parameters:**
121
+ - `model`: xAI model to use (default: "grok-4-latest")
122
+ - `api_key`: Your xAI API key (default: reads from `XAI_API_KEY` environment variable)
123
+ - `client`: Optional pre-configured xAI AsyncClient
124
+
125
+ #### Methods
126
+
127
+ ##### `async simple_response(text: str, processors=None, participant=None)`
128
+
129
+ Generate a simple response to text input.
130
+
131
+ **Parameters:**
132
+ - `text`: Input text to respond to
133
+ - `processors`: Optional list of processors for video/voice AI context
134
+ - `participant`: Optional participant object
135
+
136
+ **Returns:** `LLMResponseEvent[Response]` with the generated text
137
+
138
+ ##### `async create_response(input: str, instructions: str = "", model: Optional[str] = None, stream: bool = True)`
139
+
140
+ Create a response with full control over parameters.
141
+
142
+ **Parameters:**
143
+ - `input`: Input text
144
+ - `instructions`: System instructions for the model
145
+ - `model`: Override the default model
146
+ - `stream`: Whether to stream the response (default: True)
147
+
148
+ **Returns:** `LLMResponseEvent[Response]` with the generated text
149
+
150
+
151
+ ## Configuration
152
+
153
+ ### Environment Variables
154
+
155
+ - `XAI_API_KEY`: Your xAI API key (required if not provided in constructor)
156
+
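+ If no `api_key` argument is passed, the client reads this variable instead. A minimal sketch (the key value here is a placeholder):
+
+ ```python
+ import os
+ from vision_agents.plugins import xai
+
+ os.environ["XAI_API_KEY"] = "your_xai_api_key"  # normally set in your shell or a .env file
+ llm = xai.LLM(model="grok-4")  # no api_key argument needed
+ ```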
157
+
158
+ ## Requirements
159
+
160
+ - Python 3.10+
161
+ - `xai-sdk`
162
+ - `vision-agents-core`
163
+
164
+ ## License
165
+
166
+ Apache-2.0
@@ -0,0 +1,154 @@
1
+ # xAI Plugin for Stream Agents
2
+
3
+ This package provides xAI (Grok) integration for the Stream Agents ecosystem, enabling you to use xAI's powerful language models in your conversational AI applications.
4
+
5
+ ## Features
6
+
7
+ - **Native xAI SDK Integration**: Full access to xAI's chat completion and streaming APIs
8
+ - **Conversation Memory**: Automatic conversation history management
9
+ - **Streaming Support**: Real-time response streaming with standardized events
10
+ - **Multimodal Support**: Handle text and image inputs
11
+ - **Event System**: Subscribe to response events for custom handling (see *Subscribing to Streaming Events* under Advanced Usage)
12
+ - **Easy Integration**: Drop-in replacement for other LLM providers
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install vision-agents-plugins-xai
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ```python
23
+ import asyncio
24
+ from vision_agents.plugins import xai
25
+
26
+ async def main():
27
+     # Initialize with your xAI API key
28
+     llm = xai.LLM(
29
+         model="grok-4",
30
+         api_key="your_xai_api_key" # or set XAI_API_KEY environment variable
31
+     )
32
+
33
+     # Simple response
34
+     response = await llm.simple_response("Explain quantum computing in simple terms")
35
+
36
+     print(f"\n\nComplete response: {response.text}")
37
+
38
+ if __name__ == "__main__":
39
+     asyncio.run(main())
40
+ ```
41
+
42
+ ## Advanced Usage
43
+
44
+ ### Conversation with Memory
45
+
46
+ ```python
47
+ from vision_agents.plugins import xai
48
+
49
+ llm = xai.LLM(model="grok-4", api_key="your_api_key")
50
+
51
+ # First message
52
+ await llm.simple_response("My name is Alice and I have 2 cats")
53
+
54
+ # Second message - the LLM remembers the context
55
+ response = await llm.simple_response("How many pets do I have?")
56
+ print(response.text) # Will mention the 2 cats
57
+ ```
58
+
59
+ ### Using Instructions
60
+
61
+ ```python
62
+ llm = xai.LLM(
63
+     model="grok-4",
64
+     api_key="your_api_key"
65
+ )
66
+
67
+ # Create a response with system instructions
68
+ response = await llm.create_response(
69
+     input="Tell me about the weather",
70
+     instructions="You are a helpful weather assistant. Always be cheerful and optimistic.",
71
+     stream=True
72
+ )
73
+ ```
74
+
75
+ ### Multimodal Input
76
+
77
+ ```python
78
+ # Handle complex multimodal messages
79
+ advanced_message = [
80
+     {
81
+         "role": "user",
82
+         "content": [
83
+             {"type": "input_text", "text": "What do you see in this image?"},
84
+             {"type": "input_image", "image_url": "https://example.com/image.jpg"},
85
+         ],
86
+     }
87
+ ]
88
+
89
+ messages = xai.LLM._normalize_message(advanced_message)
90
+ # Use with your conversation system
91
+ ```
92
+
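+ ### Subscribing to Streaming Events
+
+ Streaming responses are emitted as standardized chunk events that you can subscribe to. A minimal sketch of a subscriber, following the pattern used in this plugin's test suite (the handler name is illustrative):
+
+ ```python
+ from vision_agents.core.llm.events import LLMResponseChunkEvent
+ from vision_agents.plugins import xai
+
+ llm = xai.LLM(model="grok-4", api_key="your_api_key")
+
+ @llm.events.subscribe
+ async def on_chunk(event: LLMResponseChunkEvent):
+     # Each chunk event carries the newly generated text in `delta`
+     print(event.delta, end="", flush=True)
+
+ response = await llm.simple_response("Explain quantum computing in 1 paragraph")
+ await llm.events.wait()  # let any queued events finish delivering
+ ```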
93
+
94
+ ## API Reference
95
+
96
+ ### XAILLM Class
97
+
98
+ #### Constructor
99
+
100
+ ```python
101
+ LLM(
102
+     model: str = "grok-4-latest",
103
+     api_key: Optional[str] = None,
104
+     client: Optional[AsyncClient] = None
105
+ )
106
+ ```
107
+
108
+ **Parameters:**
109
+ - `model`: xAI model to use (default: "grok-4-latest")
110
+ - `api_key`: Your xAI API key (default: reads from `XAI_API_KEY` environment variable)
111
+ - `client`: Optional pre-configured xAI AsyncClient
112
+
113
+ #### Methods
114
+
115
+ ##### `async simple_response(text: str, processors=None, participant=None)`
116
+
117
+ Generate a simple response to text input.
118
+
119
+ **Parameters:**
120
+ - `text`: Input text to respond to
121
+ - `processors`: Optional list of processors for video/voice AI context
122
+ - `participant`: Optional participant object
123
+
124
+ **Returns:** `LLMResponseEvent[Response]` with the generated text
125
+
126
+ ##### `async create_response(input: str, instructions: str = "", model: Optional[str] = None, stream: bool = True)`
127
+
128
+ Create a response with full control over parameters.
129
+
130
+ **Parameters:**
131
+ - `input`: Input text
132
+ - `instructions`: System instructions for the model
133
+ - `model`: Override the default model
134
+ - `stream`: Whether to stream the response (default: True)
135
+
136
+ **Returns:** `LLMResponseEvent[Response]` with the generated text
137
+
138
+
139
+ ## Configuration
140
+
141
+ ### Environment Variables
142
+
143
+ - `XAI_API_KEY`: Your xAI API key (required if not provided in constructor)
144
+
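+ If no `api_key` argument is passed, the client reads this variable instead. A minimal sketch (the key value here is a placeholder):
+
+ ```python
+ import os
+ from vision_agents.plugins import xai
+
+ os.environ["XAI_API_KEY"] = "your_xai_api_key"  # normally set in your shell or a .env file
+ llm = xai.LLM(model="grok-4")  # no api_key argument needed
+ ```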
145
+
146
+ ## Requirements
147
+
148
+ - Python 3.10+
149
+ - `xai-sdk`
150
+ - `vision-agents-core`
151
+
152
+ ## License
153
+
154
+ Apache-2.0
@@ -0,0 +1,35 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-xai"
7
+ dynamic = ["version"]
8
+ description = "XAI for stream agents"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10.0"
11
+ license = "Apache-2.0"
12
+ dependencies = [
13
+ "xai-sdk",
14
+ ]
15
+
16
+ [project.urls]
17
+ Documentation = "https://visionagents.ai/"
18
+ Website = "https://visionagents.ai/"
19
+ Source = "https://github.com/GetStream/Vision-Agents"
20
+
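+ # The package version is derived from git tags at the workspace root via hatch-vcs,
+ # falling back to 0.0.0 when no tag is available.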
21
+ [tool.hatch.version]
22
+ source = "vcs"
23
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
24
+
25
+ [tool.hatch.build.targets.wheel]
26
+ packages = ["."]
27
+
28
+ [tool.uv.sources]
29
+ vision-agents = { workspace = true }
30
+
31
+ [dependency-groups]
32
+ dev = [
33
+ "pytest>=8.4.1",
34
+ "pytest-asyncio>=1.0.0",
35
+ ]
@@ -0,0 +1,98 @@
1
+ import pytest
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
+ from vision_agents.core.agents.conversation import Message
6
+ from vision_agents.plugins.xai.llm import XAILLM
7
+ from vision_agents.core.llm.events import LLMResponseChunkEvent
8
+
9
+ load_dotenv()
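+ # Tests marked with @pytest.mark.integration call the live xAI API and are skipped
+ # unless XAI_API_KEY is set (e.g. via the .env file loaded above).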
10
+
11
+
12
+ class TestXAILLM:
13
+ """Test suite for XAILLM class with live API calls."""
14
+
15
+ def test_message(self):
16
+ messages = XAILLM._normalize_message("say hi")
17
+ assert isinstance(messages[0], Message)
18
+ message = messages[0]
19
+ assert message.original is not None
20
+ assert message.content == "say hi"
21
+
22
+ async def test_advanced_message(self):
23
+ img_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/2023_06_08_Raccoon1.jpg/1599px-2023_06_08_Raccoon1.jpg"
24
+
25
+ advanced = [
26
+ {
27
+ "role": "user",
28
+ "content": [
29
+ {"type": "input_text", "text": "what do you see in this image?"},
30
+ {"type": "input_image", "image_url": f"{img_url}"},
31
+ ],
32
+ }
33
+ ]
34
+ messages2 = XAILLM._normalize_message(advanced)
35
+ assert messages2[0].original is not None
36
+
37
+ @pytest.mark.integration
38
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
39
+ async def test_simple(self):
40
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
41
+ response = await llm.simple_response(
42
+ "Explain quantum computing in 1 paragraph",
43
+ )
44
+ assert response.text
45
+
46
+ @pytest.mark.integration
47
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
48
+ async def test_native_api(self):
49
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
50
+ response = await llm.create_response(
51
+ input="say hi", instructions="You are a helpful assistant."
52
+ )
53
+ assert response.text
54
+
55
+ @pytest.mark.integration
56
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
57
+ async def test_streaming(self):
58
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
59
+ streaming_works = False
60
+
61
+ @llm.events.subscribe
62
+ async def passed(event: LLMResponseChunkEvent):
63
+ nonlocal streaming_works
64
+ streaming_works = True
65
+
66
+ response = await llm.simple_response(
67
+ "Explain quantum computing in 1 paragraph",
68
+ )
69
+ await llm.events.wait()
70
+
71
+ assert response.text
72
+ assert streaming_works
73
+
74
+ @pytest.mark.integration
75
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
76
+ async def test_memory(self):
77
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
78
+ await llm.simple_response(
79
+ text="There are 2 dogs in the room",
80
+ )
81
+ await llm.events.wait()
82
+ response = await llm.simple_response(
83
+ text="How many paws are there in the room?",
84
+ )
85
+ assert "8" in response.text or "eight" in response.text
86
+
87
+ @pytest.mark.integration
88
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
89
+ async def test_native_memory(self):
90
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
91
+ await llm.create_response(
92
+ input="There are 2 dogs in the room",
93
+ )
94
+ await llm.events.wait()
95
+ response = await llm.create_response(
96
+ input="How many paws are there in the room?",
97
+ )
98
+ assert "8" in response.text or "eight" in response.text
@@ -0,0 +1,26 @@
1
+ import pytest
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from vision_agents.plugins.xai.llm import XAILLM
5
+
6
+ load_dotenv()
7
+
8
+
9
+ class TestXAITools:
10
+ """Test suite for XAILLM tool calling."""
11
+
12
+ @pytest.mark.integration
13
+ @pytest.mark.skipif(not os.getenv("XAI_API_KEY"), reason="XAI_API_KEY not set")
14
+ async def test_tool_calling(self):
15
+ llm = XAILLM(model="grok-4-latest", api_key=os.getenv("XAI_API_KEY"))
16
+
17
+ @llm.register_function()
18
+ def get_weather(location: str) -> str:
19
+ """Get the weather for a location."""
20
+ return f"The weather in {location} is sunny."
21
+
22
+ response = await llm.create_response(
23
+ input="What is the weather in San Francisco?",
24
+ )
25
+
26
+ assert "sunny" in response.text.lower()
@@ -0,0 +1,4 @@
1
+ from .llm import XAILLM as LLM
2
+ from .version import __version__
3
+
4
+ __all__ = ["LLM", "__version__"]
@@ -0,0 +1,11 @@
1
+ from dataclasses import dataclass, field
2
+ from vision_agents.core.events import PluginBaseEvent
3
+ from typing import Optional, Any
4
+
5
+
6
+ @dataclass
7
+ class XAIChunkEvent(PluginBaseEvent):
8
+ """Event emitted when xAI provides a chunk."""
9
+
10
+ type: str = field(default="plugin.xai.chunk", init=False)
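+     # Raw Chunk object from the xai_sdk stream, as emitted by XAILLM._standardize_and_emit_chunk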
11
+ chunk: Optional[Any] = None
@@ -0,0 +1,437 @@
1
+ import json
2
+ from typing import Optional, List, Any, TYPE_CHECKING, Dict
3
+ from xai_sdk import AsyncClient
4
+ from xai_sdk.chat import system, user, Response, Chunk, tool_result, tool
5
+ from xai_sdk.proto import chat_pb2
6
+
7
+ from vision_agents.core.llm.llm import LLM, LLMResponseEvent
8
+ from vision_agents.core.processors import Processor
9
+ from vision_agents.core.llm.events import (
10
+ LLMResponseChunkEvent,
11
+ LLMResponseCompletedEvent,
12
+ )
13
+ from vision_agents.core.llm.llm_types import NormalizedToolCallItem, ToolSchema
14
+ from . import events
15
+
16
+ if TYPE_CHECKING:
17
+ from vision_agents.core.agents.conversation import Message
18
+ from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
19
+ from xai_sdk.aio.chat import Chat
20
+ else:
21
+ from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
22
+
23
+
24
+ class XAILLM(LLM):
25
+ """
26
+ The XAILLM class provides full/native access to the xAI SDK methods.
27
+ It only standardizes the minimal feature set that's needed for the agent integration.
28
+
29
+ The agent requires that we standardize:
30
+ - sharing instructions
31
+ - keeping conversation history
32
+ - response normalization
33
+
34
+ Notes on the xAI integration
35
+     - the native method is called create_response (maps to xAI chat.sample() / chat.stream())
36
+ - history is maintained using the chat object's append method
37
+
38
+ Examples:
39
+
40
+ from vision_agents.plugins import xai
41
+ llm = xai.LLM(model="grok-4-latest")
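+         # create_response wraps chat.sample() / chat.stream() and keeps the chat history
+         response = await llm.create_response(
+             input="say hi", instructions="You are a helpful assistant."
+         )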
42
+
43
+ """
44
+
45
+ def __init__(
46
+ self,
47
+ model: str = "grok-4-latest",
48
+ api_key: Optional[str] = None,
49
+ client: Optional[AsyncClient] = None,
50
+ ):
51
+ """
52
+ Initialize the XAILLM class.
53
+
54
+ Args:
55
+ model (str): The xAI model to use. Defaults to "grok-4-latest"
56
+             api_key: Optional API key. By default, reads from the XAI_API_KEY environment variable.
57
+             client: Optional pre-configured xAI AsyncClient. By default, a new client is created.
58
+ """
59
+ super().__init__()
60
+ self.events.register_events_from_module(events)
61
+ self.model = model
62
+ self.xai_chat: Optional["Chat"] = None
63
+ self.conversation = None
64
+
65
+ if client is not None:
66
+ self.client = client
67
+ elif api_key is not None and api_key != "":
68
+ self.client = AsyncClient(api_key=api_key)
69
+ else:
70
+ self.client = AsyncClient()
71
+
72
+ async def simple_response(
73
+ self,
74
+ text: str,
75
+ processors: Optional[List[Processor]] = None,
76
+ participant: Optional[Participant] = None,
77
+ ):
78
+ """
79
+         simple_response is a standardized way (across OpenAI, Claude, Gemini, etc.) to create a response.
80
+
81
+ Args:
82
+ text: The text to respond to
83
+ processors: list of processors (which contain state) about the video/voice AI
84
+ participant: optionally the participant object
85
+
86
+ Examples:
87
+
88
+             await llm.simple_response("say hi to the user, be mean")
89
+ """
90
+ instructions = None
91
+ if self.conversation is not None:
92
+ instructions = self.conversation.instructions
93
+
94
+ return await self.create_response(
95
+ input=text,
96
+ instructions=instructions,
97
+ )
98
+
99
+ async def create_response(
100
+ self, *args: Any, **kwargs: Any
101
+ ) -> LLMResponseEvent[Response]:
102
+ """
103
+         create_response gives you full, native access to the xAI chat.sample() and chat.stream() methods.
104
+         It wraps the xAI call and broadcasts the standardized events that the agent class hooks into.
105
+ """
106
+ input_text = kwargs.get("input", "")
107
+ instructions = kwargs.get("instructions", "")
108
+ model = kwargs.get("model", self.model)
109
+ stream = kwargs.get("stream", True)
110
+
111
+ # Get tools if available
112
+ tools = self._get_tools_for_provider()
113
+
114
+ # Create or reuse chat session
115
+ if not self.xai_chat:
116
+ messages = []
117
+ if instructions:
118
+ messages.append(system(instructions))
119
+ create_kwargs = {"model": model, "messages": messages}
120
+ if tools:
121
+ create_kwargs["tools"] = tools
122
+ self.xai_chat = self.client.chat.create(**create_kwargs)
123
+
124
+ # Add user message
125
+ assert self.xai_chat is not None
126
+ self.xai_chat.append(user(input_text))
127
+
128
+ # Get response based on streaming preference
129
+ if stream:
130
+ # Handle streaming response
131
+ llm_response: Optional[LLMResponseEvent[Response]] = None
132
+ pending_tool_calls = []
133
+ seen = set()
134
+ assert self.xai_chat is not None
135
+ async for response, chunk in self.xai_chat.stream():
136
+ llm_response_optional = self._standardize_and_emit_chunk(
137
+ chunk, response
138
+ )
139
+ if llm_response_optional is not None:
140
+ llm_response = llm_response_optional
141
+
142
+ # Collect tool calls during streaming
143
+ if chunk.choices and chunk.choices[0].finish_reason:
144
+ calls = self._extract_tool_calls_from_response(response)
145
+ for c in calls:
146
+ key = (
147
+ c.get("id"),
148
+ c["name"],
149
+ json.dumps(c.get("arguments_json", {}), sort_keys=True),
150
+ )
151
+ if key not in seen:
152
+ pending_tool_calls.append(c)
153
+ seen.add(key)
154
+
155
+ # Add response to chat history
156
+ if llm_response and llm_response.original:
157
+ assert self.xai_chat is not None
158
+ self.xai_chat.append(llm_response.original)
159
+
160
+ # Handle tool calls if any
161
+ if pending_tool_calls:
162
+ llm_response = await self._handle_tool_calls(pending_tool_calls, kwargs)
163
+ else:
164
+ # Handle non-streaming response
165
+ assert self.xai_chat is not None
166
+ response = await self.xai_chat.sample()
167
+ llm_response = LLMResponseEvent[Response](response, response.content)
168
+
169
+ # Add response to chat history
170
+ assert self.xai_chat is not None
171
+ self.xai_chat.append(response)
172
+
173
+ # Check for tool calls
174
+ tool_calls = self._extract_tool_calls_from_response(response)
175
+ if tool_calls:
176
+ llm_response = await self._handle_tool_calls(tool_calls, kwargs)
177
+
178
+ if llm_response is not None:
179
+ self.events.send(
180
+ LLMResponseCompletedEvent(
181
+ original=llm_response.original, text=llm_response.text
182
+ )
183
+ )
184
+
185
+ return llm_response or LLMResponseEvent[Response](
186
+ Response(chat_pb2.GetChatCompletionResponse(), 0), ""
187
+ )
188
+
189
+ @staticmethod
190
+ def _normalize_message(input_text: str) -> List["Message"]:
191
+ """
192
+ Takes the input text and standardizes it so we can store it in chat
193
+ """
194
+ from vision_agents.core.agents.conversation import Message
195
+
196
+ # Create a standardized message from input text
197
+ message = Message(
198
+ original={"content": input_text, "role": "user", "type": "message"},
199
+ content=input_text,
200
+ )
201
+
202
+ return [message]
203
+
204
+ def _convert_tools_to_provider_format(self, tools: List[ToolSchema]) -> List[Any]:
205
+ """
206
+ Convert ToolSchema objects to xAI SDK format.
207
+
208
+ Args:
209
+ tools: List of ToolSchema objects from the function registry
210
+
211
+ Returns:
212
+ List of tool objects in xAI SDK format
213
+ """
214
+ out = []
215
+ for t in tools or []:
216
+ if not isinstance(t, dict):
217
+ continue
218
+ name = t.get("name", "unnamed_tool")
219
+ description = t.get("description", "") or ""
220
+ params = t.get("parameters_schema") or t.get("parameters") or {}
221
+ if not isinstance(params, dict):
222
+ params = {}
223
+ params.setdefault("type", "object")
224
+ params.setdefault("properties", {})
225
+ params.setdefault("additionalProperties", False)
226
+
227
+ out.append(
228
+ tool(
229
+ name=name,
230
+ description=description,
231
+ parameters=params,
232
+ )
233
+ )
234
+ return out
235
+
236
+ def _extract_tool_calls_from_response(
237
+ self, response: Response
238
+ ) -> List[NormalizedToolCallItem]:
239
+ """
240
+ Extract tool calls from xAI response.
241
+
242
+ Args:
243
+ response: xAI Response object
244
+
245
+ Returns:
246
+ List of normalized tool call items
247
+ """
248
+ calls = []
249
+ tool_calls = getattr(response, "tool_calls", None) or []
250
+ for tc in tool_calls:
251
+ func = getattr(tc, "function", None)
252
+ if not func:
253
+ continue
254
+
255
+ name = getattr(func, "name", "unknown")
256
+ args_str = getattr(func, "arguments", "{}")
257
+ call_id = getattr(tc, "id", "") or getattr(tc, "call_id", "")
258
+
259
+ try:
260
+ args_obj = (
261
+ json.loads(args_str) if isinstance(args_str, str) else args_str
262
+ )
263
+ except Exception:
264
+ args_obj = {}
265
+
266
+ call_item: NormalizedToolCallItem = {
267
+ "type": "tool_call",
268
+ "id": call_id,
269
+ "name": name,
270
+ "arguments_json": args_obj,
271
+ }
272
+ calls.append(call_item)
273
+ return calls
274
+
275
+ def _create_tool_result_message(
276
+ self, tool_calls: List[NormalizedToolCallItem], results: List[Any]
277
+ ) -> List[Any]:
278
+ """
279
+ Create tool result messages for xAI SDK.
280
+
281
+ Args:
282
+ tool_calls: List of tool calls that were executed
283
+ results: List of results from function execution
284
+
285
+ Returns:
286
+ List of tool result messages in xAI SDK format
287
+ """
288
+ msgs = []
289
+ for tc, res in zip(tool_calls, results):
290
+ call_id = tc.get("id")
291
+ if not call_id:
292
+ continue
293
+
294
+ output = res if isinstance(res, str) else json.dumps(res)
295
+ output_str = self._sanitize_tool_output(output)
296
+ msgs.append(tool_result(output_str))
297
+ return msgs
298
+
299
+ async def _handle_tool_calls(
300
+ self, tool_calls: List[NormalizedToolCallItem], original_kwargs: Dict[str, Any]
301
+ ) -> LLMResponseEvent[Response]:
302
+ """
303
+ Handle tool calls by executing them and getting a follow-up response.
304
+ Supports multi-round tool calling (max 3 rounds).
305
+
306
+ Args:
307
+ tool_calls: List of tool calls to execute
308
+ original_kwargs: Original kwargs from the request
309
+
310
+ Returns:
311
+ LLM response with tool results
312
+ """
313
+ llm_response: Optional[LLMResponseEvent[Response]] = None
314
+ max_rounds = 3
315
+ current_tool_calls = tool_calls
316
+ seen: set[tuple] = set()
317
+
318
+ for round_num in range(max_rounds):
319
+ triples, seen = await self._dedup_and_execute(
320
+ current_tool_calls,
321
+ max_concurrency=8,
322
+ timeout_s=30,
323
+ seen=seen,
324
+ )
325
+
326
+ if not triples:
327
+ break
328
+
329
+ tool_results = []
330
+ for tc, res, err in triples:
331
+ cid = tc.get("id")
332
+ if not cid:
333
+ continue
334
+
335
+ output = err if err is not None else res
336
+ output_str = self._sanitize_tool_output(output)
337
+ tool_results.append(tool_result(output_str))
338
+
339
+ if not tool_results:
340
+ return llm_response or LLMResponseEvent[Response](
341
+ Response(chat_pb2.GetChatCompletionResponse(), 0), ""
342
+ )
343
+
344
+ if not self.xai_chat:
345
+ return llm_response or LLMResponseEvent[Response](
346
+ Response(chat_pb2.GetChatCompletionResponse(), 0), ""
347
+ )
348
+
349
+ for tr in tool_results:
350
+ self.xai_chat.append(tr)
351
+
352
+ stream = original_kwargs.get("stream", True)
353
+ if stream:
354
+ llm_response = None
355
+ pending_tool_calls = []
356
+
357
+ async for response, chunk in self.xai_chat.stream():
358
+ llm_response_optional = self._standardize_and_emit_chunk(
359
+ chunk, response
360
+ )
361
+ if llm_response_optional is not None:
362
+ llm_response = llm_response_optional
363
+
364
+ if chunk.choices and chunk.choices[0].finish_reason:
365
+ calls = self._extract_tool_calls_from_response(response)
366
+ for c in calls:
367
+ key = (
368
+ c.get("id"),
369
+ c["name"],
370
+ json.dumps(c.get("arguments_json", {}), sort_keys=True),
371
+ )
372
+ if key not in seen:
373
+ pending_tool_calls.append(c)
374
+ seen.add(key)
375
+
376
+ if llm_response and llm_response.original:
377
+ self.xai_chat.append(llm_response.original)
378
+
379
+ if pending_tool_calls and round_num < max_rounds - 1:
380
+ current_tool_calls = pending_tool_calls
381
+ continue
382
+ else:
383
+ return llm_response or LLMResponseEvent[Response](
384
+ Response(chat_pb2.GetChatCompletionResponse(), 0), ""
385
+ )
386
+ else:
387
+ response = await self.xai_chat.sample()
388
+ llm_response = LLMResponseEvent[Response](response, response.content)
389
+ self.xai_chat.append(response)
390
+
391
+ next_tool_calls = self._extract_tool_calls_from_response(response)
392
+ if next_tool_calls and round_num < max_rounds - 1:
393
+ current_tool_calls = next_tool_calls
394
+ continue
395
+ else:
396
+ return llm_response
397
+
398
+ return llm_response or LLMResponseEvent[Response](
399
+ Response(chat_pb2.GetChatCompletionResponse(), 0), ""
400
+ )
401
+
402
+ def _standardize_and_emit_chunk(
403
+ self, chunk: Chunk, response: Response
404
+ ) -> Optional[LLMResponseEvent[Response]]:
405
+ """
406
+         Forwards the raw chunk event and also sends out a standardized version (the agent class hooks into the latter).
407
+ """
408
+ # Emit the raw chunk event
409
+ self.events.send(events.XAIChunkEvent(plugin_name="xai", chunk=chunk))
410
+
411
+ # Emit standardized delta events for content
412
+ if chunk.content:
413
+ self.events.send(
414
+ LLMResponseChunkEvent(
415
+ content_index=0, # xAI doesn't have content_index
416
+ item_id=chunk.proto.id if hasattr(chunk.proto, "id") else "",
417
+ output_index=0, # xAI doesn't have output_index
418
+ sequence_number=0, # xAI doesn't have sequence_number
419
+ delta=chunk.content,
420
+ plugin_name="xai",
421
+ )
422
+ )
423
+
424
+ # Check if this is the final chunk (finish_reason indicates completion)
425
+ if chunk.choices and chunk.choices[0].finish_reason:
426
+ # This is the final chunk, return the complete response
427
+ llm_response = LLMResponseEvent[Response](response, response.content)
428
+ self.events.send(
429
+ LLMResponseCompletedEvent(
430
+ plugin_name="xai",
431
+ text=llm_response.text,
432
+ original=llm_response.original,
433
+ )
434
+ )
435
+ return llm_response
436
+
437
+ return None
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"