videosdk-plugins-openai 0.0.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videosdk-plugins-openai might be problematic; see the registry's advisory page for more details.
- videosdk_plugins_openai-0.0.46/.gitignore +19 -0
- videosdk_plugins_openai-0.0.46/PKG-INFO +27 -0
- videosdk_plugins_openai-0.0.46/README.md +9 -0
- videosdk_plugins_openai-0.0.46/pyproject.toml +32 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/__init__.py +12 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/llm.py +287 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/realtime_api.py +753 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/stt.py +438 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/tts.py +246 -0
- videosdk_plugins_openai-0.0.46/videosdk/plugins/openai/version.py +1 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: videosdk-plugins-openai
|
|
3
|
+
Version: 0.0.46
|
|
4
|
+
Summary: VideoSDK Agent Framework plugin for OpenAI services
|
|
5
|
+
Author: videosdk
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Keywords: ai,audio,openai,video,videosdk
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Topic :: Communications :: Conferencing
|
|
11
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
12
|
+
Classifier: Topic :: Multimedia :: Video
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: openai[realtime]>=1.68.2
|
|
16
|
+
Requires-Dist: videosdk-agents>=0.0.46
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# VideoSDK OpenAI Plugin
|
|
20
|
+
|
|
21
|
+
Agent Framework plugin for realtime, LLM, STT and TTS services from OpenAI.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install videosdk-plugins-openai
|
|
27
|
+
```
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "videosdk-plugins-openai"
dynamic = ["version"]
description = "VideoSDK Agent Framework plugin for OpenAI services"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.11"
authors = [{ name = "videosdk" }]
keywords = ["video", "audio", "ai", "openai", "videosdk"]
classifiers = [
    "Development Status :: 4 - Beta",
    # NOTE: this classifier was previously listed twice; duplicate removed.
    "Intended Audience :: Developers",
    "Topic :: Communications :: Conferencing",
    "Topic :: Multimedia :: Sound/Audio",
    "Topic :: Multimedia :: Video",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = ["videosdk-agents>=0.0.46", "openai[realtime]>=1.68.2"]

[tool.hatch.version]
path = "videosdk/plugins/openai/version.py"

[tool.hatch.build.targets.wheel]
packages = ["videosdk"]

[tool.hatch.build.targets.sdist]
include = ["/videosdk"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Public API of the VideoSDK OpenAI plugin package."""

from .realtime_api import OpenAIRealtime, OpenAIRealtimeConfig
from .llm import OpenAILLM
from .stt import OpenAISTT
from .tts import OpenAITTS

# Exported names, sorted alphabetically; the set is identical to the
# package's original public surface.
__all__ = [
    "OpenAILLM",
    "OpenAIRealtime",
    "OpenAIRealtimeConfig",
    "OpenAISTT",
    "OpenAITTS",
]
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any, AsyncIterator, List, Union
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
import openai
|
|
9
|
+
from videosdk.agents import (
|
|
10
|
+
LLM,
|
|
11
|
+
LLMResponse,
|
|
12
|
+
ChatContext,
|
|
13
|
+
ChatRole,
|
|
14
|
+
ChatMessage,
|
|
15
|
+
FunctionCall,
|
|
16
|
+
FunctionCallOutput,
|
|
17
|
+
ToolChoice,
|
|
18
|
+
FunctionTool,
|
|
19
|
+
is_function_tool,
|
|
20
|
+
build_openai_schema,
|
|
21
|
+
)
|
|
22
|
+
from videosdk.agents.llm.chat_context import ChatContent, ImageContent
|
|
23
|
+
|
|
24
|
+
class OpenAILLM(LLM):
    """OpenAI chat-completions LLM plugin for the VideoSDK Agent Framework.

    Wraps ``openai.AsyncOpenAI`` (or ``openai.AsyncAzureOpenAI`` via
    :meth:`azure`) and streams model output as :class:`LLMResponse` objects.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: str = "gpt-4o-mini",
        base_url: str | None = None,
        temperature: float = 0.7,
        tool_choice: ToolChoice = "auto",
        max_completion_tokens: int | None = None,
    ) -> None:
        """Initialize the OpenAI LLM plugin.

        Args:
            api_key (Optional[str], optional): OpenAI API key. Falls back to the
                ``OPENAI_API_KEY`` environment variable. Defaults to None.
            model (str): The model to use for the LLM plugin. Defaults to "gpt-4o-mini".
            base_url (Optional[str], optional): The base URL for the OpenAI API. Defaults to None.
            temperature (float): The sampling temperature to use for the LLM plugin. Defaults to 0.7.
            tool_choice (ToolChoice): The tool choice to use for the LLM plugin. Defaults to "auto".
            max_completion_tokens (Optional[int], optional): The maximum completion tokens to use for the LLM plugin. Defaults to None.

        Raises:
            ValueError: If no API key is available from either source.
        """
        super().__init__()
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key must be provided either through api_key parameter or OPENAI_API_KEY environment variable")

        self.model = model
        self.temperature = temperature
        self.tool_choice = tool_choice
        self.max_completion_tokens = max_completion_tokens
        # Set by cancel_current_generation(); checked per streamed chunk in chat().
        self._cancelled = False

        self._client = openai.AsyncOpenAI(
            api_key=self.api_key,
            base_url=base_url or None,
            max_retries=0,  # retries are the caller's responsibility
            http_client=httpx.AsyncClient(
                timeout=httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
                follow_redirects=True,
                limits=httpx.Limits(
                    max_connections=50,
                    max_keepalive_connections=50,
                    keepalive_expiry=120,
                ),
            ),
        )

    @staticmethod
    def azure(
        *,
        model: str = "gpt-4o-mini",
        azure_endpoint: str | None = None,
        azure_deployment: str | None = None,
        api_version: str | None = None,
        api_key: str | None = None,
        azure_ad_token: str | None = None,
        organization: str | None = None,
        project: str | None = None,
        base_url: str | None = None,
        temperature: float = 0.7,
        tool_choice: ToolChoice = "auto",
        max_completion_tokens: int | None = None,
        timeout: httpx.Timeout | None = None,
    ) -> "OpenAILLM":
        """
        Create a new instance of Azure OpenAI LLM.

        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
        - `api_key` from `AZURE_OPENAI_API_KEY`
        - `organization` from `OPENAI_ORG_ID`
        - `project` from `OPENAI_PROJECT_ID`
        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
        - `api_version` from `OPENAI_API_VERSION`
        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
        - `azure_deployment` from `AZURE_OPENAI_DEPLOYMENT` (if not provided, uses `model` as deployment name)

        Raises:
            ValueError: If no Azure endpoint is available, or if neither an API
                key nor an Azure AD token is available.
        """

        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")
        api_version = api_version or os.getenv("OPENAI_API_VERSION")
        api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY")
        azure_ad_token = azure_ad_token or os.getenv("AZURE_OPENAI_AD_TOKEN")
        organization = organization or os.getenv("OPENAI_ORG_ID")
        project = project or os.getenv("OPENAI_PROJECT_ID")

        if not azure_deployment:
            azure_deployment = model

        if not azure_endpoint:
            raise ValueError("Azure endpoint must be provided either through azure_endpoint parameter or AZURE_OPENAI_ENDPOINT environment variable")

        if not api_key and not azure_ad_token:
            raise ValueError("Either API key or Azure AD token must be provided")

        azure_client = openai.AsyncAzureOpenAI(
            max_retries=0,
            azure_endpoint=azure_endpoint,
            azure_deployment=azure_deployment,
            api_version=api_version,
            api_key=api_key,
            azure_ad_token=azure_ad_token,
            organization=organization,
            project=project,
            base_url=base_url,
            timeout=timeout
            if timeout
            else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
        )

        # BUGFIX: previously OpenAILLM() was constructed without an api_key,
        # so this factory raised ValueError whenever OPENAI_API_KEY was unset
        # in the environment — even with valid Azure credentials. Pass the
        # Azure credential through to satisfy the base constructor's check;
        # the default client it builds is immediately replaced below.
        instance = OpenAILLM(
            api_key=api_key or azure_ad_token,
            model=model,
            temperature=temperature,
            tool_choice=tool_choice,
            max_completion_tokens=max_completion_tokens,
        )
        instance._client = azure_client
        return instance

    async def chat(
        self,
        messages: ChatContext,
        tools: list[FunctionTool] | None = None,
        **kwargs: Any
    ) -> AsyncIterator[LLMResponse]:
        """
        Implement chat functionality using OpenAI's chat completion API

        Args:
            messages: ChatContext containing conversation history
            tools: Optional list of function tools available to the model
            **kwargs: Additional arguments passed to the OpenAI API

        Yields:
            LLMResponse objects containing the model's responses
        """
        self._cancelled = False

        def _format_content(content: Union[str, List[ChatContent]]):
            # Convert a message's content into the OpenAI content-parts
            # format; plain strings pass through unchanged.
            if isinstance(content, str):
                return content

            formatted_parts = []
            for part in content:
                if isinstance(part, str):
                    formatted_parts.append({"type": "text", "text": part})
                elif isinstance(part, ImageContent):
                    image_url_data = {"url": part.to_data_url()}
                    if part.inference_detail != "auto":
                        image_url_data["detail"] = part.inference_detail
                    formatted_parts.append(
                        {
                            "type": "image_url",
                            "image_url": image_url_data,
                        }
                    )
            return formatted_parts

        # NOTE(review): items that are none of ChatMessage / FunctionCall /
        # FunctionCallOutput map to None and are NOT filtered out here (the
        # `if msg is not None` guard runs before the mapping) — confirm the
        # context can never contain other item types.
        completion_params = {
            "model": self.model,
            "messages": [
                {
                    "role": msg.role.value,
                    "content": _format_content(msg.content),
                    **({"name": msg.name} if hasattr(msg, "name") else {}),
                }
                if isinstance(msg, ChatMessage)
                else {
                    "role": "assistant",
                    "content": None,
                    "function_call": {"name": msg.name, "arguments": msg.arguments},
                }
                if isinstance(msg, FunctionCall)
                else {
                    "role": "function",
                    "name": msg.name,
                    "content": msg.output,
                }
                if isinstance(msg, FunctionCallOutput)
                else None
                for msg in messages.items
                if msg is not None
            ],
            "temperature": self.temperature,
            "stream": True,
            # NOTE(review): sent as the legacy `max_tokens` parameter even
            # though the attribute is named max_completion_tokens — confirm
            # against the target API version before switching.
            "max_tokens": self.max_completion_tokens,
        }
        if tools:
            formatted_tools = []
            for tool in tools:
                if not is_function_tool(tool):
                    continue
                try:
                    tool_schema = build_openai_schema(tool)
                    formatted_tools.append(tool_schema)
                except Exception as e:
                    self.emit("error", f"Failed to format tool {tool}: {e}")
                    continue

            if formatted_tools:
                # NOTE(review): `functions` / `function_call` are deprecated
                # in favor of `tools` / `tool_choice` in the Chat Completions
                # API; kept as-is to preserve behavior.
                completion_params["functions"] = formatted_tools
                completion_params["function_call"] = self.tool_choice
        completion_params.update(kwargs)
        try:
            response_stream = await self._client.chat.completions.create(**completion_params)

            current_content = ""
            current_function_call = None

            async for chunk in response_stream:
                if self._cancelled:
                    break

                if not chunk.choices:
                    continue

                delta = chunk.choices[0].delta
                if delta.function_call:
                    # Accumulate streamed function-call name/argument fragments.
                    if current_function_call is None:
                        current_function_call = {
                            "name": delta.function_call.name or "",
                            "arguments": delta.function_call.arguments or ""
                        }
                    else:
                        if delta.function_call.name:
                            current_function_call["name"] += delta.function_call.name
                        if delta.function_call.arguments:
                            current_function_call["arguments"] += delta.function_call.arguments
                elif current_function_call is not None:
                    # First non-function-call chunk ends the call: parse the
                    # accumulated JSON arguments and emit the call once.
                    try:
                        args = json.loads(current_function_call["arguments"])
                        current_function_call["arguments"] = args
                    except json.JSONDecodeError:
                        self.emit("error", f"Failed to parse function arguments: {current_function_call['arguments']}")
                        current_function_call["arguments"] = {}

                    yield LLMResponse(
                        content="",
                        role=ChatRole.ASSISTANT,
                        metadata={"function_call": current_function_call}
                    )
                    current_function_call = None

                elif delta.content is not None:
                    # Yield each text delta as it arrives (not cumulative).
                    current_content = delta.content
                    yield LLMResponse(
                        content=current_content,
                        role=ChatRole.ASSISTANT
                    )

        except Exception as e:
            # Errors after an explicit cancel are expected; don't surface them.
            if not self._cancelled:
                self.emit("error", e)
            raise

    async def cancel_current_generation(self) -> None:
        """Request that any in-flight chat() stream stop at the next chunk."""
        self._cancelled = True

    async def aclose(self) -> None:
        """Cleanup resources by closing the HTTP client"""
        await self.cancel_current_generation()
        if self._client:
            await self._client.close()
        await super().aclose()
|