videosdk-plugins-openai 0.0.9__tar.gz → 0.0.52__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- videosdk_plugins_openai-0.0.52/.gitignore +19 -0
- {videosdk_plugins_openai-0.0.9 → videosdk_plugins_openai-0.0.52}/PKG-INFO +5 -4
- videosdk_plugins_openai-0.0.52/README.md +9 -0
- {videosdk_plugins_openai-0.0.9 → videosdk_plugins_openai-0.0.52}/pyproject.toml +3 -5
- videosdk_plugins_openai-0.0.52/videosdk/plugins/openai/llm.py +341 -0
- {videosdk_plugins_openai-0.0.9 → videosdk_plugins_openai-0.0.52}/videosdk/plugins/openai/realtime_api.py +363 -170
- {videosdk_plugins_openai-0.0.9 → videosdk_plugins_openai-0.0.52}/videosdk/plugins/openai/stt.py +187 -9
- videosdk_plugins_openai-0.0.52/videosdk/plugins/openai/tts.py +246 -0
- videosdk_plugins_openai-0.0.52/videosdk/plugins/openai/version.py +1 -0
- videosdk_plugins_openai-0.0.9/.gitignore +0 -10
- videosdk_plugins_openai-0.0.9/README.md +0 -9
- videosdk_plugins_openai-0.0.9/videosdk/plugins/openai/llm.py +0 -161
- videosdk_plugins_openai-0.0.9/videosdk/plugins/openai/tts.py +0 -109
- videosdk_plugins_openai-0.0.9/videosdk/plugins/openai/version.py +0 -1
- {videosdk_plugins_openai-0.0.9 → videosdk_plugins_openai-0.0.52}/videosdk/plugins/openai/__init__.py +0 -0
```diff
--- videosdk_plugins_openai-0.0.9/PKG-INFO
+++ videosdk_plugins_openai-0.0.52/PKG-INFO
@@ -1,8 +1,9 @@
 Metadata-Version: 2.4
 Name: videosdk-plugins-openai
-Version: 0.0.9
+Version: 0.0.52
 Summary: VideoSDK Agent Framework plugin for OpenAI services
 Author: videosdk
+License-Expression: Apache-2.0
 Keywords: ai,audio,openai,video,videosdk
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
@@ -12,12 +13,12 @@ Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.11
 Requires-Dist: openai[realtime]>=1.68.2
-Requires-Dist: videosdk-agents>=0.0.15
+Requires-Dist: videosdk-agents>=0.0.52
 Description-Content-Type: text/markdown
 
-VideoSDK OpenAI Plugin
+# VideoSDK OpenAI Plugin
 
-Agent Framework plugin for realtime services from OpenAI.
+Agent Framework plugin for realtime, LLM, STT and TTS services from OpenAI.
 
 ## Installation
 
```
```diff
--- videosdk_plugins_openai-0.0.9/pyproject.toml
+++ videosdk_plugins_openai-0.0.52/pyproject.toml
@@ -7,8 +7,9 @@ name = "videosdk-plugins-openai"
 dynamic = ["version"]
 description = "VideoSDK Agent Framework plugin for OpenAI services"
 readme = "README.md"
+license = "Apache-2.0"
 requires-python = ">=3.11"
-authors = [{ name = "videosdk"}]
+authors = [{ name = "videosdk" }]
 keywords = ["video", "audio", "ai", "openai", "videosdk"]
 classifiers = [
     "Intended Audience :: Developers",
@@ -19,10 +20,7 @@ classifiers = [
     "Topic :: Multimedia :: Video",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
-dependencies = [
-    "videosdk-agents>=0.0.15",
-    "openai[realtime]>=1.68.2",
-]
+dependencies = ["videosdk-agents>=0.0.52", "openai[realtime]>=1.68.2"]
 
 [tool.hatch.version]
 path = "videosdk/plugins/openai/version.py"
```
videosdk_plugins_openai-0.0.52/videosdk/plugins/openai/llm.py (new file, @@ -0,0 +1,341 @@):

```python
from __future__ import annotations

import os
from typing import Any, AsyncIterator, List, Union
import json

import httpx
import openai
from videosdk.agents import (
    LLM,
    LLMResponse,
    ChatContext,
    ChatRole,
    ChatMessage,
    FunctionCall,
    FunctionCallOutput,
    ToolChoice,
    FunctionTool,
    is_function_tool,
    build_openai_schema,
    ConversationalGraphResponse
)
from videosdk.agents.llm.chat_context import ChatContent, ImageContent


def prepare_strict_schema(schema_dict):
    # Recursively prepare a JSON schema for OpenAI's strict structured
    # outputs: every object forbids additional properties and marks all of
    # its declared properties as required.
    if isinstance(schema_dict, dict):
        if schema_dict.get("type") == "object":
            schema_dict["additionalProperties"] = False
            if "properties" in schema_dict:
                all_props = list(schema_dict["properties"].keys())
                schema_dict["required"] = all_props

        for key, value in schema_dict.items():
            if isinstance(value, dict):
                prepare_strict_schema(value)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        prepare_strict_schema(item)
    return schema_dict


conversational_graph_schema = prepare_strict_schema(ConversationalGraphResponse.model_json_schema())
```
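
To make the effect concrete, here is a small illustration; the toy schema below is invented for the example and is not part of the package:

```python
# Hypothetical input, for illustration only.
toy = {
    "type": "object",
    "properties": {
        "reply": {"type": "string"},
        "next_node": {"type": "string"},
    },
}

prepare_strict_schema(toy)
# toy is mutated in place (and also returned); it now additionally contains:
#   "additionalProperties": False,
#   "required": ["reply", "next_node"]
```

The class itself wraps an `AsyncOpenAI` client configured with conservative timeouts and pooled keep-alive connections: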
```python
class OpenAILLM(LLM):

    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: str = "gpt-4o-mini",
        base_url: str | None = None,
        temperature: float = 0.7,
        tool_choice: ToolChoice = "auto",
        max_completion_tokens: int | None = None,
    ) -> None:
        """Initialize the OpenAI LLM plugin.

        Args:
            api_key (Optional[str], optional): OpenAI API key. Defaults to None.
            model (str): The model to use for the LLM plugin. Defaults to "gpt-4o-mini".
            base_url (Optional[str], optional): The base URL for the OpenAI API. Defaults to None.
            temperature (float): The sampling temperature to use. Defaults to 0.7.
            tool_choice (ToolChoice): The tool choice mode to use. Defaults to "auto".
            max_completion_tokens (Optional[int], optional): The maximum number of completion tokens. Defaults to None.
        """
        super().__init__()
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OpenAI API key must be provided either through api_key parameter or OPENAI_API_KEY environment variable"
            )

        self.model = model
        self.temperature = temperature
        self.tool_choice = tool_choice
        self.max_completion_tokens = max_completion_tokens
        self._cancelled = False

        self._client = openai.AsyncOpenAI(
            api_key=self.api_key,
            base_url=base_url or None,
            max_retries=0,
            http_client=httpx.AsyncClient(
                timeout=httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
                follow_redirects=True,
                limits=httpx.Limits(
                    max_connections=50,
                    max_keepalive_connections=50,
                    keepalive_expiry=120,
                ),
            ),
        )
```
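
A minimal construction sketch, assuming `OPENAI_API_KEY` is set in the environment (the import path follows the file location above):

```python
from videosdk.plugins.openai.llm import OpenAILLM

# Falls back to the OPENAI_API_KEY environment variable when api_key is omitted.
llm = OpenAILLM(model="gpt-4o-mini", temperature=0.3)
```

An Azure-flavored constructor follows, mirroring the environment-variable conventions of the official SDK: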
```python
    @staticmethod
    def azure(
        *,
        model: str = "gpt-4o-mini",
        azure_endpoint: str | None = None,
        azure_deployment: str | None = None,
        api_version: str | None = None,
        api_key: str | None = None,
        azure_ad_token: str | None = None,
        organization: str | None = None,
        project: str | None = None,
        base_url: str | None = None,
        temperature: float = 0.7,
        tool_choice: ToolChoice = "auto",
        max_completion_tokens: int | None = None,
        timeout: httpx.Timeout | None = None,
    ) -> "OpenAILLM":
        """
        Create a new instance of Azure OpenAI LLM.

        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
        - `api_key` from `AZURE_OPENAI_API_KEY`
        - `organization` from `OPENAI_ORG_ID`
        - `project` from `OPENAI_PROJECT_ID`
        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
        - `api_version` from `OPENAI_API_VERSION`
        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
        - `azure_deployment` from `AZURE_OPENAI_DEPLOYMENT` (if not provided, uses `model` as deployment name)
        """

        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")
        api_version = api_version or os.getenv("OPENAI_API_VERSION")
        api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY")
        azure_ad_token = azure_ad_token or os.getenv("AZURE_OPENAI_AD_TOKEN")
        organization = organization or os.getenv("OPENAI_ORG_ID")
        project = project or os.getenv("OPENAI_PROJECT_ID")

        if not azure_deployment:
            azure_deployment = model

        if not azure_endpoint:
            raise ValueError(
                "Azure endpoint must be provided either through azure_endpoint parameter or AZURE_OPENAI_ENDPOINT environment variable"
            )

        if not api_key and not azure_ad_token:
            raise ValueError("Either API key or Azure AD token must be provided")

        azure_client = openai.AsyncAzureOpenAI(
            max_retries=0,
            azure_endpoint=azure_endpoint,
            azure_deployment=azure_deployment,
            api_version=api_version,
            api_key=api_key,
            azure_ad_token=azure_ad_token,
            organization=organization,
            project=project,
            base_url=base_url,
            timeout=timeout
            if timeout
            else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
        )

        instance = OpenAILLM(
            model=model,
            temperature=temperature,
            tool_choice=tool_choice,
            max_completion_tokens=max_completion_tokens,
        )
        instance._client = azure_client
        return instance
```
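
A usage sketch for the Azure path, with credentials taken from the environment variables listed in the docstring. One caveat visible in the code: `azure()` constructs a plain `OpenAILLM` first and only then swaps in the Azure client, so the base constructor's `OPENAI_API_KEY` check still runs; a placeholder satisfies it when that key is absent (deployment name and API version below are illustrative):

```python
import os

# The base __init__ still requires OPENAI_API_KEY even on the Azure path.
os.environ.setdefault("OPENAI_API_KEY", "placeholder")

llm = OpenAILLM.azure(
    model="gpt-4o-mini",
    azure_deployment="my-gpt4o-mini-deployment",  # illustrative deployment name
    api_version="2024-06-01",                     # illustrative API version
)
```

The streaming `chat()` implementation converts the framework's `ChatContext` into OpenAI's message format: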
```python
    async def chat(
        self,
        messages: ChatContext,
        tools: list[FunctionTool] | None = None,
        conversational_graph: Any | None = None,
        **kwargs: Any
    ) -> AsyncIterator[LLMResponse]:
        """
        Implement chat functionality using OpenAI's chat completion API.

        Args:
            messages: ChatContext containing conversation history
            tools: Optional list of function tools available to the model
            conversational_graph: Optional graph; when set, responses are constrained to a strict JSON schema and streamed through it
            **kwargs: Additional arguments passed to the OpenAI API

        Yields:
            LLMResponse objects containing the model's responses
        """
        self._cancelled = False

        def _format_content(content: Union[str, List[ChatContent]]):
            # Plain strings pass through; mixed content becomes OpenAI
            # multimodal content parts (text and image_url).
            if isinstance(content, str):
                return content

            formatted_parts = []
            for part in content:
                if isinstance(part, str):
                    formatted_parts.append({"type": "text", "text": part})
                elif isinstance(part, ImageContent):
                    image_url_data = {"url": part.to_data_url()}
                    if part.inference_detail != "auto":
                        image_url_data["detail"] = part.inference_detail
                    formatted_parts.append(
                        {
                            "type": "image_url",
                            "image_url": image_url_data,
                        }
                    )
            return formatted_parts
```
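
For reference, the shape `_format_content` emits for mixed content is OpenAI's content-part list; a hand-built equivalent (URL and detail values are placeholders):

```python
# What a text-plus-image user message looks like after formatting.
formatted = [
    {"type": "text", "text": "Describe this image."},
    {
        "type": "image_url",
        "image_url": {
            "url": "data:image/jpeg;base64,...",  # placeholder data URL
            "detail": "high",  # present only when inference_detail != "auto"
        },
    },
]
```

The request payload is then assembled, including the strict `response_format` when a conversational graph is active: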
```python
        completion_params = {
            "model": self.model,
            "messages": [
                {
                    "role": msg.role.value,
                    "content": _format_content(msg.content),
                    **({"name": msg.name} if hasattr(msg, "name") else {}),
                }
                if isinstance(msg, ChatMessage)
                else {
                    "role": "assistant",
                    "content": None,
                    "function_call": {"name": msg.name, "arguments": msg.arguments},
                }
                if isinstance(msg, FunctionCall)
                else {
                    "role": "function",
                    "name": msg.name,
                    "content": msg.output,
                }
                if isinstance(msg, FunctionCallOutput)
                else None
                for msg in messages.items
                if msg is not None
            ],
            "temperature": self.temperature,
            "stream": True,
            "max_tokens": self.max_completion_tokens,
        }

        if conversational_graph:
            completion_params["response_format"] = {
                "type": "json_schema",
                "json_schema": {
                    "name": "conversational_graph_response",
                    "strict": True,
                    "schema": conversational_graph_schema
                }
            }
        if tools:
            formatted_tools = []
            for tool in tools:
                if not is_function_tool(tool):
                    continue
                try:
                    tool_schema = build_openai_schema(tool)
                    formatted_tools.append(tool_schema)
                except Exception as e:
                    self.emit("error", f"Failed to format tool {tool}: {e}")
                    continue

            if formatted_tools:
                completion_params["functions"] = formatted_tools
                completion_params["function_call"] = self.tool_choice
        completion_params.update(kwargs)
```
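
Note that tools are sent through OpenAI's legacy `functions`/`function_call` fields rather than the newer `tools`/`tool_choice` fields. The legacy API expects entries shaped like the sketch below (`get_weather` is a hypothetical tool; `build_openai_schema` is assumed to emit this `{name, description, parameters}` format):

```python
# Shape of the legacy function-calling fields this plugin sends.
legacy_function_fields = {
    "functions": [
        {
            "name": "get_weather",  # hypothetical tool
            "description": "Look up current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }
    ],
    "function_call": "auto",
}
```

The streaming loop follows, accumulating function-call fragments and forwarding content deltas: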
```python
        try:
            response_stream = await self._client.chat.completions.create(**completion_params)
            current_content = ""
            current_function_call = None
            streaming_state = {
                "in_response": False,
                "response_start_index": -1,
                "yielded_content_length": 0
            }

            async for chunk in response_stream:
                if self._cancelled:
                    break

                if not chunk.choices:
                    continue

                delta = chunk.choices[0].delta
                if delta.function_call:
                    # Accumulate streamed function-call fragments.
                    if current_function_call is None:
                        current_function_call = {
                            "name": delta.function_call.name or "",
                            "arguments": delta.function_call.arguments or ""
                        }
                    else:
                        if delta.function_call.name:
                            current_function_call["name"] += delta.function_call.name
                        if delta.function_call.arguments:
                            current_function_call["arguments"] += delta.function_call.arguments
                elif current_function_call is not None:
                    # First non-function-call chunk: the call is complete, so
                    # parse its arguments and emit it as metadata.
                    try:
                        args = json.loads(current_function_call["arguments"])
                        current_function_call["arguments"] = args
                    except json.JSONDecodeError:
                        self.emit("error", f"Failed to parse function arguments: {current_function_call['arguments']}")
                        current_function_call["arguments"] = {}

                    yield LLMResponse(
                        content="",
                        role=ChatRole.ASSISTANT,
                        metadata={"function_call": current_function_call}
                    )
                    current_function_call = None

                elif delta.content is not None:
                    current_content += delta.content
                    if conversational_graph:
                        for content_chunk in conversational_graph.stream_conversational_graph_response(current_content, streaming_state):
                            yield LLMResponse(content=content_chunk, role=ChatRole.ASSISTANT)
                    else:
                        yield LLMResponse(content=delta.content, role=ChatRole.ASSISTANT)

            if current_content and not self._cancelled:
                if conversational_graph:
                    # Once the stream ends, emit the full structured response
                    # as metadata (or fall back to raw text if it is not JSON).
                    try:
                        parsed_json = json.loads(current_content.strip())
                        yield LLMResponse(
                            content="",
                            role=ChatRole.ASSISTANT,
                            metadata=parsed_json
                        )
                    except json.JSONDecodeError:
                        yield LLMResponse(
                            content=current_content,
                            role=ChatRole.ASSISTANT
                        )
                else:
                    pass

        except Exception as e:
            if not self._cancelled:
                self.emit("error", e)
            raise
```
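
A sketch of consuming the stream; the `ChatContext` is assumed to be populated elsewhere, and `LLMResponse.metadata` is assumed to default to a falsy value on plain text turns:

```python
async def run_turn(llm: OpenAILLM, ctx: ChatContext) -> None:
    async for resp in llm.chat(ctx):
        if resp.metadata:
            # Completed tool call (or final structured-graph payload).
            print("metadata:", resp.metadata)
        else:
            print(resp.content, end="", flush=True)
```

Finally, cancellation and teardown: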
```python
    async def cancel_current_generation(self) -> None:
        # Cooperative cancellation: the streaming loop in chat() checks this
        # flag and stops at the next chunk.
        self._cancelled = True

    async def aclose(self) -> None:
        """Cleanup resources by closing the HTTP client"""
        await self.cancel_current_generation()
        if self._client:
            await self._client.close()
        await super().aclose()
```
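
Because cancellation is a flag checked once per chunk, a caller can interrupt a generation mid-stream; a sketch reusing `run_turn` from above (the task orchestration is illustrative):

```python
import asyncio

async def barge_in_demo(llm: OpenAILLM, ctx: ChatContext) -> None:
    task = asyncio.create_task(run_turn(llm, ctx))
    await asyncio.sleep(0.5)               # pretend the user interrupted
    await llm.cancel_current_generation()  # loop exits on the next chunk
    await task
    await llm.aclose()                     # also closes the HTTP client
```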