videosdk-plugins-openai 0.0.9__tar.gz → 0.0.52__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
.gitignore (new file)
@@ -0,0 +1,19 @@
+ myenv/
+ venv/
+ env/
+ __pycache__/
+ .venv/
+ .env
+ .env.local
+ test_env/
+ dist/
+ .DS_Store
+ node_modules/
+ credentials.json
+ .Python
+ build/
+ eggs/
+ sdist/
+ wheels/
+ docs/
+ agent-sdk-reference/
PKG-INFO
@@ -1,8 +1,9 @@
  Metadata-Version: 2.4
  Name: videosdk-plugins-openai
- Version: 0.0.9
+ Version: 0.0.52
  Summary: VideoSDK Agent Framework plugin for OpenAI services
  Author: videosdk
+ License-Expression: Apache-2.0
  Keywords: ai,audio,openai,video,videosdk
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
@@ -12,12 +13,12 @@ Classifier: Topic :: Multimedia :: Video
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.11
  Requires-Dist: openai[realtime]>=1.68.2
- Requires-Dist: videosdk-agents>=0.0.15
+ Requires-Dist: videosdk-agents>=0.0.52
  Description-Content-Type: text/markdown

- VideoSDK OpenAI Plugin
+ # VideoSDK OpenAI Plugin

- Agent Framework plugin for realtime services from OpenAI.
+ Agent Framework plugin for realtime, LLM, STT and TTS services from OpenAI.

  ## Installation

README.md (new file)
@@ -0,0 +1,9 @@
+ # VideoSDK OpenAI Plugin
+
+ Agent Framework plugin for realtime, LLM, STT and TTS services from OpenAI.
+
+ ## Installation
+
+ ```bash
+ pip install videosdk-plugins-openai
+ ```
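The README stops at installation. As a point of reference, here is a minimal usage sketch based on the `OpenAILLM` class added in this release (shown in the `llm.py` hunk further down). The import path is inferred from the `videosdk/plugins/openai/` package layout, and the `ChatContext`/`ChatMessage` construction is a hypothetical reading of the `videosdk-agents` API, not confirmed by this diff:

```python
import asyncio

# ChatContext/ChatMessage/ChatRole are imported by the new llm.py; their
# constructor signatures below are assumptions for illustration.
from videosdk.agents import ChatContext, ChatMessage, ChatRole
# Import path inferred from the package layout in this diff.
from videosdk.plugins.openai import OpenAILLM

async def main() -> None:
    # api_key falls back to the OPENAI_API_KEY environment variable;
    # OpenAILLM.azure(...) builds the Azure-backed variant instead.
    llm = OpenAILLM(model="gpt-4o-mini", temperature=0.7)

    # Hypothetical context construction; chat() only requires that
    # ctx.items yields ChatMessage-like objects.
    ctx = ChatContext(items=[ChatMessage(role=ChatRole.USER, content="Hello!")])

    try:
        # chat() streams LLMResponse objects as deltas arrive.
        async for response in llm.chat(messages=ctx):
            print(response.content, end="", flush=True)
    finally:
        await llm.aclose()  # closes the underlying HTTP client

asyncio.run(main())
```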
pyproject.toml
@@ -7,8 +7,9 @@ name = "videosdk-plugins-openai"
  dynamic = ["version"]
  description = "VideoSDK Agent Framework plugin for OpenAI services"
  readme = "README.md"
+ license = "Apache-2.0"
  requires-python = ">=3.11"
- authors = [{ name = "videosdk"}]
+ authors = [{ name = "videosdk" }]
  keywords = ["video", "audio", "ai", "openai", "videosdk"]
  classifiers = [
      "Intended Audience :: Developers",
@@ -19,10 +20,7 @@ classifiers = [
      "Topic :: Multimedia :: Video",
      "Topic :: Scientific/Engineering :: Artificial Intelligence",
  ]
- dependencies = [
-     "videosdk-agents>=0.0.15",
-     "openai[realtime]>=1.68.2",
- ]
+ dependencies = ["videosdk-agents>=0.0.52", "openai[realtime]>=1.68.2"]

  [tool.hatch.version]
  path = "videosdk/plugins/openai/version.py"
videosdk/plugins/openai/llm.py (new file)
@@ -0,0 +1,341 @@
+ from __future__ import annotations
+ import os
+ from typing import Any, AsyncIterator, List, Union
+ import json
+
+ import httpx
+ import openai
+ from videosdk.agents import (
+     LLM,
+     LLMResponse,
+     ChatContext,
+     ChatRole,
+     ChatMessage,
+     FunctionCall,
+     FunctionCallOutput,
+     ToolChoice,
+     FunctionTool,
+     is_function_tool,
+     build_openai_schema,
+     ConversationalGraphResponse
+ )
+ from videosdk.agents.llm.chat_context import ChatContent, ImageContent
+
+
+ def prepare_strict_schema(schema_dict):
+     if isinstance(schema_dict, dict):
+         if schema_dict.get("type") == "object":
+             schema_dict["additionalProperties"] = False
+             if "properties" in schema_dict:
+                 all_props = list(schema_dict["properties"].keys())
+                 schema_dict["required"] = all_props
+
+         for key, value in schema_dict.items():
+             if isinstance(value, dict):
+                 prepare_strict_schema(value)
+             elif isinstance(value, list):
+                 for item in value:
+                     if isinstance(item, dict):
+                         prepare_strict_schema(item)
+     return schema_dict
+
+
+ conversational_graph_schema = prepare_strict_schema(ConversationalGraphResponse.model_json_schema())
+
+ class OpenAILLM(LLM):
+
+     def __init__(
+         self,
+         *,
+         api_key: str | None = None,
+         model: str = "gpt-4o-mini",
+         base_url: str | None = None,
+         temperature: float = 0.7,
+         tool_choice: ToolChoice = "auto",
+         max_completion_tokens: int | None = None,
+     ) -> None:
+         """Initialize the OpenAI LLM plugin.
+
+         Args:
+             api_key (Optional[str], optional): OpenAI API key. Defaults to None.
+             model (str): The model to use for the LLM plugin. Defaults to "gpt-4o-mini".
+             base_url (Optional[str], optional): The base URL for the OpenAI API. Defaults to None.
+             temperature (float): The temperature to use for the LLM plugin. Defaults to 0.7.
+             tool_choice (ToolChoice): The tool choice to use for the LLM plugin. Defaults to "auto".
+             max_completion_tokens (Optional[int], optional): The maximum completion tokens to use for the LLM plugin. Defaults to None.
+         """
+         super().__init__()
+         self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+         if not self.api_key:
+             raise ValueError("OpenAI API key must be provided either through api_key parameter or OPENAI_API_KEY environment variable")
+
+         self.model = model
+         self.temperature = temperature
+         self.tool_choice = tool_choice
+         self.max_completion_tokens = max_completion_tokens
+         self._cancelled = False
+
+         self._client = openai.AsyncOpenAI(
+             api_key=self.api_key,
+             base_url=base_url or None,
+             max_retries=0,
+             http_client=httpx.AsyncClient(
+                 timeout=httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
+                 follow_redirects=True,
+                 limits=httpx.Limits(
+                     max_connections=50,
+                     max_keepalive_connections=50,
+                     keepalive_expiry=120,
+                 ),
+             ),
+         )
+
+     @staticmethod
+     def azure(
+         *,
+         model: str = "gpt-4o-mini",
+         azure_endpoint: str | None = None,
+         azure_deployment: str | None = None,
+         api_version: str | None = None,
+         api_key: str | None = None,
+         azure_ad_token: str | None = None,
+         organization: str | None = None,
+         project: str | None = None,
+         base_url: str | None = None,
+         temperature: float = 0.7,
+         tool_choice: ToolChoice = "auto",
+         max_completion_tokens: int | None = None,
+         timeout: httpx.Timeout | None = None,
+     ) -> "OpenAILLM":
+         """
+         Create a new instance of Azure OpenAI LLM.
+
+         This automatically infers the following arguments from their corresponding environment variables if they are not provided:
+         - `api_key` from `AZURE_OPENAI_API_KEY`
+         - `organization` from `OPENAI_ORG_ID`
+         - `project` from `OPENAI_PROJECT_ID`
+         - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
+         - `api_version` from `OPENAI_API_VERSION`
+         - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
+         - `azure_deployment` from `AZURE_OPENAI_DEPLOYMENT` (if not provided, uses `model` as deployment name)
+         """
+
+         azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
+         azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")
+         api_version = api_version or os.getenv("OPENAI_API_VERSION")
+         api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY")
+         azure_ad_token = azure_ad_token or os.getenv("AZURE_OPENAI_AD_TOKEN")
+         organization = organization or os.getenv("OPENAI_ORG_ID")
+         project = project or os.getenv("OPENAI_PROJECT_ID")
+
+         if not azure_deployment:
+             azure_deployment = model
+
+         if not azure_endpoint:
+             raise ValueError("Azure endpoint must be provided either through azure_endpoint parameter or AZURE_OPENAI_ENDPOINT environment variable")
+
+         if not api_key and not azure_ad_token:
+             raise ValueError("Either API key or Azure AD token must be provided")
+
+         azure_client = openai.AsyncAzureOpenAI(
+             max_retries=0,
+             azure_endpoint=azure_endpoint,
+             azure_deployment=azure_deployment,
+             api_version=api_version,
+             api_key=api_key,
+             azure_ad_token=azure_ad_token,
+             organization=organization,
+             project=project,
+             base_url=base_url,
+             timeout=timeout
+             if timeout
+             else httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
+         )
+
+         instance = OpenAILLM(
+             model=model,
+             temperature=temperature,
+             tool_choice=tool_choice,
+             max_completion_tokens=max_completion_tokens,
+         )
+         instance._client = azure_client
+         return instance
+
+     async def chat(
+         self,
+         messages: ChatContext,
+         tools: list[FunctionTool] | None = None,
+         conversational_graph: Any | None = None,
+         **kwargs: Any
+     ) -> AsyncIterator[LLMResponse]:
+         """
+         Implement chat functionality using OpenAI's chat completion API
+
+         Args:
+             messages: ChatContext containing conversation history
+             tools: Optional list of function tools available to the model
+             **kwargs: Additional arguments passed to the OpenAI API
+
+         Yields:
+             LLMResponse objects containing the model's responses
+         """
+         self._cancelled = False
+
+         def _format_content(content: Union[str, List[ChatContent]]):
+             if isinstance(content, str):
+                 return content
+
+             formatted_parts = []
+             for part in content:
+                 if isinstance(part, str):
+                     formatted_parts.append({"type": "text", "text": part})
+                 elif isinstance(part, ImageContent):
+                     image_url_data = {"url": part.to_data_url()}
+                     if part.inference_detail != "auto":
+                         image_url_data["detail"] = part.inference_detail
+                     formatted_parts.append(
+                         {
+                             "type": "image_url",
+                             "image_url": image_url_data,
+                         }
+                     )
+             return formatted_parts
+
+         completion_params = {
+             "model": self.model,
+             "messages": [
+                 {
+                     "role": msg.role.value,
+                     "content": _format_content(msg.content),
+                     **({"name": msg.name} if hasattr(msg, "name") else {}),
+                 }
+                 if isinstance(msg, ChatMessage)
+                 else {
+                     "role": "assistant",
+                     "content": None,
+                     "function_call": {"name": msg.name, "arguments": msg.arguments},
+                 }
+                 if isinstance(msg, FunctionCall)
+                 else {
+                     "role": "function",
+                     "name": msg.name,
+                     "content": msg.output,
+                 }
+                 if isinstance(msg, FunctionCallOutput)
+                 else None
+                 for msg in messages.items
+                 if msg is not None
+             ],
+             "temperature": self.temperature,
+             "stream": True,
+             "max_tokens": self.max_completion_tokens,
+         }
+
+         if conversational_graph:
+             completion_params["response_format"] = {
+                 "type": "json_schema",
+                 "json_schema": {
+                     "name": "conversational_graph_response",
+                     "strict": True,
+                     "schema": conversational_graph_schema
+                 }
+             }
+         if tools:
+             formatted_tools = []
+             for tool in tools:
+                 if not is_function_tool(tool):
+                     continue
+                 try:
+                     tool_schema = build_openai_schema(tool)
+                     formatted_tools.append(tool_schema)
+                 except Exception as e:
+                     self.emit("error", f"Failed to format tool {tool}: {e}")
+                     continue
+
+             if formatted_tools:
+                 completion_params["functions"] = formatted_tools
+                 completion_params["function_call"] = self.tool_choice
+         completion_params.update(kwargs)
+         try:
+             response_stream = await self._client.chat.completions.create(**completion_params)
+             current_content = ""
+             current_function_call = None
+             streaming_state = {
+                 "in_response": False,
+                 "response_start_index": -1,
+                 "yielded_content_length": 0
+             }
+
+             async for chunk in response_stream:
+                 if self._cancelled:
+                     break
+
+                 if not chunk.choices:
+                     continue
+
+                 delta = chunk.choices[0].delta
+                 if delta.function_call:
+                     if current_function_call is None:
+                         current_function_call = {
+                             "name": delta.function_call.name or "",
+                             "arguments": delta.function_call.arguments or ""
+                         }
+                     else:
+                         if delta.function_call.name:
+                             current_function_call["name"] += delta.function_call.name
+                         if delta.function_call.arguments:
+                             current_function_call["arguments"] += delta.function_call.arguments
+                 elif current_function_call is not None:
+                     try:
+                         args = json.loads(current_function_call["arguments"])
+                         current_function_call["arguments"] = args
+                     except json.JSONDecodeError:
+                         self.emit("error", f"Failed to parse function arguments: {current_function_call['arguments']}")
+                         current_function_call["arguments"] = {}
+
+                     yield LLMResponse(
+                         content="",
+                         role=ChatRole.ASSISTANT,
+                         metadata={"function_call": current_function_call}
+                     )
+                     current_function_call = None
+
+                 elif delta.content is not None:
+                     current_content += delta.content
+                     if conversational_graph:
+                         for content_chunk in conversational_graph.stream_conversational_graph_response(current_content, streaming_state):
+                             yield LLMResponse(content=content_chunk, role=ChatRole.ASSISTANT)
+                     else:
+                         yield LLMResponse(content=delta.content, role=ChatRole.ASSISTANT)
+
+             if current_content and not self._cancelled:
+                 if conversational_graph:
+                     try:
+                         parsed_json = json.loads(current_content.strip())
+                         yield LLMResponse(
+                             content="",
+                             role=ChatRole.ASSISTANT,
+                             metadata=parsed_json
+                         )
+                     except json.JSONDecodeError:
+                         yield LLMResponse(
+                             content=current_content,
+                             role=ChatRole.ASSISTANT
+                         )
+                 else:
+                     pass
+
+         except Exception as e:
+             if not self._cancelled:
+                 self.emit("error", e)
+             raise
+
+     async def cancel_current_generation(self) -> None:
+         self._cancelled = True
+
+     async def aclose(self) -> None:
+         """Cleanup resources by closing the HTTP client"""
+         await self.cancel_current_generation()
+         if self._client:
+             await self._client.close()
+         await super().aclose()
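A note on `prepare_strict_schema`: OpenAI's strict `json_schema` response format requires every object in the schema to set `additionalProperties: false` and to list all of its properties under `required`, and the helper walks the schema recursively and patches both in place. A small illustrative run, using a hand-written schema rather than the real `ConversationalGraphResponse.model_json_schema()` output:

```python
# Hand-written input schema, for illustration only; the real call feeds in
# ConversationalGraphResponse.model_json_schema().
schema = {
    "type": "object",
    "properties": {
        "reply": {"type": "string"},
        "next_node": {"type": "string"},
    },
}

prepare_strict_schema(schema)

# The dict is mutated in place; afterwards it reads:
# {
#     "type": "object",
#     "properties": {
#         "reply": {"type": "string"},
#         "next_node": {"type": "string"},
#     },
#     "additionalProperties": False,
#     "required": ["reply", "next_node"],
# }
```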