videosdk-plugins-openai 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of videosdk-plugins-openai might be problematic. Click here for more details.

@@ -45,9 +45,9 @@ DEFAULT_INPUT_AUDIO_TRANSCRIPTION = InputAudioTranscription(
45
45
  DEFAULT_TOOL_CHOICE = "auto"
46
46
 
47
47
  OpenAIEventTypes = Literal[
48
- "instructions_updated",
49
- "tools_updated",
50
- "text_response"
48
+ "user_speech_started",
49
+ "text_response",
50
+ "error"
51
51
  ]
52
52
  DEFAULT_VOICE = "alloy"
53
53
  DEFAULT_INPUT_AUDIO_FORMAT = "pcm16"
@@ -305,11 +305,12 @@ class OpenAIRealtime(RealtimeBaseModel[OpenAIEventTypes]):
305
305
  await self._handle_text_done(data)
306
306
 
307
307
  except Exception as e:
308
- self.emit_error(f"Error handling event {event_type}: {str(e)}")
308
+ self.emit("error", f"Error handling event {event_type}: {str(e)}")
309
309
 
310
310
  async def _handle_speech_started(self, data: dict) -> None:
311
311
  """Handle speech detection start"""
312
312
  if "audio" in self.config.modalities:
313
+ self.emit("user_speech_started", {"type": "done"})
313
314
  await self.interrupt()
314
315
  if self.audio_track:
315
316
  self.audio_track.interrupt()
@@ -4,6 +4,7 @@ from typing import Any, AsyncIterator, Literal, Optional, Union
4
4
  import httpx
5
5
  import os
6
6
  import openai
7
+ import asyncio
7
8
 
8
9
  from videosdk.agents import TTS
9
10
 
@@ -81,6 +82,7 @@ class OpenAITTS(TTS):
81
82
  self.emit("error", "Audio track or event loop not set")
82
83
  return
83
84
 
85
+ audio_data = b""
84
86
  async with self._client.audio.speech.with_streaming_response.create(
85
87
  model=self.model,
86
88
  voice=voice_id or self.voice,
@@ -91,13 +93,32 @@ class OpenAITTS(TTS):
91
93
  ) as response:
92
94
  async for chunk in response.iter_bytes():
93
95
  if chunk:
94
- self.loop.create_task(self.audio_track.add_new_bytes(chunk))
96
+ audio_data += chunk
97
+
98
+
99
+ if audio_data:
100
+ await self._stream_audio_chunks(audio_data)
95
101
 
96
102
  except openai.APIError as e:
97
103
  self.emit("error", str(e))
98
104
  except Exception as e:
99
105
  self.emit("error", f"TTS synthesis failed: {str(e)}")
100
106
 
107
+ async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
108
+ """Stream audio data in chunks for smooth playback"""
109
+ chunk_size = int(OPENAI_TTS_SAMPLE_RATE * OPENAI_TTS_CHANNELS * 2 * 20 / 1000)
110
+
111
+ for i in range(0, len(audio_bytes), chunk_size):
112
+ chunk = audio_bytes[i:i + chunk_size]
113
+
114
+ if len(chunk) < chunk_size and len(chunk) > 0:
115
+ padding_needed = chunk_size - len(chunk)
116
+ chunk += b'\x00' * padding_needed
117
+
118
+ if len(chunk) == chunk_size:
119
+ self.loop.create_task(self.audio_track.add_new_bytes(chunk))
120
+ await asyncio.sleep(0.001)
121
+
101
122
  async def aclose(self) -> None:
102
123
  """Cleanup resources"""
103
124
  await self._client.close()
@@ -1 +1 @@
1
- __version__ = "0.0.20"
1
+ __version__ = "0.0.22"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videosdk-plugins-openai
3
- Version: 0.0.20
3
+ Version: 0.0.22
4
4
  Summary: VideoSDK Agent Framework plugin for OpenAI services
5
5
  Author: videosdk
6
6
  License-Expression: Apache-2.0
@@ -13,7 +13,7 @@ Classifier: Topic :: Multimedia :: Video
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.11
15
15
  Requires-Dist: openai[realtime]>=1.68.2
16
- Requires-Dist: videosdk-agents>=0.0.20
16
+ Requires-Dist: videosdk-agents>=0.0.22
17
17
  Description-Content-Type: text/markdown
18
18
 
19
19
  # VideoSDK OpenAI Plugin
@@ -0,0 +1,9 @@
1
+ videosdk/plugins/openai/__init__.py,sha256=1jbc4HOYxkLeruM9RAqmZYSBdnr74gnPHmCNMKXEPrg,259
2
+ videosdk/plugins/openai/llm.py,sha256=h6xuJmyjg6InL9tr5pKBGt_5bNMpJ4XqnO72OtmCJ0c,7122
3
+ videosdk/plugins/openai/realtime_api.py,sha256=WSzDWHcCQC8QsKLDmA5mm_oSN8UIHYMplesNliV5eUc,22611
4
+ videosdk/plugins/openai/stt.py,sha256=YZROX-BjTqtWiT6ouMZacLkMYbmao3emB-88ewN93jg,9492
5
+ videosdk/plugins/openai/tts.py,sha256=o5ktMUzjPkj64L5qqRaKPTWq7Na56TshMnLfU-sK36k,4417
6
+ videosdk/plugins/openai/version.py,sha256=NoiGDztYD4fsDDnfSPiSzRkknkNHhFUtKZj0mhQiTYM,22
7
+ videosdk_plugins_openai-0.0.22.dist-info/METADATA,sha256=9BJRuTdobykpCbIf5Gwr33z074lZjp-tCjdgBn5GUqg,827
8
+ videosdk_plugins_openai-0.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ videosdk_plugins_openai-0.0.22.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- videosdk/plugins/openai/__init__.py,sha256=1jbc4HOYxkLeruM9RAqmZYSBdnr74gnPHmCNMKXEPrg,259
2
- videosdk/plugins/openai/llm.py,sha256=h6xuJmyjg6InL9tr5pKBGt_5bNMpJ4XqnO72OtmCJ0c,7122
3
- videosdk/plugins/openai/realtime_api.py,sha256=B2RlEV_yK0R4K1dPTyhhPewoa9bzd43ytEfsLKaHUUQ,22554
4
- videosdk/plugins/openai/stt.py,sha256=YZROX-BjTqtWiT6ouMZacLkMYbmao3emB-88ewN93jg,9492
5
- videosdk/plugins/openai/tts.py,sha256=LDsYXuHBoN-8g1iYt7JV_vRWOJZvhUN8QZQj_q264rU,3635
6
- videosdk/plugins/openai/version.py,sha256=cw-wPso5400rXRCR6WsHwthEUW8-b_VMrztjcYwBGfQ,22
7
- videosdk_plugins_openai-0.0.20.dist-info/METADATA,sha256=Ja6dfVaBHdrxMgmE0Hughw1oSzdMnbNPSOUfBptoopQ,827
8
- videosdk_plugins_openai-0.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- videosdk_plugins_openai-0.0.20.dist-info/RECORD,,