PyPI - videosdk-plugins-lmnt - Versions diffs - 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl - Mend

videosdk-plugins-lmnt 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of videosdk-plugins-lmnt might be problematic. Click here for more details.

Files changed (6) hide show

videosdk/plugins/lmnt/tts.py CHANGED Viewed

@@ -5,7 +5,7 @@ import httpx
 import os
 import asyncio
-from videosdk.agents import TTS
+from videosdk.agents import TTS, segment_text
 LMNT_API_BASE_URL = "https://api.lmnt.com"
 LMNT_SAMPLE_RATE = 24000
@@ -14,11 +14,11 @@ LMNT_CHANNELS = 1
 DEFAULT_MODEL = "blizzard"
 DEFAULT_VOICE = "ava"
 DEFAULT_LANGUAGE = "auto"
-DEFAULT_FORMAT = "wav"
+DEFAULT_FORMAT = "wav"
 _LanguageCode = Union[
-    Literal["auto", "de", "en", "es", "fr", "hi", "id", "it", "ja",
-            "ko", "nl", "pl", "pt", "ru", "sv", "th", "tr", "uk", "vi", "zh"],
+    Literal["auto", "de", "en", "es", "fr", "hi", "id", "it", "ja",
+            "ko", "nl", "pl", "pt", "ru", "sv", "th", "tr", "uk", "vi", "zh"],
     str
 ]
 _FormatType = Union[Literal["aac", "mp3", "mulaw", "raw", "wav"], str]
@@ -41,7 +41,7 @@ class LMNTTTS(TTS):
         base_url: str = LMNT_API_BASE_URL,
     ) -> None:
         super().__init__(sample_rate=sample_rate, num_channels=LMNT_CHANNELS)
         self.voice = voice
         self.model = model
         self.language = language
@@ -54,16 +54,18 @@ class LMNTTTS(TTS):
         self.audio_track = None
         self.loop = None
         self._first_chunk_sent = False
+        self._interrupted = False
         self.api_key = api_key or os.getenv("LMNT_API_KEY")
         if not self.api_key:
             raise ValueError(
                 "LMNT API key must be provided either through api_key parameter "
                 "or LMNT_API_KEY environment variable"
             )
         self._client = httpx.AsyncClient(
-            timeout=httpx.Timeout(connect=15.0, read=30.0, write=5.0, pool=5.0),
+            timeout=httpx.Timeout(connect=15.0, read=30.0,
+                                  write=5.0, pool=5.0),
             follow_redirects=True,
             limits=httpx.Limits(
                 max_connections=50,
@@ -71,128 +73,142 @@ class LMNTTTS(TTS):
                 keepalive_expiry=120,
             ),
         )
     def reset_first_audio_tracking(self) -> None:
         """Reset the first audio tracking state for next TTS task"""
         self._first_chunk_sent = False
     async def synthesize(
         self,
         text: AsyncIterator[str] | str,
         voice_id: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> None:
         """
         Convert text to speech using LMNT's TTS API and stream to audio track
         Args:
             text: Text to convert to speech
             voice_id: Optional voice override (uses voice from __init__ if not provided)
             **kwargs: Additional provider-specific arguments
         """
         try:
-            if isinstance(text, AsyncIterator):
-                full_text = ""
-                async for chunk in text:
-                    full_text += chunk
-            else:
-                full_text = text
             if not self.audio_track or not self.loop:
                 self.emit("error", "Audio track or event loop not set")
                 return
-            target_voice = voice_id or self.voice
-            payload = {
-                "voice": target_voice,
-                "text": full_text,
-                "model": kwargs.get("model", self.model),
-                "language": kwargs.get("language", self.language),
-                "format": kwargs.get("format", self.format),
-                "sample_rate": kwargs.get("sample_rate", self.output_sample_rate),
-                "temperature": kwargs.get("temperature", self.temperature),
-                "top_p": kwargs.get("top_p", self.top_p),
-            }
-            seed = kwargs.get("seed", self.seed)
-            if seed is not None:
-                payload["seed"] = seed
-            headers = {
-                "X-API-Key": self.api_key,
-                "Content-Type": "application/json",
-            }
-            url = f"{self.base_url}/v1/ai/speech/bytes"
-            async with self._client.stream(
-                "POST",
-                url,
-                headers=headers,
-                json=payload
-            ) as response:
-                if response.status_code == 400:
-                    error_data = await response.aread()
-                    try:
-                        import json
-                        error_json = json.loads(error_data.decode())
-                        error_msg = error_json.get("error", "Bad request")
-                    except:
-                        error_msg = "Bad request"
-                    self.emit("error", f"LMNT API error: {error_msg}")
-                    return
-                elif response.status_code == 401:
-                    self.emit("error", "LMNT API authentication failed. Please check your API key.")
-                    return
-                elif response.status_code != 200:
-                    self.emit("error", f"LMNT API error: HTTP {response.status_code}")
-                    return
-                header_processed = False
-                accumulated_data = b""
-                async for chunk in response.aiter_bytes():
-                    if chunk:
-                        accumulated_data += chunk
-                        if not header_processed and len(accumulated_data) >= 44:
-                            if accumulated_data.startswith(b'RIFF'):
-                                data_pos = accumulated_data.find(b'data')
-                                if data_pos != -1:
-                                    accumulated_data = accumulated_data[data_pos + 8:]
-                            header_processed = True
-                        if header_processed:
-                            chunk_size = int(self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000)  # 20ms chunks
-                            while len(accumulated_data) >= chunk_size:
-                                audio_chunk = accumulated_data[:chunk_size]
-                                accumulated_data = accumulated_data[chunk_size:]
-                                if not self._first_chunk_sent and self._first_audio_callback:
-                                    self._first_chunk_sent = True
-                                    await self._first_audio_callback()
-                                self.loop.create_task(self.audio_track.add_new_bytes(audio_chunk))
-                                await asyncio.sleep(0.01)
-                if accumulated_data and header_processed:
-                    chunk_size = int(self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000)
-                    if len(accumulated_data) < chunk_size:
-                        accumulated_data += b'\x00' * (chunk_size - len(accumulated_data))
-                    if not self._first_chunk_sent and self._first_audio_callback:
-                        self._first_chunk_sent = True
-                        await self._first_audio_callback()
-                    self.loop.create_task(self.audio_track.add_new_bytes(accumulated_data))
-        except httpx.HTTPError as e:
-            self.emit("error", f"HTTP error occurred: {str(e)}")
+            self._interrupted = False
+            if isinstance(text, AsyncIterator):
+                async for segment in segment_text(text):
+                    if self._interrupted:
+                        break
+                    await self._synthesize_segment(segment, voice_id, **kwargs)
+            else:
+                if not self._interrupted:
+                    await self._synthesize_segment(text, voice_id, **kwargs)
         except Exception as e:
             self.emit("error", f"TTS synthesis failed: {str(e)}")
+    async def _synthesize_segment(self, text: str, voice_id: Optional[str] = None, **kwargs: Any) -> None:
+        """Synthesize a single text segment"""
+        if not text.strip() or self._interrupted:
+            return
+        target_voice = voice_id or self.voice
+        payload = {
+            "voice": target_voice,
+            "text": text,
+            "model": kwargs.get("model", self.model),
+            "language": kwargs.get("language", self.language),
+            "format": kwargs.get("format", self.format),
+            "sample_rate": kwargs.get("sample_rate", self.output_sample_rate),
+            "temperature": kwargs.get("temperature", self.temperature),
+            "top_p": kwargs.get("top_p", self.top_p),
+        }
+        seed = kwargs.get("seed", self.seed)
+        if seed is not None:
+            payload["seed"] = seed
+        headers = {
+            "X-API-Key": self.api_key,
+            "Content-Type": "application/json",
+        }
+        url = f"{self.base_url}/v1/ai/speech/bytes"
+        async with self._client.stream(
+            "POST",
+            url,
+            headers=headers,
+            json=payload
+        ) as response:
+            if response.status_code == 400:
+                error_data = await response.aread()
+                try:
+                    import json
+                    error_json = json.loads(error_data.decode())
+                    error_msg = error_json.get("error", "Bad request")
+                except:
+                    error_msg = "Bad request"
+                self.emit("error", f"LMNT API error: {error_msg}")
+                return
+            elif response.status_code == 401:
+                self.emit(
+                    "error", "LMNT API authentication failed. Please check your API key.")
+                return
+            elif response.status_code != 200:
+                self.emit(
+                    "error", f"LMNT API error: HTTP {response.status_code}")
+                return
+            header_processed = False
+            accumulated_data = b""
+            async for chunk in response.aiter_bytes():
+                if self._interrupted:
+                    break
+                if chunk:
+                    accumulated_data += chunk
+                    if not header_processed and len(accumulated_data) >= 44:
+                        if accumulated_data.startswith(b'RIFF'):
+                            data_pos = accumulated_data.find(b'data')
+                            if data_pos != -1:
+                                accumulated_data = accumulated_data[data_pos + 8:]
+                        header_processed = True
+                    if header_processed:
+                        chunk_size = int(
+                            self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000)  # 20ms chunks
+                        while len(accumulated_data) >= chunk_size:
+                            audio_chunk = accumulated_data[:chunk_size]
+                            accumulated_data = accumulated_data[chunk_size:]
+                            if not self._first_chunk_sent and self._first_audio_callback:
+                                self._first_chunk_sent = True
+                                await self._first_audio_callback()
+                            self.loop.create_task(
+                                self.audio_track.add_new_bytes(audio_chunk))
+                            await asyncio.sleep(0.01)
+            if accumulated_data and header_processed:
+                chunk_size = int(self.output_sample_rate *
+                                 LMNT_CHANNELS * 2 * 20 / 1000)
+                if len(accumulated_data) < chunk_size:
+                    accumulated_data += b'\x00' * \
+                        (chunk_size - len(accumulated_data))
+                if not self._first_chunk_sent and self._first_audio_callback:
+                    self._first_chunk_sent = True
+                    await self._first_audio_callback()
+                self.loop.create_task(
+                    self.audio_track.add_new_bytes(accumulated_data))
     async def aclose(self) -> None:
         """Cleanup resources"""
@@ -201,5 +217,6 @@ class LMNTTTS(TTS):
     async def interrupt(self) -> None:
         """Interrupt the TTS process"""
+        self._interrupted = True
         if self.audio_track:
-            self.audio_track.interrupt()
+            self.audio_track.interrupt()

videosdk/plugins/lmnt/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.0.26"
+__version__ = "0.0.28"

{videosdk_plugins_lmnt-0.0.26.dist-info → videosdk_plugins_lmnt-0.0.28.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videosdk-plugins-lmnt
-Version: 0.0.26
+Version: 0.0.28
 Summary: VideoSDK Agent Framework plugin for LMNT AI Text-to-Speech services
 Author: videosdk
 License-Expression: Apache-2.0
@@ -13,7 +13,7 @@ Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.11
 Requires-Dist: httpx>=0.24.0
-Requires-Dist: videosdk-agents>=0.0.26
+Requires-Dist: videosdk-agents>=0.0.28
 Description-Content-Type: text/markdown
 # VideoSDK LMNT AI Plugin

videosdk_plugins_lmnt-0.0.28.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+videosdk/plugins/lmnt/__init__.py,sha256=JI72LJNs0cCg3S1lweyO-yiBLnViM6Dfp4zPvYJkDJQ,56
+videosdk/plugins/lmnt/tts.py,sha256=iuz_zEKkeLO6ewPfm2Nbh8t4h5cIDwEZK7_9dd4WiXA,8021
+videosdk/plugins/lmnt/version.py,sha256=OxG64Q6SDQQGNb5ggPOgDkHI0rY-RjCF92VCMUiyhOQ,23
+videosdk_plugins_lmnt-0.0.28.dist-info/METADATA,sha256=FcKNOMmfghsiCepM6biWpbAZ6cg86jH-fkCdUxA-hlQ,806
+videosdk_plugins_lmnt-0.0.28.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+videosdk_plugins_lmnt-0.0.28.dist-info/RECORD,,

videosdk_plugins_lmnt-0.0.26.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-videosdk/plugins/lmnt/__init__.py,sha256=JI72LJNs0cCg3S1lweyO-yiBLnViM6Dfp4zPvYJkDJQ,56
-videosdk/plugins/lmnt/tts.py,sha256=XFzyiMVp2KyLUidxM_k-JJXF2pcWrxnbxYKgzOCXxPw,7881
-videosdk/plugins/lmnt/version.py,sha256=cqqfmAmRPThAE9eZk4wNgmAMHbgvSY3O2CYorausl7s,23
-videosdk_plugins_lmnt-0.0.26.dist-info/METADATA,sha256=rFTfUpevYBlqA7IKEPrB5fuBTvMyMWFHuU2XLK4y4wY,806
-videosdk_plugins_lmnt-0.0.26.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-videosdk_plugins_lmnt-0.0.26.dist-info/RECORD,,

{videosdk_plugins_lmnt-0.0.26.dist-info → videosdk_plugins_lmnt-0.0.28.dist-info}/WHEEL RENAMED Viewed

File without changes

videosdk-plugins-lmnt 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

Potentially problematic release.

videosdk-plugins-lmnt 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl