PyPI - vision-agents-plugins-wizper - Versions diffs - 0.1.9__tar.gz → 0.1.12__tar.gz - Mend

vision-agents-plugins-wizper 0.1.9tar.gz → 0.1.12tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of vision-agents-plugins-wizper might be problematic. Click here for more details.

Files changed (6) hide show

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/.gitignore RENAMED Viewed

@@ -84,3 +84,4 @@ stream-py/
 # Artifacts / assets
 *.pt
 *.kef
+*.onnx

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vision-agents-plugins-wizper
-Version: 0.1.9
+Version: 0.1.12
 Summary: Wizper plugin for Vision Agents
 Project-URL: Documentation, https://visionagents.ai/
 Project-URL: Website, https://visionagents.ai/

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/vision_agents/plugins/wizper/stt.py RENAMED Viewed

@@ -4,40 +4,22 @@ Fal Wizper STT Plugin for Stream
 Provides real-time audio transcription and translation using fal-ai/wizper (Whisper v3).
 This plugin integrates with Stream's audio processing pipeline to provide high-quality
 speech-to-text capabilities.
-Example usage:
-    from vision_agents.plugins import fal
-    # For transcription
-    stt = fal.STT(task="transcribe")
-    # For translation to Portuguese
-    stt = fal.STT(task="translate", target_language="pt")
-    @stt.on("transcript")
-    async def on_transcript(text: str, user: Any, metadata: dict):
-        print(f"Transcript: {text}")
-    @stt.on("error")
-    async def on_error(error: str):
-        print(f"Error: {error}")
 """
-import io
+import logging
 import os
 import tempfile
-import time
-import logging
 from pathlib import Path
-from typing import Any, Dict, Optional, List, Tuple, Union, TYPE_CHECKING
-if TYPE_CHECKING:
-    from vision_agents.core.edge.types import Participant
-import wave
+from typing import TYPE_CHECKING, Optional
 import fal_client
 from getstream.video.rtc.track_util import PcmData
 from vision_agents.core import stt
+from vision_agents.core.stt import TranscriptResponse
+if TYPE_CHECKING:
+    from vision_agents.core.edge.types import Participant
 logger = logging.getLogger(__name__)
@@ -58,74 +40,50 @@ class STT(stt.STT):
     def __init__(
         self,
         task: str = "transcribe",
-        target_language: str | None = None,
-        sample_rate: int = 48000,
+        target_language: Optional[str] = None,
         client: Optional[fal_client.AsyncClient] = None,
     ):
         """
-        Initialize FalWizperSTT.
+        Initialize Wizper STT.
         Args:
             task: "transcribe" or "translate"
             target_language: Target language code (e.g., "pt" for Portuguese)
-            sample_rate: Sample rate of the audio in Hz.
+            client: Optional fal_client.AsyncClient instance for testing
         """
-        super().__init__(sample_rate=sample_rate)
+        super().__init__(provider_name="wizper")
         self.task = task
+        self.sample_rate = 48000
         self.target_language = target_language
-        self.last_activity_time = time.time()
-        self._is_closed = False
         self._fal_client = client if client is not None else fal_client.AsyncClient()
-    def _pcm_to_wav_bytes(self, pcm_data: PcmData) -> bytes:
-        """
-        Convert PCM data to WAV format bytes.
-        Args:
-            pcm_data: PCM audio data from Stream's audio pipeline
-        Returns:
-            WAV format audio data as bytes
-        """
-        wav_buffer = io.BytesIO()
-        with wave.open(wav_buffer, "wb") as wav_file:
-            wav_file.setnchannels(1)  # Mono
-            wav_file.setsampwidth(2)  # 16-bit
-            wav_file.setframerate(self.sample_rate)
-            wav_file.writeframes(pcm_data.samples.tobytes())
-        wav_buffer.seek(0)
-        return wav_buffer.read()
-    async def _process_audio_impl(
-        self, pcm_data: PcmData, user_metadata: Optional[Union[Dict[str, Any], "Participant"]] = None
-    ) -> Optional[List[Tuple[bool, str, Dict[str, Any]]]]:
+    async def process_audio(
+        self,
+        pcm_data: PcmData,
+        participant: Optional["Participant"] = None,
+    ):
         """
-        Process accumulated speech audio through fal-ai/wizper.
-        This method is typically called by VAD (Voice Activity Detection) systems
-        when speech segments are detected.
+        Process audio through fal-ai/wizper for transcription.
         Args:
-            speech_audio: Accumulated speech audio as numpy array
-            user: User metadata from the Stream call
+            pcm_data: The PCM audio data to process
+            participant: Optional participant metadata
         """
-        if self._is_closed:
-            logger.debug("connection is closed, ignoring audio")
-            return None
+        if self.closed:
+            logger.warning("Wizper STT is closed, ignoring audio")
+            return
         if pcm_data.samples.size == 0:
             logger.debug("No audio data to process")
-            return None
+            return
         try:
             logger.debug(
                 "Sending speech audio to fal-ai/wizper",
                 extra={"audio_bytes": pcm_data.samples.nbytes},
             )
-            # Convert PCM to WAV format for upload
-            wav_data = self._pcm_to_wav_bytes(pcm_data)
+            # Convert PCM to WAV format for upload using shared PcmData method
+            wav_data = pcm_data.to_wav_bytes()
             # Create temporary file for upload
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
@@ -153,9 +111,10 @@ class STT(stt.STT):
                 )
                 if "text" in result:
                     text = result["text"].strip()
-                    if text:
+                    if text and participant is not None:
+                        response_metadata = TranscriptResponse()
                         self._emit_transcript_event(
-                            text, user_metadata, {"chunks": result.get("chunks", [])}
+                            text, participant, response_metadata
                         )
             finally:
                 # Clean up temporary file
@@ -164,17 +123,15 @@ class STT(stt.STT):
                 except OSError:
                     pass
-            # Return None for asynchronous mode - events are emitted when they arrive
-            return None
         except Exception as e:
-            logger.error(f"FalWizper processing error: {str(e)}")
-            self._emit_error_event(e, "FalWizper processing")
-            return None
+            logger.error(f"Wizper processing error: {str(e)}")
+            self._emit_error_event(e, "Wizper processing")
     async def close(self):
-        """Close the STT service and release any resources."""
-        if self._is_closed:
+        """Close the Wizper STT service and release any resources."""
+        if self.closed:
+            logger.debug("Wizper STT service already closed")
             return
-        self._is_closed = True
-        logger.info("FalWizperSTT closed")
+        logger.info("Closing Wizper STT service")
+        await super().close()

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/README.md RENAMED Viewed

File without changes

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/pyproject.toml RENAMED Viewed

File without changes

{vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.12}/vision_agents/plugins/wizper/__init__.py RENAMED Viewed

File without changes

vision-agents-plugins-wizper 0.1.9__tar.gz → 0.1.12__tar.gz

Potentially problematic release.

vision-agents-plugins-wizper 0.1.9tar.gz → 0.1.12tar.gz