vision-agents-plugins-wizper 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vision-agents-plugins-wizper might be problematic. Click here for more details.

.gitignore CHANGED
@@ -84,3 +84,4 @@ stream-py/
84
84
  # Artifacts / assets
85
85
  *.pt
86
86
  *.kef
87
+ *.onnx
PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-wizper
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: Wizper plugin for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
@@ -4,32 +4,13 @@ Fal Wizper STT Plugin for Stream
4
4
  Provides real-time audio transcription and translation using fal-ai/wizper (Whisper v3).
5
5
  This plugin integrates with Stream's audio processing pipeline to provide high-quality
6
6
  speech-to-text capabilities.
7
-
8
- Example usage:
9
- from vision_agents.plugins import fal
10
-
11
- # For transcription
12
- stt = fal.STT(task="transcribe")
13
-
14
- # For translation to Portuguese
15
- stt = fal.STT(task="translate", target_language="pt")
16
-
17
- @stt.on("transcript")
18
- async def on_transcript(text: str, user: Any, metadata: dict):
19
- print(f"Transcript: {text}")
20
-
21
- @stt.on("error")
22
- async def on_error(error: str):
23
- print(f"Error: {error}")
24
7
  """
25
8
 
26
- import io
27
9
  import logging
28
10
  import os
29
11
  import tempfile
30
12
  from pathlib import Path
31
13
  from typing import TYPE_CHECKING, Optional
32
- import wave
33
14
 
34
15
  import fal_client
35
16
  from getstream.video.rtc.track_util import PcmData
@@ -76,27 +57,6 @@ class STT(stt.STT):
76
57
  self.target_language = target_language
77
58
  self._fal_client = client if client is not None else fal_client.AsyncClient()
78
59
 
79
- def _pcm_to_wav_bytes(self, pcm_data: PcmData) -> bytes:
80
- """
81
- Convert PCM data to WAV format bytes.
82
-
83
- Args:
84
- pcm_data: PCM audio data from Stream's audio pipeline
85
-
86
- Returns:
87
- WAV format audio data as bytes
88
- """
89
- wav_buffer = io.BytesIO()
90
-
91
- with wave.open(wav_buffer, "wb") as wav_file:
92
- wav_file.setnchannels(1) # Mono
93
- wav_file.setsampwidth(2) # 16-bit
94
- wav_file.setframerate(self.sample_rate)
95
- wav_file.writeframes(pcm_data.samples.tobytes())
96
-
97
- wav_buffer.seek(0)
98
- return wav_buffer.read()
99
-
100
60
  async def process_audio(
101
61
  self,
102
62
  pcm_data: PcmData,
@@ -122,8 +82,8 @@ class STT(stt.STT):
122
82
  "Sending speech audio to fal-ai/wizper",
123
83
  extra={"audio_bytes": pcm_data.samples.nbytes},
124
84
  )
125
- # Convert PCM to WAV format for upload
126
- wav_data = self._pcm_to_wav_bytes(pcm_data)
85
+ # Convert PCM to WAV format for upload using shared PcmData method
86
+ wav_data = pcm_data.to_wav_bytes()
127
87
 
128
88
  # Create temporary file for upload
129
89
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
@@ -151,7 +111,7 @@ class STT(stt.STT):
151
111
  )
152
112
  if "text" in result:
153
113
  text = result["text"].strip()
154
- if text:
114
+ if text and participant is not None:
155
115
  response_metadata = TranscriptResponse()
156
116
  self._emit_transcript_event(
157
117
  text, participant, response_metadata
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-wizper
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: Wizper plugin for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
@@ -0,0 +1,9 @@
1
+ ./.gitignore,sha256=ye7v72rmcYcz93U_u9IyYUvYJKEXGElBsTevPVyASo0,923
2
+ ./PKG-INFO,sha256=eBdcwmO3PZHGtalyBIPFycUERCl28ZBjYB97saPWLWw,505
3
+ ./README.md,sha256=7MDH68Ywzj2WKm3QAFCUvupBHxrTdjtAL0WqrqaCHFc,24
4
+ ./pyproject.toml,sha256=eSC8A7YqeCWd9_VfDiwOc9z3KPGzSPbf9IIqk_aWES0,959
5
+ ./vision_agents/plugins/wizper/__init__.py,sha256=aRLgDFc3zq4tNj3G9kmM4zJzSpO7hYqGujz3zTTPsMk,93
6
+ ./vision_agents/plugins/wizper/stt.py,sha256=d2XLQQufSL4qruSumff1gj2aA_dssbA0DUN4RZNNIwA,4778
7
+ vision_agents_plugins_wizper-0.1.12.dist-info/METADATA,sha256=eBdcwmO3PZHGtalyBIPFycUERCl28ZBjYB97saPWLWw,505
8
+ vision_agents_plugins_wizper-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ vision_agents_plugins_wizper-0.1.12.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- ./.gitignore,sha256=S6wPCu4rBDB_yyTYoXbMIR-pn4OPv6b3Ulnx1n5RWvo,916
2
- ./PKG-INFO,sha256=TwinEYIZ4okitRugRaXwoXs71LAPmO7kZSCCKW8U7l8,505
3
- ./README.md,sha256=7MDH68Ywzj2WKm3QAFCUvupBHxrTdjtAL0WqrqaCHFc,24
4
- ./pyproject.toml,sha256=eSC8A7YqeCWd9_VfDiwOc9z3KPGzSPbf9IIqk_aWES0,959
5
- ./vision_agents/plugins/wizper/__init__.py,sha256=aRLgDFc3zq4tNj3G9kmM4zJzSpO7hYqGujz3zTTPsMk,93
6
- ./vision_agents/plugins/wizper/stt.py,sha256=bHEGG8aeN9NBZjtiWhZ8GvK5aszmAebGbXU4pqYisa4,5819
7
- vision_agents_plugins_wizper-0.1.11.dist-info/METADATA,sha256=TwinEYIZ4okitRugRaXwoXs71LAPmO7kZSCCKW8U7l8,505
8
- vision_agents_plugins_wizper-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- vision_agents_plugins_wizper-0.1.11.dist-info/RECORD,,