vision-agents-plugins-wizper 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vision-agents-plugins-wizper might be problematic. Click here for more details.
- .gitignore +1 -0
- PKG-INFO +1 -1
- vision_agents/plugins/wizper/stt.py +3 -43
- {vision_agents_plugins_wizper-0.1.11.dist-info → vision_agents_plugins_wizper-0.1.12.dist-info}/METADATA +1 -1
- vision_agents_plugins_wizper-0.1.12.dist-info/RECORD +9 -0
- vision_agents_plugins_wizper-0.1.11.dist-info/RECORD +0 -9
- {vision_agents_plugins_wizper-0.1.11.dist-info → vision_agents_plugins_wizper-0.1.12.dist-info}/WHEEL +0 -0
.gitignore
CHANGED
PKG-INFO
CHANGED
|
@@ -4,32 +4,13 @@ Fal Wizper STT Plugin for Stream
|
|
|
4
4
|
Provides real-time audio transcription and translation using fal-ai/wizper (Whisper v3).
|
|
5
5
|
This plugin integrates with Stream's audio processing pipeline to provide high-quality
|
|
6
6
|
speech-to-text capabilities.
|
|
7
|
-
|
|
8
|
-
Example usage:
|
|
9
|
-
from vision_agents.plugins import fal
|
|
10
|
-
|
|
11
|
-
# For transcription
|
|
12
|
-
stt = fal.STT(task="transcribe")
|
|
13
|
-
|
|
14
|
-
# For translation to Portuguese
|
|
15
|
-
stt = fal.STT(task="translate", target_language="pt")
|
|
16
|
-
|
|
17
|
-
@stt.on("transcript")
|
|
18
|
-
async def on_transcript(text: str, user: Any, metadata: dict):
|
|
19
|
-
print(f"Transcript: {text}")
|
|
20
|
-
|
|
21
|
-
@stt.on("error")
|
|
22
|
-
async def on_error(error: str):
|
|
23
|
-
print(f"Error: {error}")
|
|
24
7
|
"""
|
|
25
8
|
|
|
26
|
-
import io
|
|
27
9
|
import logging
|
|
28
10
|
import os
|
|
29
11
|
import tempfile
|
|
30
12
|
from pathlib import Path
|
|
31
13
|
from typing import TYPE_CHECKING, Optional
|
|
32
|
-
import wave
|
|
33
14
|
|
|
34
15
|
import fal_client
|
|
35
16
|
from getstream.video.rtc.track_util import PcmData
|
|
@@ -76,27 +57,6 @@ class STT(stt.STT):
|
|
|
76
57
|
self.target_language = target_language
|
|
77
58
|
self._fal_client = client if client is not None else fal_client.AsyncClient()
|
|
78
59
|
|
|
79
|
-
def _pcm_to_wav_bytes(self, pcm_data: PcmData) -> bytes:
|
|
80
|
-
"""
|
|
81
|
-
Convert PCM data to WAV format bytes.
|
|
82
|
-
|
|
83
|
-
Args:
|
|
84
|
-
pcm_data: PCM audio data from Stream's audio pipeline
|
|
85
|
-
|
|
86
|
-
Returns:
|
|
87
|
-
WAV format audio data as bytes
|
|
88
|
-
"""
|
|
89
|
-
wav_buffer = io.BytesIO()
|
|
90
|
-
|
|
91
|
-
with wave.open(wav_buffer, "wb") as wav_file:
|
|
92
|
-
wav_file.setnchannels(1) # Mono
|
|
93
|
-
wav_file.setsampwidth(2) # 16-bit
|
|
94
|
-
wav_file.setframerate(self.sample_rate)
|
|
95
|
-
wav_file.writeframes(pcm_data.samples.tobytes())
|
|
96
|
-
|
|
97
|
-
wav_buffer.seek(0)
|
|
98
|
-
return wav_buffer.read()
|
|
99
|
-
|
|
100
60
|
async def process_audio(
|
|
101
61
|
self,
|
|
102
62
|
pcm_data: PcmData,
|
|
@@ -122,8 +82,8 @@ class STT(stt.STT):
|
|
|
122
82
|
"Sending speech audio to fal-ai/wizper",
|
|
123
83
|
extra={"audio_bytes": pcm_data.samples.nbytes},
|
|
124
84
|
)
|
|
125
|
-
# Convert PCM to WAV format for upload
|
|
126
|
-
wav_data = self._pcm_to_wav_bytes(pcm_data)
|
|
85
|
+
# Convert PCM to WAV format for upload using shared PcmData method
|
|
86
|
+
wav_data = pcm_data.to_wav_bytes()
|
|
127
87
|
|
|
128
88
|
# Create temporary file for upload
|
|
129
89
|
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
|
@@ -151,7 +111,7 @@ class STT(stt.STT):
|
|
|
151
111
|
)
|
|
152
112
|
if "text" in result:
|
|
153
113
|
text = result["text"].strip()
|
|
154
|
-
if text:
|
|
114
|
+
if text and participant is not None:
|
|
155
115
|
response_metadata = TranscriptResponse()
|
|
156
116
|
self._emit_transcript_event(
|
|
157
117
|
text, participant, response_metadata
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
./.gitignore,sha256=ye7v72rmcYcz93U_u9IyYUvYJKEXGElBsTevPVyASo0,923
|
|
2
|
+
./PKG-INFO,sha256=eBdcwmO3PZHGtalyBIPFycUERCl28ZBjYB97saPWLWw,505
|
|
3
|
+
./README.md,sha256=7MDH68Ywzj2WKm3QAFCUvupBHxrTdjtAL0WqrqaCHFc,24
|
|
4
|
+
./pyproject.toml,sha256=eSC8A7YqeCWd9_VfDiwOc9z3KPGzSPbf9IIqk_aWES0,959
|
|
5
|
+
./vision_agents/plugins/wizper/__init__.py,sha256=aRLgDFc3zq4tNj3G9kmM4zJzSpO7hYqGujz3zTTPsMk,93
|
|
6
|
+
./vision_agents/plugins/wizper/stt.py,sha256=d2XLQQufSL4qruSumff1gj2aA_dssbA0DUN4RZNNIwA,4778
|
|
7
|
+
vision_agents_plugins_wizper-0.1.12.dist-info/METADATA,sha256=eBdcwmO3PZHGtalyBIPFycUERCl28ZBjYB97saPWLWw,505
|
|
8
|
+
vision_agents_plugins_wizper-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
+
vision_agents_plugins_wizper-0.1.12.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
./.gitignore,sha256=S6wPCu4rBDB_yyTYoXbMIR-pn4OPv6b3Ulnx1n5RWvo,916
|
|
2
|
-
./PKG-INFO,sha256=TwinEYIZ4okitRugRaXwoXs71LAPmO7kZSCCKW8U7l8,505
|
|
3
|
-
./README.md,sha256=7MDH68Ywzj2WKm3QAFCUvupBHxrTdjtAL0WqrqaCHFc,24
|
|
4
|
-
./pyproject.toml,sha256=eSC8A7YqeCWd9_VfDiwOc9z3KPGzSPbf9IIqk_aWES0,959
|
|
5
|
-
./vision_agents/plugins/wizper/__init__.py,sha256=aRLgDFc3zq4tNj3G9kmM4zJzSpO7hYqGujz3zTTPsMk,93
|
|
6
|
-
./vision_agents/plugins/wizper/stt.py,sha256=bHEGG8aeN9NBZjtiWhZ8GvK5aszmAebGbXU4pqYisa4,5819
|
|
7
|
-
vision_agents_plugins_wizper-0.1.11.dist-info/METADATA,sha256=TwinEYIZ4okitRugRaXwoXs71LAPmO7kZSCCKW8U7l8,505
|
|
8
|
-
vision_agents_plugins_wizper-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
-
vision_agents_plugins_wizper-0.1.11.dist-info/RECORD,,
|
|
File without changes
|