vision-agents-plugins-wizper 0.1.9__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vision-agents-plugins-wizper might be problematic. Click here for more details.
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/PKG-INFO +1 -1
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/vision_agents/plugins/wizper/stt.py +34 -37
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/.gitignore +0 -0
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/README.md +0 -0
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/pyproject.toml +0 -0
- {vision_agents_plugins_wizper-0.1.9 → vision_agents_plugins_wizper-0.1.11}/vision_agents/plugins/wizper/__init__.py +0 -0
|
@@ -24,20 +24,21 @@ Example usage:
|
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
26
|
import io
|
|
27
|
+
import logging
|
|
27
28
|
import os
|
|
28
29
|
import tempfile
|
|
29
|
-
import time
|
|
30
|
-
import logging
|
|
31
30
|
from pathlib import Path
|
|
32
|
-
from typing import
|
|
33
|
-
|
|
34
|
-
if TYPE_CHECKING:
|
|
35
|
-
from vision_agents.core.edge.types import Participant
|
|
31
|
+
from typing import TYPE_CHECKING, Optional
|
|
36
32
|
import wave
|
|
37
33
|
|
|
38
34
|
import fal_client
|
|
39
35
|
from getstream.video.rtc.track_util import PcmData
|
|
36
|
+
|
|
40
37
|
from vision_agents.core import stt
|
|
38
|
+
from vision_agents.core.stt import TranscriptResponse
|
|
39
|
+
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from vision_agents.core.edge.types import Participant
|
|
41
42
|
|
|
42
43
|
logger = logging.getLogger(__name__)
|
|
43
44
|
|
|
@@ -58,23 +59,21 @@ class STT(stt.STT):
|
|
|
58
59
|
def __init__(
|
|
59
60
|
self,
|
|
60
61
|
task: str = "transcribe",
|
|
61
|
-
target_language: str
|
|
62
|
-
sample_rate: int = 48000,
|
|
62
|
+
target_language: Optional[str] = None,
|
|
63
63
|
client: Optional[fal_client.AsyncClient] = None,
|
|
64
64
|
):
|
|
65
65
|
"""
|
|
66
|
-
Initialize
|
|
66
|
+
Initialize Wizper STT.
|
|
67
67
|
|
|
68
68
|
Args:
|
|
69
69
|
task: "transcribe" or "translate"
|
|
70
70
|
target_language: Target language code (e.g., "pt" for Portuguese)
|
|
71
|
-
|
|
71
|
+
client: Optional fal_client.AsyncClient instance for testing
|
|
72
72
|
"""
|
|
73
|
-
super().__init__(
|
|
73
|
+
super().__init__(provider_name="wizper")
|
|
74
74
|
self.task = task
|
|
75
|
+
self.sample_rate = 48000
|
|
75
76
|
self.target_language = target_language
|
|
76
|
-
self.last_activity_time = time.time()
|
|
77
|
-
self._is_closed = False
|
|
78
77
|
self._fal_client = client if client is not None else fal_client.AsyncClient()
|
|
79
78
|
|
|
80
79
|
def _pcm_to_wav_bytes(self, pcm_data: PcmData) -> bytes:
|
|
@@ -98,26 +97,25 @@ class STT(stt.STT):
|
|
|
98
97
|
wav_buffer.seek(0)
|
|
99
98
|
return wav_buffer.read()
|
|
100
99
|
|
|
101
|
-
async def
|
|
102
|
-
self,
|
|
103
|
-
|
|
100
|
+
async def process_audio(
|
|
101
|
+
self,
|
|
102
|
+
pcm_data: PcmData,
|
|
103
|
+
participant: Optional["Participant"] = None,
|
|
104
|
+
):
|
|
104
105
|
"""
|
|
105
|
-
Process
|
|
106
|
-
|
|
107
|
-
This method is typically called by VAD (Voice Activity Detection) systems
|
|
108
|
-
when speech segments are detected.
|
|
106
|
+
Process audio through fal-ai/wizper for transcription.
|
|
109
107
|
|
|
110
108
|
Args:
|
|
111
|
-
|
|
112
|
-
|
|
109
|
+
pcm_data: The PCM audio data to process
|
|
110
|
+
participant: Optional participant metadata
|
|
113
111
|
"""
|
|
114
|
-
if self.
|
|
115
|
-
logger.
|
|
116
|
-
return
|
|
112
|
+
if self.closed:
|
|
113
|
+
logger.warning("Wizper STT is closed, ignoring audio")
|
|
114
|
+
return
|
|
117
115
|
|
|
118
116
|
if pcm_data.samples.size == 0:
|
|
119
117
|
logger.debug("No audio data to process")
|
|
120
|
-
return
|
|
118
|
+
return
|
|
121
119
|
|
|
122
120
|
try:
|
|
123
121
|
logger.debug(
|
|
@@ -154,8 +152,9 @@ class STT(stt.STT):
|
|
|
154
152
|
if "text" in result:
|
|
155
153
|
text = result["text"].strip()
|
|
156
154
|
if text:
|
|
155
|
+
response_metadata = TranscriptResponse()
|
|
157
156
|
self._emit_transcript_event(
|
|
158
|
-
text,
|
|
157
|
+
text, participant, response_metadata
|
|
159
158
|
)
|
|
160
159
|
finally:
|
|
161
160
|
# Clean up temporary file
|
|
@@ -164,17 +163,15 @@ class STT(stt.STT):
|
|
|
164
163
|
except OSError:
|
|
165
164
|
pass
|
|
166
165
|
|
|
167
|
-
# Return None for asynchronous mode - events are emitted when they arrive
|
|
168
|
-
return None
|
|
169
|
-
|
|
170
166
|
except Exception as e:
|
|
171
|
-
logger.error(f"
|
|
172
|
-
self._emit_error_event(e, "
|
|
173
|
-
return None
|
|
167
|
+
logger.error(f"Wizper processing error: {str(e)}")
|
|
168
|
+
self._emit_error_event(e, "Wizper processing")
|
|
174
169
|
|
|
175
170
|
async def close(self):
|
|
176
|
-
"""Close the STT service and release any resources."""
|
|
177
|
-
if self.
|
|
171
|
+
"""Close the Wizper STT service and release any resources."""
|
|
172
|
+
if self.closed:
|
|
173
|
+
logger.debug("Wizper STT service already closed")
|
|
178
174
|
return
|
|
179
|
-
|
|
180
|
-
logger.info("
|
|
175
|
+
|
|
176
|
+
logger.info("Closing Wizper STT service")
|
|
177
|
+
await super().close()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|