vision-agents-plugins-wizper 0.2.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vision-agents-plugins-wizper might be problematic.

@@ -0,0 +1,90 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .cursor/*
+ # Distribution / packaging
+ .Python
+ build/
+ dist/
+ downloads/
+ develop-eggs/
+ eggs/
+ .eggs/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ pip-wheel-metadata/
+ MANIFEST
+ *.egg-info/
+ *.egg
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ coverage.xml
+ nosetests.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Type checker / lint caches
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ .pytype/
+ .pyre/
+ .ruff_cache/
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ .env
+ .env.local
+ .env.*.local
+ .env.bak
+ pyvenv.cfg
+ .python-version
+
+ # Editors / IDEs
+ .vscode/
+ .idea/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints/
+
+ # OS / Misc
+ .DS_Store
+ *.log
+
+ # Tooling & repo-specific
+ pyrightconfig.json
+ shell.nix
+ bin/*
+ lib/*
+ stream-py/
+
+ # Artifacts / assets
+ *.pt
+ *.kef
+ *.onnx
+ profile.html
+
+ /opencode.json
@@ -0,0 +1,15 @@
+ Metadata-Version: 2.4
+ Name: vision-agents-plugins-wizper
+ Version: 0.2.10
+ Summary: Wizper plugin for Vision Agents
+ Project-URL: Documentation, https://visionagents.ai/
+ Project-URL: Website, https://visionagents.ai/
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
+ License-Expression: MIT
+ Keywords: AI,agents,fal,turn detection,voice agents
+ Requires-Python: >=3.10
+ Requires-Dist: fal-client>=0.7.0
+ Requires-Dist: vision-agents
+ Description-Content-Type: text/markdown
+
+ Wizper for vision agents
@@ -0,0 +1 @@
+ Wizper for vision agents
@@ -0,0 +1,41 @@
+ [build-system]
+ requires = ["hatchling", "hatch-vcs"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "vision-agents-plugins-wizper"
+ dynamic = ["version"]
+ description = "Wizper plugin for Vision Agents"
+ readme = "README.md"
+ keywords = ["fal", "turn detection", "AI", "voice agents", "agents"]
+ requires-python = ">=3.10"
+ license = "MIT"
+ dependencies = [
+     "vision-agents",
+     "fal-client>=0.7.0",
+ ]
+
+ [project.urls]
+ Documentation = "https://visionagents.ai/"
+ Website = "https://visionagents.ai/"
+ Source = "https://github.com/GetStream/Vision-Agents"
+
+ [tool.hatch.version]
+ source = "vcs"
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["."]
+
+ [tool.hatch.build.targets.sdist]
+ include = ["/vision_agents"]
+
+ [tool.uv.sources]
+ vision-agents = { workspace = true }
+
+ [dependency-groups]
+ dev = [
+     "pytest>=8.4.1",
+     "pytest-asyncio>=1.0.0",
+     "numpy>=2.2.6,<2.3",
+ ]
@@ -0,0 +1,5 @@
+ from .stt import STT
+
+ # Re-export under the new namespace for convenience
+
+ __all__ = ["STT"]
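
The re-export above keeps the package's public surface to a single name. A minimal import sketch, assuming the plugin is mounted under the vision_agents.plugins.wizper namespace (the exact namespace is an assumption; this diff does not show the package's directory layout):

    # Hypothetical import path; adjust to wherever the package actually installs.
    from vision_agents.plugins.wizper import STT

    # Because __all__ = ["STT"], a star-import would also expose only STT.
    stt_service = STT()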
@@ -0,0 +1,154 @@
+ """
+ Fal Wizper STT Plugin for Stream
+
+ Provides real-time audio transcription and translation using fal-ai/wizper (Whisper v3).
+ This plugin integrates with Stream's audio processing pipeline to provide high-quality
+ speech-to-text capabilities.
+ """
+
+ import aiofiles
+ import asyncio
+ import logging
+ import os
+ import tempfile
+ import time
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Optional
+
+ import fal_client
+ from getstream.video.rtc.track_util import PcmData
+
+ from vision_agents.core import stt
+ from vision_agents.core.stt import TranscriptResponse
+
+ if TYPE_CHECKING:
+     from vision_agents.core.edge.types import Participant
+
+ logger = logging.getLogger(__name__)
+
+
+ class STT(stt.STT):
+     """
+     Audio transcription and translation using fal-ai/wizper (Whisper v3).
+
+     This plugin provides real-time speech-to-text capabilities using the fal-ai/wizper
+     service, which is based on OpenAI's Whisper v3 model. It supports both transcription
+     and translation tasks.
+
+     Attributes:
+         task: The task type - either "transcribe" or "translate"
+         target_language: Target language code for translation (e.g., "pt" for Portuguese)
+     """
+
+     def __init__(
+         self,
+         task: str = "transcribe",
+         target_language: Optional[str] = None,
+         client: Optional[fal_client.AsyncClient] = None,
+     ):
+         """
+         Initialize Wizper STT.
+
+         Args:
+             task: "transcribe" or "translate"
+             target_language: Target language code (e.g., "pt" for Portuguese)
+             client: Optional fal_client.AsyncClient instance for testing
+         """
+         super().__init__(provider_name="wizper")
+         self.task = task
+         self.sample_rate = 48000
+         self.target_language = target_language
+         self._fal_client = client if client is not None else fal_client.AsyncClient()
+
+     async def process_audio(
+         self,
+         pcm_data: PcmData,
+         participant: Optional["Participant"] = None,
+     ):
+         """
+         Process audio through fal-ai/wizper for transcription.
+
+         Args:
+             pcm_data: The PCM audio data to process
+             participant: Optional participant metadata
+         """
+         if self.closed:
+             logger.warning("Wizper STT is closed, ignoring audio")
+             return
+
+         if pcm_data.samples.size == 0:
+             logger.debug("No audio data to process")
+             return
+
+         try:
+             start_time = time.perf_counter()
+             logger.debug(
+                 "Sending speech audio to fal-ai/wizper",
+                 extra={"audio_bytes": pcm_data.samples.nbytes},
+             )
+             # Convert PCM to WAV format for upload using shared PcmData method
+             wav_data = pcm_data.to_wav_bytes()
+
+             # Create temporary file for upload (async to avoid blocking)
+             temp_file_path = os.path.join(
+                 tempfile.gettempdir(), f"wizper_{os.getpid()}_{id(pcm_data)}.wav"
+             )
+             async with aiofiles.open(temp_file_path, "wb") as f:
+                 await f.write(wav_data)
+
+             try:
+                 input_params = {
+                     "task": "transcribe",  # TODO: make this dynamic, currently there's a bug in the fal-ai/wizper service where it only works with "transcribe"
+                     "chunk_level": "segment",
+                     "version": "3",
+                 }
+                 # Add language for translation
+                 if self.target_language is not None:
+                     input_params["language"] = self.target_language
+
+                 # Upload file and get URL
+                 audio_url = await self._fal_client.upload_file(Path(temp_file_path))
+                 input_params["audio_url"] = audio_url
+
+                 # Use regular subscribe since streaming isn't supported
+                 result = await self._fal_client.subscribe(
+                     "fal-ai/wizper", arguments=input_params
+                 )
+                 if "text" in result:
+                     text = result["text"].strip()
+                     if text:
+                         # Create a default participant if none provided
+                         if participant is None:
+                             from vision_agents.core.edge.types import Participant
+
+                             participant = Participant(
+                                 original=None, user_id="test-user"
+                             )
+
+                         processing_time_ms = (time.perf_counter() - start_time) * 1000
+                         response_metadata = TranscriptResponse(
+                             processing_time_ms=processing_time_ms,
+                             model_name="wizper-v3",
+                         )
+                         self._emit_transcript_event(
+                             text, participant, response_metadata
+                         )
+             finally:
+                 # Clean up temporary file (async to avoid blocking)
+                 try:
+                     await asyncio.to_thread(os.unlink, temp_file_path)
+                 except OSError:
+                     pass
+
+         except Exception as e:
+             logger.error(f"Wizper processing error: {str(e)}")
+             self._emit_error_event(e, "Wizper processing")
+
+     async def close(self):
+         """Close the Wizper STT service and release any resources."""
+         if self.closed:
+             logger.debug("Wizper STT service already closed")
+             return
+
+         logger.info("Closing Wizper STT service")
+         await super().close()
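
For context, here is a minimal lifecycle sketch of the class above. Hedged: the import path is assumed from the __init__.py re-export, fal-client typically authenticates via a FAL_KEY environment variable, and PcmData frames would normally be supplied by Stream's audio pipeline rather than constructed by hand.

    import asyncio

    # Assumed namespace; see the __init__.py re-export earlier in this diff.
    from vision_agents.plugins.wizper import STT


    async def main() -> None:
        # Default task is "transcribe". Note the TODO in process_audio: the task sent
        # to fal-ai/wizper is currently hard-coded to "transcribe" regardless of this.
        stt_service = STT(task="transcribe", target_language=None)
        try:
            # In a real deployment, PcmData frames arrive from Stream's pipeline:
            #   await stt_service.process_audio(pcm_data, participant)
            # Transcripts are not returned from process_audio; they are emitted via
            # the base class's _emit_transcript_event, so consumers listen for the
            # plugin's transcript events instead of polling for results.
            pass
        finally:
            await stt_service.close()


    if __name__ == "__main__":
        asyncio.run(main())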