vision-agents-plugins-local 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
.gitignore ADDED
@@ -0,0 +1,98 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .cursor/*
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ dist/
11
+ downloads/
12
+ develop-eggs/
13
+ eggs/
14
+ .eggs/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ pip-wheel-metadata/
22
+ MANIFEST
23
+ *.egg-info/
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ coverage.xml
38
+ nosetests.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+
44
+ # Type checker / lint caches
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .pytype/
49
+ .pyre/
50
+ .ruff_cache/
51
+
52
+ # Environments
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+ .env
60
+ .env.local
61
+ .env.*.local
62
+ .env.bak
63
+ pyvenv.cfg
64
+ .python-version
65
+
66
+ # Editors / IDEs
67
+ .vscode/
68
+ .idea/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints/
72
+
73
+ # OS / Misc
74
+ .DS_Store
75
+ *.log
76
+
77
+ # Tooling & repo-specific
78
+ pyrightconfig.json
79
+ shell.nix
80
+ bin/*
81
+ lib/*
82
+ stream-py/
83
+
84
+ # Example lock files (regenerated by uv sync)
85
+ examples/*/uv.lock
86
+ plugins/*/example/uv.lock
87
+
88
+ # Artifacts / assets
89
+ *.pt
90
+ *.kef
91
+ *.onnx
92
+ profile.html
93
+
94
+ /opencode.json
95
+ .ralph-tui/
96
+ .claude/
97
+
98
+ .uv-cache/
PKG-INFO ADDED
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-local
3
+ Version: 0.4.4
4
+ Summary: Local audio & video integration for Vision Agents
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,agents,local,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: aiortc<1.15.0,>=1.14.0
12
+ Requires-Dist: av<17,>=14.2.0
13
+ Requires-Dist: sounddevice>=0.5.0
14
+ Requires-Dist: vision-agents
README.md ADDED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-local"
7
+ dynamic = ["version"]
8
+ description = "Local audio & video integration for Vision Agents"
9
+ readme = "README.md"
10
+ keywords = ["local", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "sounddevice>=0.5.0",
16
+ "aiortc>=1.14.0, <1.15.0",
17
+ "av>=14.2.0, <17",
18
+ ]
19
+
20
+ [project.urls]
21
+ Documentation = "https://visionagents.ai/"
22
+ Website = "https://visionagents.ai/"
23
+ Source = "https://github.com/GetStream/Vision-Agents"
24
+
25
+ [tool.hatch.version]
26
+ source = "vcs"
27
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["."]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = ["/vision_agents"]
34
+
35
+ [tool.uv.sources]
36
+ vision-agents = { workspace = true }
37
+
38
+ [dependency-groups]
39
+ dev = [
40
+ "pytest>=8.4.1",
41
+ "pytest-asyncio>=1.0.0",
42
+ ]
@@ -0,0 +1,10 @@
1
+ from .devices import AudioInputDevice, AudioOutputDevice, CameraDevice
2
+ from .edge import LocalCall, LocalEdge
3
+
4
# Names re-exported by the package: the device types imported from
# ``.devices`` followed by the call/edge types imported from ``.edge``.
__all__ = [
    "AudioInputDevice", "AudioOutputDevice", "CameraDevice",
    "LocalCall", "LocalEdge",
]
@@ -0,0 +1,370 @@
1
+ """
2
+ Device enumeration and selection utilities for LocalTransport.
3
+
4
+ Provides typed device representations and interactive prompts for
5
+ selecting audio and video devices when running agents locally.
6
+ """
7
+
8
+ import glob
9
+ import logging
10
+ import platform
11
+ import queue
12
+ import re
13
+ import subprocess
14
+ from dataclasses import dataclass
15
+
16
+ import numpy as np
17
+ import sounddevice as sd
18
+
19
+ from .utils import prompt_selection
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
# Matches an "[AVFoundation ...] [<digits>] <rest of line>" entry and captures
# the bracketed number (group 1) and the remaining text (group 2).
_AVFOUNDATION_RE = re.compile(r"\[AVFoundation.*?\]\s*\[(\d+)\]\s*(.+)")
# Captures the first double-quoted string on a line (non-greedy, so it stops
# at the first closing quote).
_DSHOW_DEVICE_RE = re.compile(r'"(.+?)"')
26
+
27
+
28
+ class AudioInputDevice:
29
+ """Audio input device (microphone).
30
+
31
+ Combines device metadata with stream capture. Subclass to implement
32
+ custom audio backends (e.g. GStreamer).
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ index: int,
38
+ name: str,
39
+ sample_rate: int = 48000,
40
+ channels: int = 1,
41
+ is_default: bool = False,
42
+ blocksize: int | None = None,
43
+ ):
44
+ self.index = index
45
+ self.name = name
46
+ self._sample_rate = sample_rate
47
+ self._channels = channels
48
+ self.is_default = is_default
49
+ self._blocksize = (
50
+ blocksize if blocksize is not None else int(sample_rate * 0.02)
51
+ )
52
+ self._stream: sd.InputStream | None = None
53
+ self._buffer: queue.Queue[np.ndarray] = queue.Queue(maxsize=200)
54
+
55
+ @property
56
+ def sample_rate(self) -> int:
57
+ return self._sample_rate
58
+
59
+ @property
60
+ def channels(self) -> int:
61
+ return self._channels
62
+
63
+ def _callback(
64
+ self,
65
+ indata: np.ndarray,
66
+ frames: int,
67
+ time_info: object,
68
+ status: object,
69
+ ) -> None:
70
+ if status:
71
+ logger.warning("Audio input status: %s", status)
72
+ try:
73
+ self._buffer.put_nowait(indata.copy())
74
+ except queue.Full:
75
+ pass
76
+
77
+ def start(self) -> None:
78
+ """Open and start the audio input stream."""
79
+ self._stream = sd.InputStream(
80
+ samplerate=self._sample_rate,
81
+ channels=self._channels,
82
+ dtype="int16",
83
+ blocksize=self._blocksize,
84
+ device=self.index,
85
+ callback=self._callback,
86
+ )
87
+ self._stream.start()
88
+ logger.info(
89
+ "Started audio input: %dHz, %d channels",
90
+ self._sample_rate,
91
+ self._channels,
92
+ )
93
+
94
+ def read(self) -> np.ndarray | None:
95
+ """Block until audio data is available (up to 100ms timeout)."""
96
+ try:
97
+ return self._buffer.get(timeout=0.1)
98
+ except queue.Empty:
99
+ return None
100
+
101
+ def stop(self) -> None:
102
+ """Stop and close the audio input stream."""
103
+ if self._stream is not None:
104
+ self._stream.stop()
105
+ self._stream.close()
106
+ self._stream = None
107
+ logger.info("Stopped audio input")
108
+
109
+
110
class AudioOutputDevice:
    """Audio output device (speaker/headphones).

    Combines device metadata with stream playback. Subclass to implement
    custom audio backends (e.g. GStreamer).
    """

    def __init__(
        self,
        index: int,
        name: str,
        sample_rate: int = 48000,
        channels: int = 2,
        is_default: bool = False,
        blocksize: int = 2048,
    ):
        """Store the device description; no stream is opened here.

        Args:
            index: Device index passed to the stream as ``device``.
            name: Display name of the device.
            sample_rate: Playback rate in Hz.
            channels: Number of output channels.
            is_default: Whether this is the system default output.
            blocksize: Block size passed to the output stream.
        """
        self.index = index
        self.name = name
        self._sample_rate = sample_rate
        self._channels = channels
        self.is_default = is_default
        self._blocksize = blocksize
        self._stream: sd.OutputStream | None = None

    @property
    def sample_rate(self) -> int:
        """Playback sample rate in Hz."""
        return self._sample_rate

    @property
    def channels(self) -> int:
        """Number of output channels."""
        return self._channels

    def start(self) -> None:
        """Open and start the audio output stream.

        If a stream is already open it is stopped and closed first, so a
        repeated ``start()`` cannot orphan the previous stream. If the new
        stream fails to start it is closed before the error propagates.
        """
        if self._stream is not None:
            self.stop()
        stream = sd.OutputStream(
            samplerate=self._sample_rate,
            channels=self._channels,
            dtype="int16",
            blocksize=self._blocksize,
            device=self.index,
        )
        try:
            stream.start()
        except Exception:
            stream.close()
            raise
        self._stream = stream
        logger.info(
            "Started audio output: %dHz, %d channels",
            self._sample_rate,
            self._channels,
        )

    def write(self, samples: np.ndarray) -> None:
        """Write interleaved int16 samples to the device.

        Args:
            samples: Interleaved samples, normally a flat array. An array
                already shaped ``(frames, channels)`` is accepted as well.

        The call is a no-op when the stream is not started or when
        ``samples`` holds less than one complete frame. Trailing samples
        that do not fill a whole frame are dropped instead of making the
        reshape fail.
        """
        if self._stream is None:
            return
        # Flatten first so both flat and (frames, channels) inputs work;
        # len() on a 2-D array would count rows, not samples.
        flat = np.asarray(samples).reshape(-1)
        frames = flat.size // self._channels
        if frames == 0:
            return
        audio = flat[: frames * self._channels].reshape(frames, self._channels)
        self._stream.write(audio)

    def flush(self) -> None:
        """Abort current playback and restart the stream."""
        if self._stream is not None:
            self._stream.abort()
            self._stream.start()

    def stop(self) -> None:
        """Stop and close the audio output stream.

        Does nothing when no stream is open. The stream is always closed and
        forgotten, even if stopping it raises (the error still propagates).
        """
        stream = self._stream
        if stream is None:
            return
        # Clear the handle first so a failure below cannot leave a
        # half-closed stream attached to this device.
        self._stream = None
        try:
            stream.stop()
        finally:
            stream.close()
        logger.info("Stopped audio output")
179
+
180
+
181
@dataclass(frozen=True)
class CameraDevice:
    """Immutable description of one detected camera."""

    # Numeric index assigned to the camera when it was enumerated.
    index: int
    # Human-readable camera name.
    name: str
    # Platform-specific identifier string for the camera.
    device: str
188
+
189
+
190
def list_audio_input_devices() -> list[AudioInputDevice]:
    """Return all audio input devices.

    Every entry from ``sd.query_devices()`` with at least one input channel
    becomes an ``AudioInputDevice`` configured with that entry's default
    sample rate and maximum input channel count. The entry whose position
    equals ``sd.default.device[0]`` is flagged as the default.
    """
    entries = sd.query_devices()
    default_index = sd.default.device[0]
    microphones: list[AudioInputDevice] = []
    for position, info in enumerate(entries):
        # Skip output-only devices.
        if not info["max_input_channels"] > 0:
            continue
        microphones.append(
            AudioInputDevice(
                index=position,
                name=info["name"],
                sample_rate=int(info["default_samplerate"]),
                channels=info["max_input_channels"],
                is_default=(position == default_index),
            )
        )
    return microphones
205
+
206
+
207
def list_audio_output_devices() -> list[AudioOutputDevice]:
    """Return all audio output devices.

    Every entry from ``sd.query_devices()`` with at least one output channel
    becomes an ``AudioOutputDevice`` configured with that entry's default
    sample rate and maximum output channel count. The entry whose position
    equals ``sd.default.device[1]`` is flagged as the default.
    """
    entries = sd.query_devices()
    default_index = sd.default.device[1]
    speakers: list[AudioOutputDevice] = []
    for position, info in enumerate(entries):
        # Skip input-only devices.
        if not info["max_output_channels"] > 0:
            continue
        speakers.append(
            AudioOutputDevice(
                index=position,
                name=info["name"],
                sample_rate=int(info["default_samplerate"]),
                channels=info["max_output_channels"],
                is_default=(position == default_index),
            )
        )
    return speakers
222
+
223
+
224
def select_audio_input_device() -> AudioInputDevice | None:
    """Interactively choose a microphone.

    Lists the input devices and passes them to ``prompt_selection`` with the
    first device flagged as default (if any) as the ``default`` argument.

    Returns:
        Whatever ``prompt_selection`` returns for the listed devices.
    """
    microphones = list_audio_input_devices()
    preselected = None
    for candidate in microphones:
        if candidate.is_default:
            preselected = candidate
            break
    return prompt_selection(
        items=microphones,
        formatter=_format_audio_device,
        header="INPUT DEVICES (Microphones)",
        default=preselected,
    )
234
+
235
+
236
def select_audio_output_device() -> AudioOutputDevice | None:
    """Interactively choose a speaker/headphone device.

    Lists the output devices and passes them to ``prompt_selection`` with the
    first device flagged as default (if any) as the ``default`` argument.

    Returns:
        Whatever ``prompt_selection`` returns for the listed devices.
    """
    speakers = list_audio_output_devices()
    preselected = None
    for candidate in speakers:
        if candidate.is_default:
            preselected = candidate
            break
    return prompt_selection(
        items=speakers,
        formatter=_format_audio_device,
        header="OUTPUT DEVICES (Speakers)",
        default=preselected,
    )
246
+
247
+
248
def select_video_device() -> CameraDevice | None:
    """Interactively choose a camera, with the option to skip.

    Returns:
        The selected camera device, or None if skipped.
    """

    def _label(camera: CameraDevice) -> str:
        # Cameras are shown by name only.
        return camera.name

    detected = list_cameras()
    return prompt_selection(
        items=detected,
        formatter=_label,
        header="VIDEO DEVICES (Cameras)",
        allow_skip=True,
        empty_message="No cameras detected\n (Camera support requires ffmpeg to be installed)",
    )
263
+
264
+
265
+ def list_cameras() -> list[CameraDevice]:
266
+ """List available cameras on the system."""
267
+ system = platform.system()
268
+
269
+ if system == "Darwin":
270
+ return _list_cameras_darwin()
271
+ if system == "Linux":
272
+ return _list_cameras_linux()
273
+ if system == "Windows":
274
+ return _list_cameras_windows()
275
+
276
+ return []
277
+
278
+
279
def _format_audio_device(dev: AudioInputDevice | AudioOutputDevice) -> str:
    """Build the display label for an audio device.

    The label is ``"<name> (<sample_rate>Hz)"``, with ``" [DEFAULT]"``
    appended when the device is flagged as default.
    """
    label = f"{dev.name} ({dev.sample_rate}Hz)"
    if dev.is_default:
        label += " [DEFAULT]"
    return label
283
+
284
+
285
def _list_cameras_darwin() -> list[CameraDevice]:
    """List cameras on macOS via ffmpeg/AVFoundation.

    Runs ``ffmpeg -f avfoundation -list_devices true -i ""`` and parses the
    device listing from its stderr.

    Returns:
        One ``CameraDevice`` per entry found between the "AVFoundation video
        devices:" and "AVFoundation audio devices:" markers. An empty list is
        returned (with a warning logged) when ffmpeg cannot be run or does
        not finish within 5 seconds.
    """
    try:
        result = subprocess.run(
            ["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
            capture_output=True,
            text=True,
            # Undecodable bytes in a device name must not abort enumeration.
            errors="replace",
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as
        # other launch failures such as PermissionError.
        logger.warning("Failed to list cameras (is ffmpeg installed?)")
        return []

    cameras: list[CameraDevice] = []
    in_video_section = False

    for line in result.stderr.splitlines():
        if "AVFoundation video devices:" in line:
            in_video_section = True
            continue
        if "AVFoundation audio devices:" in line:
            # Everything after this marker is audio; stop scanning.
            break
        if not in_video_section:
            continue
        match = _AVFOUNDATION_RE.search(line)
        if match:
            cam_idx = int(match.group(1))
            cameras.append(
                CameraDevice(
                    index=cam_idx,
                    name=match.group(2).strip(),
                    # The listed index doubles as the device identifier.
                    device=str(cam_idx),
                )
            )

    return cameras
318
+
319
+
320
def _video_node_sort_key(dev_path: str) -> tuple[int, int, str]:
    """Sort key ordering ``/dev/videoN`` paths by their numeric suffix.

    Paths without a purely numeric suffix sort after the numbered ones, in
    plain string order.
    """
    suffix = dev_path.rsplit("/", 1)[-1].removeprefix("video")
    if suffix.isascii() and suffix.isdigit():
        return (0, int(suffix), dev_path)
    return (1, 0, dev_path)


def _list_cameras_linux() -> list[CameraDevice]:
    """List cameras on Linux via /dev/video* and sysfs.

    Each ``/dev/video*`` node becomes one ``CameraDevice`` whose name is read
    from ``/sys/class/video4linux/<node>/name``; if that file cannot be read
    the device path is used as the name.

    Nodes are ordered numerically (video2 before video10) so the assigned
    indices follow the node numbers rather than string order.
    """
    cameras: list[CameraDevice] = []

    nodes = sorted(glob.glob("/dev/video*"), key=_video_node_sort_key)
    for i, dev_path in enumerate(nodes):
        name_path = f"/sys/class/video4linux/{dev_path.split('/')[-1]}/name"
        try:
            # errors="replace": a name with undecodable bytes must not abort
            # enumeration (UnicodeDecodeError is not an OSError).
            with open(name_path, errors="replace") as f:
                name = f.read().strip()
        except OSError:
            name = dev_path
        cameras.append(CameraDevice(index=i, name=name, device=dev_path))

    return cameras
334
+
335
+
336
def _list_cameras_windows() -> list[CameraDevice]:
    """List cameras on Windows via ffmpeg/DirectShow.

    Runs ``ffmpeg -f dshow -list_devices true -i dummy`` and parses the
    device listing from its stderr. Two listing layouts are handled:

    * sectioned: a "DirectShow video devices" header followed by quoted
      device names, ended by a "DirectShow audio devices" header;
    * typed: each device line is ``"<name>" (<types>)`` with no section
      headers (NOTE(review): believed to be the layout of newer ffmpeg
      builds, so confirm against the ffmpeg versions you support).

    ``Alternative name "..."`` lines are ignored in both layouts so that a
    camera is not reported twice.

    Returns:
        One ``CameraDevice`` per video device, indexed in listing order, with
        ``device`` set to ``video="<name>"``. An empty list is returned (with
        a warning logged) when ffmpeg cannot be run or does not finish within
        5 seconds.
    """
    try:
        result = subprocess.run(
            ["ffmpeg", "-f", "dshow", "-list_devices", "true", "-i", "dummy"],
            capture_output=True,
            text=True,
            # Undecodable bytes in a device name must not abort enumeration.
            errors="replace",
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as
        # other launch failures such as PermissionError.
        logger.warning("Failed to list cameras (is ffmpeg installed?)")
        return []

    cameras: list[CameraDevice] = []
    in_video_section = False

    for line in result.stderr.splitlines():
        if "DirectShow video devices" in line:
            in_video_section = True
            continue
        if "DirectShow audio devices" in line:
            # Everything after this header is audio; stop scanning.
            break
        if "Alternative name" in line:
            # The quoted alternative name is not a separate device.
            continue

        if in_video_section:
            match = _DSHOW_DEVICE_RE.search(line)
            name = match.group(1) if match else None
        else:
            # Typed layout: keep only entries whose type list mentions video.
            typed = re.search(r'"(.+?)"\s*\(([^()]*)\)\s*$', line)
            name = typed.group(1) if typed and "video" in typed.group(2) else None

        if name is not None:
            cameras.append(
                CameraDevice(
                    index=len(cameras),
                    name=name,
                    device=f'video="{name}"',
                )
            )

    return cameras