trugen-sdk 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trugen_sdk-1.0.0/LICENSE +21 -0
- trugen_sdk-1.0.0/PKG-INFO +355 -0
- trugen_sdk-1.0.0/README.md +318 -0
- trugen_sdk-1.0.0/pyproject.toml +53 -0
- trugen_sdk-1.0.0/setup.cfg +4 -0
- trugen_sdk-1.0.0/trugen/__init__.py +12 -0
- trugen_sdk-1.0.0/trugen/_audio_manager.py +285 -0
- trugen_sdk-1.0.0/trugen/_media_streams.py +59 -0
- trugen_sdk-1.0.0/trugen/_runner.py +258 -0
- trugen_sdk-1.0.0/trugen/_session.py +518 -0
- trugen_sdk-1.0.0/trugen/client.py +120 -0
- trugen_sdk-1.0.0/trugen/constants.py +31 -0
- trugen_sdk-1.0.0/trugen/session.py +3 -0
- trugen_sdk-1.0.0/trugen/types.py +28 -0
- trugen_sdk-1.0.0/trugen/utils/emitter.py +71 -0
- trugen_sdk-1.0.0/trugen_sdk.egg-info/PKG-INFO +355 -0
- trugen_sdk-1.0.0/trugen_sdk.egg-info/SOURCES.txt +18 -0
- trugen_sdk-1.0.0/trugen_sdk.egg-info/dependency_links.txt +1 -0
- trugen_sdk-1.0.0/trugen_sdk.egg-info/requires.txt +8 -0
- trugen_sdk-1.0.0/trugen_sdk.egg-info/top_level.txt +1 -0
trugen_sdk-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TruGen AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trugen-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Official Python SDK for TruGen AI - Real-time AI avatar streaming
|
|
5
|
+
Author-email: TruGen AI <support@trugen.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://trugen.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.trugen.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/trugenai/python-sdk
|
|
10
|
+
Project-URL: Issues, https://github.com/trugenai/python-sdk/issues
|
|
11
|
+
Keywords: trugen,ai,avatar,avatar-streaming,voice-ai,livekit,real-time,webrtc
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Communications
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
22
|
+
Classifier: Topic :: Multimedia :: Video
|
|
23
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
24
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
|
+
Classifier: Typing :: Typed
|
|
26
|
+
Requires-Python: >=3.10
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: livekit>=0.11.0
|
|
30
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
32
|
+
Provides-Extra: display
|
|
33
|
+
Requires-Dist: opencv-python>=4.8.0; extra == "display"
|
|
34
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "display"
|
|
35
|
+
Requires-Dist: numpy>=1.24.0; extra == "display"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# TruGen AI Python SDK
|
|
39
|
+
|
|
40
|
+
Official Python SDK for [TruGen AI](https://trugen.ai) - Real-time AI avatar streaming.
|
|
41
|
+
|
|
42
|
+
[![PyPI version](https://badge.fury.io/py/trugen-sdk.svg)](https://badge.fury.io/py/trugen-sdk)
|
|
43
|
+
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
|
|
44
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
45
|
+
|
|
46
|
+
All WebRTC and audio/video processing complexity (LiveKit, Acoustic Echo Cancellation, decoding, threading) is handled **under the hood**. You only ever need to import from `trugen`.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Using uv (recommended)
|
|
52
|
+
uv add trugen-sdk
|
|
53
|
+
|
|
54
|
+
# With optional display utilities (for OpenCV and audio playback testing)
|
|
55
|
+
uv add trugen-sdk --extra display
|
|
56
|
+
|
|
57
|
+
# Using pip
|
|
58
|
+
pip install trugen-sdk
|
|
59
|
+
|
|
60
|
+
# With optional display utilities (for OpenCV and audio playback testing)
|
|
61
|
+
pip install trugen-sdk[display]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
### Simple OpenCV Video Display (Using TruGenRunner)
|
|
67
|
+
|
|
68
|
+
For most UI/desktop applications, `TruGenRunner` handles spawning a background event loop thread for the session, while serving BGR video frames and state to the main thread safely.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import cv2
|
|
72
|
+
import os
|
|
73
|
+
from trugen import TruGenClient, TruGenRunner
|
|
74
|
+
|
|
75
|
+
# 1. Define how to connect to the session
|
|
76
|
+
async def create_session():
|
|
77
|
+
client = TruGenClient(api_key=os.getenv("TRUGEN_API_KEY", ""))
|
|
78
|
+
session = await client.create_session(agent_id=os.getenv("TRUGEN_AGENT_ID", ""))
|
|
79
|
+
await session.connect()
|
|
80
|
+
await session.enable_audio_output() # Speaker + AEC in one call
|
|
81
|
+
return session
|
|
82
|
+
|
|
83
|
+
# 2. Initialize the runner
|
|
84
|
+
runner = TruGenRunner(session_factory=create_session)
|
|
85
|
+
|
|
86
|
+
# 3. Handle incoming frames
|
|
87
|
+
@runner.on_frame
|
|
88
|
+
def show_frame(frame):
|
|
89
|
+
if frame is not None:
|
|
90
|
+
cv2.imshow("TruGen Avatar", frame)
|
|
91
|
+
if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
92
|
+
runner.stop()
|
|
93
|
+
|
|
94
|
+
# 4. Start rendering loop (blocks main thread)
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
runner.run()
|
|
97
|
+
cv2.destroyAllWindows()
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Features
|
|
103
|
+
|
|
104
|
+
- 🎥 **Real-time Audio/Video Streaming** - Receive synchronized high-quality audio and video frames directly from the avatar.
|
|
105
|
+
- 🔇 **Built-in Acoustic Echo Cancellation (AEC)** - Automatic APM synchronization via `enable_audio_output()` prevents the avatar from hearing and responding to its own voice.
|
|
106
|
+
- 🟩 **BGR OpenCV Frames** - Zero-boilerplate async iterator `video_frames_bgr()` yielding pre-converted NumPy arrays ready for OpenCV.
|
|
107
|
+
- ⚙️ **GUI Runner Support** - Thread-safe `TruGenRunner` wrapper solves blocking rendering loops in OpenCV, Pygame, PyQt/PySide, or custom game engines.
|
|
108
|
+
- 🎵 **Custom Audio Injection** - Programmatically inject WAV files (`upload_audio()`) or raw 16-bit PCM bytes (`send_audio()`) directly into the room.
|
|
109
|
+
- 🎙️ **Microphone Lifecycle Management** - Built-in utilities for muting/unmuting the mic and monitoring mic permissions (pending/granted/denied).
|
|
110
|
+
- 💬 **Real-time Captions** - Event hooks to handle caption and transcript updates as they arrive, with minimal latency.
|
|
111
|
+
- 📝 **Clean Transcripts** - Distinguish between user transcripts (`user.transcription_received`) and agent utterances (`agent.transcription_final`) for logging.
|
|
112
|
+
- 📡 **Async Iterator API** - Stream raw audio (`AudioFrame`) and video (`VideoFrame`) natively via Python async generators.
|
|
113
|
+
- 🎯 **Event-Driven Architecture** - Decorator-based event handlers for connection, tracks, speaking states, and transcriptions.
|
|
114
|
+
- 📝 **Fully Typed** - Complete type hints for IDE autocompletion and safety.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## API Reference
|
|
119
|
+
|
|
120
|
+
### `TruGenClient`
|
|
121
|
+
|
|
122
|
+
The entry point for starting TruGen AI sessions.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from trugen import TruGenClient
|
|
126
|
+
|
|
127
|
+
# Initialize with your API key
|
|
128
|
+
client = TruGenClient(api_key="your-api-key")
|
|
129
|
+
|
|
130
|
+
# Create a session with your Agent ID
|
|
131
|
+
session = await client.create_session(agent_id="your-agent-id")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
### `TruGenSession`
|
|
137
|
+
|
|
138
|
+
Represents an active connection to a streaming room.
|
|
139
|
+
|
|
140
|
+
#### Connection
|
|
141
|
+
- `await session.connect()`: Connect to the streaming room and publish the local microphone.
|
|
142
|
+
- `await session.disconnect()`: Disconnect and cleanly release all hardware/stream resources.
|
|
143
|
+
|
|
144
|
+
#### Audio Output
|
|
145
|
+
- `await session.enable_audio_output()`: Activates speaker playback with built-in echo cancellation. Call this once after `connect()`.
|
|
146
|
+
|
|
147
|
+
#### Video & Audio Generators
|
|
148
|
+
- `session.video_frames_bgr()`: Async generator yielding NumPy arrays (`NDArray`) in BGR format, ready for OpenCV.
|
|
149
|
+
- `session.video_frames()`: Async generator yielding raw LiveKit `VideoFrame` objects.
|
|
150
|
+
- `session.audio_frames()`: Async generator yielding raw LiveKit `AudioFrame` objects.
|
|
151
|
+
|
|
152
|
+
#### Microphone Control
|
|
153
|
+
- `session.mute_input_audio()`: Mutes the local microphone.
|
|
154
|
+
- `session.unmute_input_audio()`: Unmutes the local microphone.
|
|
155
|
+
- `session.is_input_muted()`: Returns `True` if the microphone is muted.
|
|
156
|
+
- `session.get_input_audio_state()`: Returns an `InputAudioState` object containing mute status and mic permission status (`pending`, `granted`, `denied`).
|
|
157
|
+
- `await session.start_mic()`: Connects and publishes the microphone track.
|
|
158
|
+
- `await session.stop_mic()`: Stops capturing and unpublishes the microphone track.
|
|
159
|
+
|
|
160
|
+
#### Custom Audio Injection
|
|
161
|
+
- `await session.upload_audio(file_path)`: Streams a PCM WAV file into the room.
|
|
162
|
+
- `await session.send_audio(data, sample_rate=48000, num_channels=1)`: Injects raw 16-bit PCM bytes into the audio stream.
|
|
163
|
+
|
|
164
|
+
#### Low-Level Accessors
|
|
165
|
+
- `session.get_video_track()`: Returns the remote `RemoteVideoTrack` object (or `None`).
|
|
166
|
+
- `session.get_audio_track()`: Returns the remote `RemoteAudioTrack` object (or `None`).
|
|
167
|
+
- `session.room`: Returns the underlying `livekit.rtc.Room` instance for advanced operations.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### `TruGenRunner`
|
|
172
|
+
|
|
173
|
+
Handles multi-threading to run the async session event loop on a background thread while feeding events safely to the main rendering thread.
|
|
174
|
+
|
|
175
|
+
#### Controls
|
|
176
|
+
- `runner.run()`: Starts the runner and blocks the main thread to run the rendering loop.
|
|
177
|
+
- `runner.stop()`: Safely stops the background loop and disconnects the session (thread-safe).
|
|
178
|
+
- `runner.toggle_mute()`: Toggles the microphone mute state (thread-safe).
|
|
179
|
+
|
|
180
|
+
#### Properties & Accessors
|
|
181
|
+
- `runner.mic_muted`: Returns `True` if the microphone is currently muted.
|
|
182
|
+
- `runner.session_state`: Returns the current session state enum (`TruGenState`).
|
|
183
|
+
- `runner.session`: Access the active `TruGenSession` instance (returns `None` until connected).
|
|
184
|
+
- `runner.get_caption()`: Returns a tuple `(text, timestamp)` containing the last received caption chunk and the monotonic timestamp at which it arrived.
|
|
185
|
+
|
|
186
|
+
#### Event Decorators
|
|
187
|
+
|
|
188
|
+
You can register event handlers using decorators on the `TruGenRunner` (for UI/main-thread callbacks) or directly on the `TruGenSession` (for low-level async callbacks).
|
|
189
|
+
|
|
190
|
+
##### 1. Runner Decorators (`TruGenRunner`)
|
|
191
|
+
|
|
192
|
+
- **`@runner.on_frame`**: Receives BGR video frames (NumPy arrays) or `None` on the main thread.
|
|
193
|
+
```python
|
|
194
|
+
@runner.on_frame
|
|
195
|
+
def on_frame(frame):
|
|
196
|
+
if frame is not None:
|
|
197
|
+
cv2.imshow("Avatar", frame)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
- **`@runner.on_caption`**: Receives real-time streaming caption chunks (ideal for UI overlays).
|
|
201
|
+
```python
|
|
202
|
+
@runner.on_caption
|
|
203
|
+
def on_caption(text: str):
|
|
204
|
+
# Fired for each caption chunk as it arrives
|
|
205
|
+
pass
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
- **`@runner.on_state`**: Called when the session's connection state transitions.
|
|
209
|
+
```python
|
|
210
|
+
@runner.on_state
|
|
211
|
+
def on_state(state: TruGenState):
|
|
212
|
+
print(f"Status: {state.value}")
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
- **`@runner.on_event`**: Handles any standard `TruGenEvent` enum or custom string event.
|
|
216
|
+
```python
|
|
217
|
+
# Log final complete transcripts
|
|
218
|
+
@runner.on_event("user.transcription_received")
|
|
219
|
+
def on_user_transcript(text: str):
|
|
220
|
+
print(f"[User] {text}")
|
|
221
|
+
|
|
222
|
+
@runner.on_event("agent.transcription_final")
|
|
223
|
+
def on_agent_transcript(text: str):
|
|
224
|
+
print(f"[Agent] {text}")
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
##### 2. Session Decorators (`TruGenSession`)
|
|
228
|
+
|
|
229
|
+
If you are not using `TruGenRunner`, you can listen to events directly on the `TruGenSession` using the `@session.on()` decorator:
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
# Log final complete transcripts directly from the session
|
|
233
|
+
@session.on("user.transcription_received")
|
|
234
|
+
def on_user_speech(text: str):
|
|
235
|
+
print(f"[User] {text}")
|
|
236
|
+
|
|
237
|
+
@session.on("agent.transcription_final")
|
|
238
|
+
def on_agent_speech(text: str):
|
|
239
|
+
print(f"[Agent] {text}")
|
|
240
|
+
|
|
241
|
+
# Handle speaking state changes
|
|
242
|
+
@session.on(TruGenEvent.AGENT_SPEAKING_STARTED)
|
|
243
|
+
def agent_speech_start():
|
|
244
|
+
print("Agent started speaking...")
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
### Events (`TruGenEvent`)
|
|
250
|
+
|
|
251
|
+
Register listener callbacks directly on a `TruGenSession` or a `TruGenRunner` using `@session.on()` or `@runner.on_event()`.
|
|
252
|
+
|
|
253
|
+
| Event Enum / String | Fired When | Callback Arguments |
|
|
254
|
+
|---|---|---|
|
|
255
|
+
| `TruGenEvent.STATE_CHANGED` | The session state changes | `state: TruGenState` |
|
|
256
|
+
| `TruGenEvent.CONNECTION_ESTABLISHED` | Successfully connected to room | None |
|
|
257
|
+
| `TruGenEvent.CONNECTION_CLOSED` | Session room disconnected | `reason: DisconnectReason` |
|
|
258
|
+
| `TruGenEvent.VIDEO_STREAM_STARTED` | Remote video track subscribed | `track: RemoteVideoTrack` |
|
|
259
|
+
| `TruGenEvent.AUDIO_STREAM_STARTED` | Remote audio track subscribed | `track: RemoteAudioTrack` |
|
|
260
|
+
| `TruGenEvent.INPUT_AUDIO_STREAM_STARTED` | Local mic audio stream begins publishing | None |
|
|
261
|
+
| `TruGenEvent.AGENT_SPEAKING_STARTED` | Agent starts speaking | None |
|
|
262
|
+
| `TruGenEvent.AGENT_SPEAKING_ENDED` | Agent stops speaking | None |
|
|
263
|
+
| `TruGenEvent.USER_SPEECH_STARTED` | User starts speaking | None |
|
|
264
|
+
| `TruGenEvent.USER_SPEECH_ENDED` | User stops speaking | None |
|
|
265
|
+
| `TruGenEvent.TEXT_CHUNK_RECEIVED` | Caption/Text chunk received | `text: str` |
|
|
266
|
+
| `TruGenEvent.MIC_PERMISSION_PENDING` | Mic permission request is pending | None |
|
|
267
|
+
| `TruGenEvent.MIC_PERMISSION_GRANTED` | Mic permission has been granted | None |
|
|
268
|
+
| `TruGenEvent.MIC_PERMISSION_DENIED` | Mic permission has been denied | None |
|
|
269
|
+
| `"user.transcription_received"` | User completes a final utterance | `text: str` |
|
|
270
|
+
| `"agent.transcription_final"` | Agent completes a final utterance | `text: str` |
|
|
271
|
+
| `"connection.reconnecting"` | Transient network reconnection starts | None |
|
|
272
|
+
| `"connection.reconnected"` | Network reconnection completes | None |
|
|
273
|
+
| `"connection.quality_changed"` | Participant connection quality changes | `participant, quality` |
|
|
274
|
+
| `TruGenEvent.ERROR` | A session or connection error occurs | `error: Exception` |
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
### Session States (`TruGenState`)
|
|
279
|
+
|
|
280
|
+
| Enum Value | Description |
|
|
281
|
+
|---|---|
|
|
282
|
+
| `TruGenState.INITIALIZING` | Session created but not yet connected |
|
|
283
|
+
| `TruGenState.CONNECTING` | WebRTC handshake and connection in progress |
|
|
284
|
+
| `TruGenState.CONNECTED` | Connection established; actively streaming media |
|
|
285
|
+
| `TruGenState.DISCONNECTED` | Session ended and connection closed |
|
|
286
|
+
| `TruGenState.ERROR` | Unrecoverable error occurred |
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Detailed Examples
|
|
291
|
+
|
|
292
|
+
### Interactive Session with GUI & WAV Injection
|
|
293
|
+
|
|
294
|
+
The bundled examples demonstrate a fully featured interactive application with:
|
|
295
|
+
- Real-time video window and connection status.
|
|
296
|
+
- Mic mute controls and speaking indicator.
|
|
297
|
+
- Floating caption overlay.
|
|
298
|
+
- WAV audio injection support (press `A` to send a local WAV file to the agent).
|
|
299
|
+
|
|
300
|
+
See the built-in examples in the `examples/` directory:
|
|
301
|
+
- [Basic GUI Viewer](examples/basic_session_ui.py) - Simple viewer containing status bar, mic indicators, and floating captions.
|
|
302
|
+
- [Advanced GUI Viewer](examples/advanced_session_ui.py) - Full-featured demonstration including WAV audio injection, reconnection handling, and complete transcripts.
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## Configuration
|
|
307
|
+
|
|
308
|
+
Set the API authentication credentials in a `.env` file or export them directly in your environment:
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
export TRUGEN_API_KEY="your-api-key"
|
|
312
|
+
export TRUGEN_AGENT_ID="your-agent-id"
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## Error Handling
|
|
316
|
+
|
|
317
|
+
Handle exceptions using standard try/except blocks around `create_session` and connection logic:
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
import asyncio
|
|
321
|
+
from trugen import TruGenClient
|
|
322
|
+
|
|
323
|
+
client = TruGenClient(api_key="invalid-key")
|
|
324
|
+
|
|
325
|
+
async def main():
|
|
326
|
+
try:
|
|
327
|
+
session = await client.create_session(agent_id="my-agent")
|
|
328
|
+
await session.connect()
|
|
329
|
+
except RuntimeError as e:
|
|
330
|
+
print(f"Connection failed: {e}")
|
|
331
|
+
except Exception as e:
|
|
332
|
+
print(f"An unexpected error occurred: {e}")
|
|
333
|
+
|
|
334
|
+
asyncio.run(main())
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Requirements
|
|
338
|
+
|
|
339
|
+
- Python 3.10+
|
|
340
|
+
- **Core Dependencies:**
|
|
341
|
+
- `livekit` (>=0.11.0)
|
|
342
|
+
- `aiohttp` (>=3.8.0)
- `python-dotenv` (>=1.0.0)
|
|
343
|
+
- **Optional Dependencies (`[display]`):**
|
|
344
|
+
- `opencv-python` (>=4.8.0)
|
|
345
|
+
- `sounddevice` (>=0.4.6)
|
|
346
|
+
- `numpy` (>=1.24.0)
|
|
347
|
+
|
|
348
|
+
## License
|
|
349
|
+
|
|
350
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
351
|
+
|
|
352
|
+
## Links
|
|
353
|
+
|
|
354
|
+
- [TruGen AI Website](https://trugen.ai)
|
|
355
|
+
- [Developer Portal / Dashboard](https://dashboard.trugen.ai)
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
# TruGen AI Python SDK
|
|
2
|
+
|
|
3
|
+
Official Python SDK for [TruGen AI](https://trugen.ai) - Real-time AI avatar streaming.
|
|
4
|
+
|
|
5
|
+
[![PyPI version](https://badge.fury.io/py/trugen-sdk.svg)](https://badge.fury.io/py/trugen-sdk)
|
|
6
|
+
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
|
|
7
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
All WebRTC and audio/video processing complexity (LiveKit, Acoustic Echo Cancellation, decoding, threading) is handled **under the hood**. You only ever need to import from `trugen`.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Using uv (recommended)
|
|
15
|
+
uv add trugen-sdk
|
|
16
|
+
|
|
17
|
+
# With optional display utilities (for OpenCV and audio playback testing)
|
|
18
|
+
uv add trugen-sdk --extra display
|
|
19
|
+
|
|
20
|
+
# Using pip
|
|
21
|
+
pip install trugen-sdk
|
|
22
|
+
|
|
23
|
+
# With optional display utilities (for OpenCV and audio playback testing)
|
|
24
|
+
pip install trugen-sdk[display]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
### Simple OpenCV Video Display (Using TruGenRunner)
|
|
30
|
+
|
|
31
|
+
For most UI/desktop applications, `TruGenRunner` handles spawning a background event loop thread for the session, while serving BGR video frames and state to the main thread safely.
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import cv2
|
|
35
|
+
import os
|
|
36
|
+
from trugen import TruGenClient, TruGenRunner
|
|
37
|
+
|
|
38
|
+
# 1. Define how to connect to the session
|
|
39
|
+
async def create_session():
|
|
40
|
+
client = TruGenClient(api_key=os.getenv("TRUGEN_API_KEY", ""))
|
|
41
|
+
session = await client.create_session(agent_id=os.getenv("TRUGEN_AGENT_ID", ""))
|
|
42
|
+
await session.connect()
|
|
43
|
+
await session.enable_audio_output() # Speaker + AEC in one call
|
|
44
|
+
return session
|
|
45
|
+
|
|
46
|
+
# 2. Initialize the runner
|
|
47
|
+
runner = TruGenRunner(session_factory=create_session)
|
|
48
|
+
|
|
49
|
+
# 3. Handle incoming frames
|
|
50
|
+
@runner.on_frame
|
|
51
|
+
def show_frame(frame):
|
|
52
|
+
if frame is not None:
|
|
53
|
+
cv2.imshow("TruGen Avatar", frame)
|
|
54
|
+
if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
55
|
+
runner.stop()
|
|
56
|
+
|
|
57
|
+
# 4. Start rendering loop (blocks main thread)
|
|
58
|
+
if __name__ == "__main__":
|
|
59
|
+
runner.run()
|
|
60
|
+
cv2.destroyAllWindows()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Features
|
|
66
|
+
|
|
67
|
+
- 🎥 **Real-time Audio/Video Streaming** - Receive synchronized high-quality audio and video frames directly from the avatar.
|
|
68
|
+
- 🔇 **Built-in Acoustic Echo Cancellation (AEC)** - Automatic APM synchronization via `enable_audio_output()` prevents the avatar from hearing and responding to its own voice.
|
|
69
|
+
- 🟩 **BGR OpenCV Frames** - Zero-boilerplate async iterator `video_frames_bgr()` yielding pre-converted NumPy arrays ready for OpenCV.
|
|
70
|
+
- ⚙️ **GUI Runner Support** - Thread-safe `TruGenRunner` wrapper solves blocking rendering loops in OpenCV, Pygame, PyQt/PySide, or custom game engines.
|
|
71
|
+
- 🎵 **Custom Audio Injection** - Programmatically inject WAV files (`upload_audio()`) or raw 16-bit PCM bytes (`send_audio()`) directly into the room.
|
|
72
|
+
- 🎙️ **Microphone Lifecycle Management** - Built-in utilities for muting/unmuting the mic and monitoring mic permissions (pending/granted/denied).
|
|
73
|
+
- 💬 **Real-time Captions** - Event hooks to handle caption and transcript updates as they arrive, with minimal latency.
|
|
74
|
+
- 📝 **Clean Transcripts** - Distinguish between user transcripts (`user.transcription_received`) and agent utterances (`agent.transcription_final`) for logging.
|
|
75
|
+
- 📡 **Async Iterator API** - Stream raw audio (`AudioFrame`) and video (`VideoFrame`) natively via Python async generators.
|
|
76
|
+
- 🎯 **Event-Driven Architecture** - Decorator-based event handlers for connection, tracks, speaking states, and transcriptions.
|
|
77
|
+
- 📝 **Fully Typed** - Complete type hints for IDE autocompletion and safety.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## API Reference
|
|
82
|
+
|
|
83
|
+
### `TruGenClient`
|
|
84
|
+
|
|
85
|
+
The entry point for starting TruGen AI sessions.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from trugen import TruGenClient
|
|
89
|
+
|
|
90
|
+
# Initialize with your API key
|
|
91
|
+
client = TruGenClient(api_key="your-api-key")
|
|
92
|
+
|
|
93
|
+
# Create a session with your Agent ID
|
|
94
|
+
session = await client.create_session(agent_id="your-agent-id")
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### `TruGenSession`
|
|
100
|
+
|
|
101
|
+
Represents an active connection to a streaming room.
|
|
102
|
+
|
|
103
|
+
#### Connection
|
|
104
|
+
- `await session.connect()`: Connect to the streaming room and publish the local microphone.
|
|
105
|
+
- `await session.disconnect()`: Disconnect and cleanly release all hardware/stream resources.
|
|
106
|
+
|
|
107
|
+
#### Audio Output
|
|
108
|
+
- `await session.enable_audio_output()`: Activates speaker playback with built-in echo cancellation. Call this once after `connect()`.
|
|
109
|
+
|
|
110
|
+
#### Video & Audio Generators
|
|
111
|
+
- `session.video_frames_bgr()`: Async generator yielding NumPy arrays (`NDArray`) in BGR format, ready for OpenCV.
|
|
112
|
+
- `session.video_frames()`: Async generator yielding raw LiveKit `VideoFrame` objects.
|
|
113
|
+
- `session.audio_frames()`: Async generator yielding raw LiveKit `AudioFrame` objects.
|
|
114
|
+
|
|
115
|
+
#### Microphone Control
|
|
116
|
+
- `session.mute_input_audio()`: Mutes the local microphone.
|
|
117
|
+
- `session.unmute_input_audio()`: Unmutes the local microphone.
|
|
118
|
+
- `session.is_input_muted()`: Returns `True` if the microphone is muted.
|
|
119
|
+
- `session.get_input_audio_state()`: Returns an `InputAudioState` object containing mute status and mic permission status (`pending`, `granted`, `denied`).
|
|
120
|
+
- `await session.start_mic()`: Connects and publishes the microphone track.
|
|
121
|
+
- `await session.stop_mic()`: Stops capturing and unpublishes the microphone track.
|
|
122
|
+
|
|
123
|
+
#### Custom Audio Injection
|
|
124
|
+
- `await session.upload_audio(file_path)`: Streams a PCM WAV file into the room.
|
|
125
|
+
- `await session.send_audio(data, sample_rate=48000, num_channels=1)`: Injects raw 16-bit PCM bytes into the audio stream.
|
|
126
|
+
|
|
127
|
+
#### Low-Level Accessors
|
|
128
|
+
- `session.get_video_track()`: Returns the remote `RemoteVideoTrack` object (or `None`).
|
|
129
|
+
- `session.get_audio_track()`: Returns the remote `RemoteAudioTrack` object (or `None`).
|
|
130
|
+
- `session.room`: Returns the underlying `livekit.rtc.Room` instance for advanced operations.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
### `TruGenRunner`
|
|
135
|
+
|
|
136
|
+
Handles multi-threading to run the async session event loop on a background thread while feeding events safely to the main rendering thread.
|
|
137
|
+
|
|
138
|
+
#### Controls
|
|
139
|
+
- `runner.run()`: Starts the runner and blocks the main thread to run the rendering loop.
|
|
140
|
+
- `runner.stop()`: Safely stops the background loop and disconnects the session (thread-safe).
|
|
141
|
+
- `runner.toggle_mute()`: Toggles the microphone mute state (thread-safe).
|
|
142
|
+
|
|
143
|
+
#### Properties & Accessors
|
|
144
|
+
- `runner.mic_muted`: Returns `True` if the microphone is currently muted.
|
|
145
|
+
- `runner.session_state`: Returns the current session state enum (`TruGenState`).
|
|
146
|
+
- `runner.session`: Access the active `TruGenSession` instance (returns `None` until connected).
|
|
147
|
+
- `runner.get_caption()`: Returns a tuple `(text, timestamp)` containing the last received caption chunk and the monotonic timestamp at which it arrived.
|
|
148
|
+
|
|
149
|
+
#### Event Decorators
|
|
150
|
+
|
|
151
|
+
You can register event handlers using decorators on the `TruGenRunner` (for UI/main-thread callbacks) or directly on the `TruGenSession` (for low-level async callbacks).
|
|
152
|
+
|
|
153
|
+
##### 1. Runner Decorators (`TruGenRunner`)
|
|
154
|
+
|
|
155
|
+
- **`@runner.on_frame`**: Receives BGR video frames (NumPy arrays) or `None` on the main thread.
|
|
156
|
+
```python
|
|
157
|
+
@runner.on_frame
|
|
158
|
+
def on_frame(frame):
|
|
159
|
+
if frame is not None:
|
|
160
|
+
cv2.imshow("Avatar", frame)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
- **`@runner.on_caption`**: Receives real-time streaming caption chunks (ideal for UI overlays).
|
|
164
|
+
```python
|
|
165
|
+
@runner.on_caption
|
|
166
|
+
def on_caption(text: str):
|
|
167
|
+
# Fired for each caption chunk as it arrives
|
|
168
|
+
pass
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
- **`@runner.on_state`**: Called when the session's connection state transitions.
|
|
172
|
+
```python
|
|
173
|
+
@runner.on_state
|
|
174
|
+
def on_state(state: TruGenState):
|
|
175
|
+
print(f"Status: {state.value}")
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
- **`@runner.on_event`**: Handles any standard `TruGenEvent` enum or custom string event.
|
|
179
|
+
```python
|
|
180
|
+
# Log final complete transcripts
|
|
181
|
+
@runner.on_event("user.transcription_received")
|
|
182
|
+
def on_user_transcript(text: str):
|
|
183
|
+
print(f"[User] {text}")
|
|
184
|
+
|
|
185
|
+
@runner.on_event("agent.transcription_final")
|
|
186
|
+
def on_agent_transcript(text: str):
|
|
187
|
+
print(f"[Agent] {text}")
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
##### 2. Session Decorators (`TruGenSession`)
|
|
191
|
+
|
|
192
|
+
If you are not using `TruGenRunner`, you can listen to events directly on the `TruGenSession` using the `@session.on()` decorator:
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
# Log final complete transcripts directly from the session
|
|
196
|
+
@session.on("user.transcription_received")
|
|
197
|
+
def on_user_speech(text: str):
|
|
198
|
+
print(f"[User] {text}")
|
|
199
|
+
|
|
200
|
+
@session.on("agent.transcription_final")
|
|
201
|
+
def on_agent_speech(text: str):
|
|
202
|
+
print(f"[Agent] {text}")
|
|
203
|
+
|
|
204
|
+
# Handle speaking state changes
|
|
205
|
+
@session.on(TruGenEvent.AGENT_SPEAKING_STARTED)
|
|
206
|
+
def agent_speech_start():
|
|
207
|
+
print("Agent started speaking...")
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
### Events (`TruGenEvent`)
|
|
213
|
+
|
|
214
|
+
Register listener callbacks directly on a `TruGenSession` or a `TruGenRunner` using `@session.on()` or `@runner.on_event()`.
|
|
215
|
+
|
|
216
|
+
| Event Enum / String | Fired When | Callback Arguments |
|
|
217
|
+
|---|---|---|
|
|
218
|
+
| `TruGenEvent.STATE_CHANGED` | The session state changes | `state: TruGenState` |
|
|
219
|
+
| `TruGenEvent.CONNECTION_ESTABLISHED` | Successfully connected to room | None |
|
|
220
|
+
| `TruGenEvent.CONNECTION_CLOSED` | Session room disconnected | `reason: DisconnectReason` |
|
|
221
|
+
| `TruGenEvent.VIDEO_STREAM_STARTED` | Remote video track subscribed | `track: RemoteVideoTrack` |
|
|
222
|
+
| `TruGenEvent.AUDIO_STREAM_STARTED` | Remote audio track subscribed | `track: RemoteAudioTrack` |
|
|
223
|
+
| `TruGenEvent.INPUT_AUDIO_STREAM_STARTED` | Local mic audio stream begins publishing | None |
|
|
224
|
+
| `TruGenEvent.AGENT_SPEAKING_STARTED` | Agent starts speaking | None |
|
|
225
|
+
| `TruGenEvent.AGENT_SPEAKING_ENDED` | Agent stops speaking | None |
|
|
226
|
+
| `TruGenEvent.USER_SPEECH_STARTED` | User starts speaking | None |
|
|
227
|
+
| `TruGenEvent.USER_SPEECH_ENDED` | User stops speaking | None |
|
|
228
|
+
| `TruGenEvent.TEXT_CHUNK_RECEIVED` | Caption/Text chunk received | `text: str` |
|
|
229
|
+
| `TruGenEvent.MIC_PERMISSION_PENDING` | Mic permission request is pending | None |
|
|
230
|
+
| `TruGenEvent.MIC_PERMISSION_GRANTED` | Mic permission has been granted | None |
|
|
231
|
+
| `TruGenEvent.MIC_PERMISSION_DENIED` | Mic permission has been denied | None |
|
|
232
|
+
| `"user.transcription_received"` | User completes a final utterance | `text: str` |
|
|
233
|
+
| `"agent.transcription_final"` | Agent completes a final utterance | `text: str` |
|
|
234
|
+
| `"connection.reconnecting"` | Transient network reconnection starts | None |
|
|
235
|
+
| `"connection.reconnected"` | Network reconnection completes | None |
|
|
236
|
+
| `"connection.quality_changed"` | Participant connection quality changes | `participant, quality` |
|
|
237
|
+
| `TruGenEvent.ERROR` | A session or connection error occurs | `error: Exception` |
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
### Session States (`TruGenState`)
|
|
242
|
+
|
|
243
|
+
| Enum Value | Description |
|
|
244
|
+
|---|---|
|
|
245
|
+
| `TruGenState.INITIALIZING` | Session created but not yet connected |
|
|
246
|
+
| `TruGenState.CONNECTING` | WebRTC handshake and connection in progress |
|
|
247
|
+
| `TruGenState.CONNECTED` | Connection established; actively streaming media |
|
|
248
|
+
| `TruGenState.DISCONNECTED` | Session ended and connection closed |
|
|
249
|
+
| `TruGenState.ERROR` | Unrecoverable error occurred |
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## Detailed Examples
|
|
254
|
+
|
|
255
|
+
### Interactive Session with GUI & WAV Injection
|
|
256
|
+
|
|
257
|
+
For a fully-featured interactive application showing:
|
|
258
|
+
- Real-time video window and connection status.
|
|
259
|
+
- Mic mute controls and speaking indicator.
|
|
260
|
+
- Floating caption overlay.
|
|
261
|
+
- WAV audio injection support (press `A` to inject a local WAV file to the agent).
|
|
262
|
+
|
|
263
|
+
See the built-in examples in the `examples/` directory:
|
|
264
|
+
- [Basic GUI Viewer](examples/basic_session_ui.py) - A simple viewer with a status bar, mic indicators, and floating captions.
|
|
265
|
+
- [Advanced GUI Viewer](examples/advanced_session_ui.py) - Full-featured demonstration including WAV audio injection, reconnect handling, and complete transcripts.
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## Configuration
|
|
270
|
+
|
|
271
|
+
Set the API authentication credentials in a `.env` file or export them directly in your environment:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
export TRUGEN_API_KEY="your-api-key"
|
|
275
|
+
export TRUGEN_AGENT_ID="your-agent-id"
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Error Handling
|
|
279
|
+
|
|
280
|
+
Handle exceptions using standard try/except blocks around `create_session` and connection logic:
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
import asyncio
|
|
284
|
+
from trugen import TruGenClient
|
|
285
|
+
|
|
286
|
+
client = TruGenClient(api_key="invalid-key")
|
|
287
|
+
|
|
288
|
+
async def main():
|
|
289
|
+
try:
|
|
290
|
+
session = await client.create_session(agent_id="my-agent")
|
|
291
|
+
await session.connect()
|
|
292
|
+
except RuntimeError as e:
|
|
293
|
+
print(f"Connection failed: {e}")
|
|
294
|
+
except Exception as e:
|
|
295
|
+
print(f"An unexpected error occurred: {e}")
|
|
296
|
+
|
|
297
|
+
asyncio.run(main())
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Requirements
|
|
301
|
+
|
|
302
|
+
- Python 3.10+
|
|
303
|
+
- **Core Dependencies:**
|
|
304
|
+
- `livekit` (>=0.11.0)
|
|
305
|
+
- `aiohttp` (>=3.8.0)
|
|
306
|
+
- **Optional Dependencies (`[display]`):**
|
|
307
|
+
- `opencv-python` (>=4.8.0)
|
|
308
|
+
- `sounddevice` (>=0.4.6)
|
|
309
|
+
- `numpy` (>=1.24.0)
|
|
310
|
+
|
|
311
|
+
## License
|
|
312
|
+
|
|
313
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
314
|
+
|
|
315
|
+
## Links
|
|
316
|
+
|
|
317
|
+
- [TruGen AI Website](https://trugen.ai)
|
|
318
|
+
- [Developer Portal / Dashboard](https://dashboard.trugen.ai)
|