voxinfra 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voxinfra-0.1.0/PKG-INFO +185 -0
- voxinfra-0.1.0/README.md +154 -0
- voxinfra-0.1.0/pyproject.toml +54 -0
- voxinfra-0.1.0/setup.cfg +4 -0
- voxinfra-0.1.0/tests/test_memory.py +81 -0
- voxinfra-0.1.0/tests/test_obs.py +83 -0
- voxinfra-0.1.0/tests/test_tools.py +89 -0
- voxinfra-0.1.0/voxinfra/__init__.py +13 -0
- voxinfra-0.1.0/voxinfra/config.py +30 -0
- voxinfra-0.1.0/voxinfra/memory.py +90 -0
- voxinfra-0.1.0/voxinfra/models.py +85 -0
- voxinfra-0.1.0/voxinfra/obs.py +83 -0
- voxinfra-0.1.0/voxinfra/session.py +149 -0
- voxinfra-0.1.0/voxinfra/tools.py +125 -0
- voxinfra-0.1.0/voxinfra.egg-info/PKG-INFO +185 -0
- voxinfra-0.1.0/voxinfra.egg-info/SOURCES.txt +17 -0
- voxinfra-0.1.0/voxinfra.egg-info/dependency_links.txt +1 -0
- voxinfra-0.1.0/voxinfra.egg-info/requires.txt +14 -0
- voxinfra-0.1.0/voxinfra.egg-info/top_level.txt +1 -0
voxinfra-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: voxinfra
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Production infrastructure for voice agents — memory, tool orchestration, and observability
|
|
5
|
+
Author: Ashu Singhania
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: voice,agent,livekit,memory,observability,llm
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: mem0ai>=0.1.0
|
|
19
|
+
Requires-Dist: livekit-agents>=1.5.0
|
|
20
|
+
Requires-Dist: opentelemetry-sdk>=1.24.0
|
|
21
|
+
Requires-Dist: opentelemetry-api>=1.24.0
|
|
22
|
+
Requires-Dist: supabase>=2.4.0
|
|
23
|
+
Requires-Dist: httpx>=0.27.0
|
|
24
|
+
Requires-Dist: pydantic>=2.6.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-mock>=3.12; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
31
|
+
|
|
32
|
+
# VoxInfra
|
|
33
|
+
|
|
34
|
+
Production memory, tool safety, and call replay for LiveKit voice agents.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## The Problem
|
|
39
|
+
|
|
40
|
+
- **No memory** — Your agent forgets users the moment the call ends, making every interaction feel cold and repetitive.
|
|
41
|
+
- **Latency kills** — A single CRM lookup timing out can freeze the audio stream and drop the call.
|
|
42
|
+
- **No replay** — When a call goes wrong, you have no structured trace to debug what the agent heard, recalled, or decided.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install voxinfra
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from dotenv import load_dotenv
|
|
58
|
+
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
|
|
59
|
+
from livekit.plugins import deepgram, elevenlabs, openai, silero
|
|
60
|
+
from voxinfra import VoxSession, vox_tool
|
|
61
|
+
|
|
62
|
+
load_dotenv()
|
|
63
|
+
vox = VoxSession() # reads MEM0_API_KEY, SUPABASE_URL from env
|
|
64
|
+
|
|
65
|
+
@vox_tool(sla_ms=200, fallback={"name": "there"})
|
|
66
|
+
async def get_user_info(user_id: str) -> dict:
|
|
67
|
+
return await crm.lookup(user_id)
|
|
68
|
+
|
|
69
|
+
class MyAgent(Agent):
|
|
70
|
+
async def on_enter(self):
|
|
71
|
+
call_id = str(__import__("uuid").uuid4())
|
|
72
|
+
self.session.userdata["call_id"] = call_id
|
|
73
|
+
vox.obs.start_call(call_id, self.session.userdata["user_id"])
|
|
74
|
+
|
|
75
|
+
async def on_exit(self):
|
|
76
|
+
await vox.end_call(self.session.userdata["call_id"])
|
|
77
|
+
|
|
78
|
+
async def llm_node(self, chat_ctx, tools, model_settings):
|
|
79
|
+
user_id = self.session.userdata["user_id"]
|
|
80
|
+
call_id = self.session.userdata["call_id"]
|
|
81
|
+
async with vox.turn(user_id=user_id, chat_ctx=chat_ctx, call_id=call_id) as turn:
|
|
82
|
+
await turn.call_tool(get_user_info, user_id)
|
|
83
|
+
return await turn.run_llm(tools, model_settings)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## VoxMemory
|
|
89
|
+
|
|
90
|
+
Persistent cross-call memory backed by [Mem0](https://mem0.ai) (hosted) or Qdrant (self-hosted). At the start of every turn, VoxInfra semantically searches past conversations and automatically injects the top results into the LLM context, condensed to three lines or fewer so that voice prompts stay lean.
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
# Automatic via vox.turn() — no extra code needed
|
|
94
|
+
async with vox.turn(user_id="u-123", chat_ctx=chat_ctx, call_id=call_id) as turn:
|
|
95
|
+
# Memory was recalled and injected before this line
|
|
96
|
+
return await turn.run_llm(tools, model_settings)
|
|
97
|
+
|
|
98
|
+
# Manual DPDP right-to-erasure
|
|
99
|
+
await vox.memory.delete_user("u-123")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## VoxTools
|
|
105
|
+
|
|
106
|
+
A registry of async tools with per-tool SLA enforcement. If a tool exceeds its SLA, VoxInfra serves a cached result or fallback — the agent keeps talking and the call never drops.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
@vox_tool(sla_ms=200, fallback={"status": "unavailable"})
|
|
110
|
+
async def get_appointments(user_id: str) -> dict:
|
|
111
|
+
return await calendar_api.fetch(user_id)
|
|
112
|
+
|
|
113
|
+
# Inside llm_node
|
|
114
|
+
async with vox.turn(...) as turn:
|
|
115
|
+
# If get_appointments takes >200ms, fallback is returned silently
|
|
116
|
+
result = await turn.call_tool(get_appointments, user_id)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The `@vox_tool` decorator is transparent — it only attaches metadata. The function remains directly callable without going through VoxTools.
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## VoxObs
|
|
124
|
+
|
|
125
|
+
Structured call traces persisted to a Supabase table (`vox_call_traces`). Every turn records user transcript, memory recall latency, LLM latency, TTS latency, and all tool call results with SLA breach flags. Use `vox.end_call()` to finalise and flush the trace.
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# In Agent.on_exit
|
|
129
|
+
await vox.end_call(self.session.userdata["call_id"])
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The Supabase write is always fire-and-forget — it never blocks the call path.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Config
|
|
137
|
+
|
|
138
|
+
All configuration via environment variables or constructor kwargs.
|
|
139
|
+
|
|
140
|
+
| Variable | Description | Required |
|
|
141
|
+
|---|---|---|
|
|
142
|
+
| `MEM0_API_KEY` | Mem0 hosted API key | Yes (mem0 backend) |
|
|
143
|
+
| `QDRANT_URL` | Qdrant server URL | Qdrant backend only (defaults to `http://localhost:6333`) |
|
|
144
|
+
| `QDRANT_API_KEY` | Qdrant API key | No |
|
|
145
|
+
| `SUPABASE_URL` | Supabase project URL | No (obs disabled without it) |
|
|
146
|
+
| `SUPABASE_SERVICE_KEY` | Supabase service role key | No |
|
|
147
|
+
| `VOXINFRA_API_KEY` | Reserved for future cloud features | No |
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# From env (default)
|
|
151
|
+
vox = VoxSession()
|
|
152
|
+
|
|
153
|
+
# Explicit
|
|
154
|
+
vox = VoxSession(
|
|
155
|
+
mem0_api_key="m0-...",
|
|
156
|
+
supabase_url="https://xyz.supabase.co",
|
|
157
|
+
supabase_key="service-key",
|
|
158
|
+
default_tool_sla_ms=250,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# Qdrant backend (self-hosted)
|
|
162
|
+
vox = VoxSession(
|
|
163
|
+
memory_backend="qdrant",
|
|
164
|
+
qdrant_url="http://localhost:6333",
|
|
165
|
+
qdrant_collection="my_agent_memories",
|
|
166
|
+
)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## DPDP Compliance
|
|
172
|
+
|
|
173
|
+
For deployments subject to India's Digital Personal Data Protection (DPDP) Act, VoxInfra supports a fully self-hosted data path. Use the Qdrant backend pointed at a Mumbai-region instance to ensure all memory vectors stay within Indian jurisdiction. Call traces can be stored in a Supabase project provisioned in the `ap-south-1` region. Users have the right to erasure under DPDP Section 12 — invoke it with a single call:
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
await vox.memory.delete_user(user_id)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
This calls Mem0's `delete_all` or a filtered delete on the Qdrant collection under the hood, permanently removing all stored memories for that user.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## License
|
|
184
|
+
|
|
185
|
+
MIT © Ashu Singhania
|
voxinfra-0.1.0/README.md
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# VoxInfra
|
|
2
|
+
|
|
3
|
+
Production memory, tool safety, and call replay for LiveKit voice agents.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## The Problem
|
|
8
|
+
|
|
9
|
+
- **No memory** — Your agent forgets users the moment the call ends, making every interaction feel cold and repetitive.
|
|
10
|
+
- **Latency kills** — A single CRM lookup timing out can freeze the audio stream and drop the call.
|
|
11
|
+
- **No replay** — When a call goes wrong, you have no structured trace to debug what the agent heard, recalled, or decided.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install voxinfra
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from dotenv import load_dotenv
|
|
27
|
+
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
|
|
28
|
+
from livekit.plugins import deepgram, elevenlabs, openai, silero
|
|
29
|
+
from voxinfra import VoxSession, vox_tool
|
|
30
|
+
|
|
31
|
+
load_dotenv()
|
|
32
|
+
vox = VoxSession() # reads MEM0_API_KEY, SUPABASE_URL from env
|
|
33
|
+
|
|
34
|
+
@vox_tool(sla_ms=200, fallback={"name": "there"})
|
|
35
|
+
async def get_user_info(user_id: str) -> dict:
|
|
36
|
+
return await crm.lookup(user_id)
|
|
37
|
+
|
|
38
|
+
class MyAgent(Agent):
|
|
39
|
+
async def on_enter(self):
|
|
40
|
+
call_id = str(__import__("uuid").uuid4())
|
|
41
|
+
self.session.userdata["call_id"] = call_id
|
|
42
|
+
vox.obs.start_call(call_id, self.session.userdata["user_id"])
|
|
43
|
+
|
|
44
|
+
async def on_exit(self):
|
|
45
|
+
await vox.end_call(self.session.userdata["call_id"])
|
|
46
|
+
|
|
47
|
+
async def llm_node(self, chat_ctx, tools, model_settings):
|
|
48
|
+
user_id = self.session.userdata["user_id"]
|
|
49
|
+
call_id = self.session.userdata["call_id"]
|
|
50
|
+
async with vox.turn(user_id=user_id, chat_ctx=chat_ctx, call_id=call_id) as turn:
|
|
51
|
+
await turn.call_tool(get_user_info, user_id)
|
|
52
|
+
return await turn.run_llm(tools, model_settings)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## VoxMemory
|
|
58
|
+
|
|
59
|
+
Persistent cross-call memory backed by [Mem0](https://mem0.ai) (hosted) or Qdrant (self-hosted). At the start of every turn, VoxInfra semantically searches past conversations and automatically injects the top results into the LLM context, condensed to three lines or fewer so that voice prompts stay lean.
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
# Automatic via vox.turn() — no extra code needed
|
|
63
|
+
async with vox.turn(user_id="u-123", chat_ctx=chat_ctx, call_id=call_id) as turn:
|
|
64
|
+
# Memory was recalled and injected before this line
|
|
65
|
+
return await turn.run_llm(tools, model_settings)
|
|
66
|
+
|
|
67
|
+
# Manual DPDP right-to-erasure
|
|
68
|
+
await vox.memory.delete_user("u-123")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## VoxTools
|
|
74
|
+
|
|
75
|
+
A registry of async tools with per-tool SLA enforcement. If a tool exceeds its SLA, VoxInfra serves a cached result or fallback — the agent keeps talking and the call never drops.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
@vox_tool(sla_ms=200, fallback={"status": "unavailable"})
|
|
79
|
+
async def get_appointments(user_id: str) -> dict:
|
|
80
|
+
return await calendar_api.fetch(user_id)
|
|
81
|
+
|
|
82
|
+
# Inside llm_node
|
|
83
|
+
async with vox.turn(...) as turn:
|
|
84
|
+
# If get_appointments takes >200ms, fallback is returned silently
|
|
85
|
+
result = await turn.call_tool(get_appointments, user_id)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The `@vox_tool` decorator is transparent — it only attaches metadata. The function remains directly callable without going through VoxTools.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## VoxObs
|
|
93
|
+
|
|
94
|
+
Structured call traces persisted to a Supabase table (`vox_call_traces`). Every turn records user transcript, memory recall latency, LLM latency, TTS latency, and all tool call results with SLA breach flags. Use `vox.end_call()` to finalise and flush the trace.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
# In Agent.on_exit
|
|
98
|
+
await vox.end_call(self.session.userdata["call_id"])
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
The Supabase write is always fire-and-forget — it never blocks the call path.
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Config
|
|
106
|
+
|
|
107
|
+
All configuration via environment variables or constructor kwargs.
|
|
108
|
+
|
|
109
|
+
| Variable | Description | Required |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| `MEM0_API_KEY` | Mem0 hosted API key | Yes (mem0 backend) |
|
|
112
|
+
| `QDRANT_URL` | Qdrant server URL | Qdrant backend only (defaults to `http://localhost:6333`) |
|
|
113
|
+
| `QDRANT_API_KEY` | Qdrant API key | No |
|
|
114
|
+
| `SUPABASE_URL` | Supabase project URL | No (obs disabled without it) |
|
|
115
|
+
| `SUPABASE_SERVICE_KEY` | Supabase service role key | No |
|
|
116
|
+
| `VOXINFRA_API_KEY` | Reserved for future cloud features | No |
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
# From env (default)
|
|
120
|
+
vox = VoxSession()
|
|
121
|
+
|
|
122
|
+
# Explicit
|
|
123
|
+
vox = VoxSession(
|
|
124
|
+
mem0_api_key="m0-...",
|
|
125
|
+
supabase_url="https://xyz.supabase.co",
|
|
126
|
+
supabase_key="service-key",
|
|
127
|
+
default_tool_sla_ms=250,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Qdrant backend (self-hosted)
|
|
131
|
+
vox = VoxSession(
|
|
132
|
+
memory_backend="qdrant",
|
|
133
|
+
qdrant_url="http://localhost:6333",
|
|
134
|
+
qdrant_collection="my_agent_memories",
|
|
135
|
+
)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## DPDP Compliance
|
|
141
|
+
|
|
142
|
+
For deployments subject to India's Digital Personal Data Protection (DPDP) Act, VoxInfra supports a fully self-hosted data path. Use the Qdrant backend pointed at a Mumbai-region instance to ensure all memory vectors stay within Indian jurisdiction. Call traces can be stored in a Supabase project provisioned in the `ap-south-1` region. Users have the right to erasure under DPDP Section 12 — invoke it with a single call:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
await vox.memory.delete_user(user_id)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
This calls Mem0's `delete_all` or a filtered delete on the Qdrant collection under the hood, permanently removing all stored memories for that user.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
|
|
154
|
+
MIT © Ashu Singhania
|
voxinfra-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "voxinfra"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Production infrastructure for voice agents — memory, tool orchestration, and observability"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Ashu Singhania" }]
|
|
13
|
+
keywords = ["voice", "agent", "livekit", "memory", "observability", "llm"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Software Development :: Libraries",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
dependencies = [
|
|
26
|
+
"mem0ai>=0.1.0",
|
|
27
|
+
"livekit-agents>=1.5.0",
|
|
28
|
+
"opentelemetry-sdk>=1.24.0",
|
|
29
|
+
"opentelemetry-api>=1.24.0",
|
|
30
|
+
"supabase>=2.4.0",
|
|
31
|
+
"httpx>=0.27.0",
|
|
32
|
+
"pydantic>=2.6.0",
|
|
33
|
+
"python-dotenv>=1.0.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
"pytest-asyncio>=0.23",
|
|
40
|
+
"pytest-mock>=3.12",
|
|
41
|
+
"ruff>=0.4.0",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
where = ["."]
|
|
46
|
+
include = ["voxinfra*"]
|
|
47
|
+
|
|
48
|
+
[tool.ruff]
|
|
49
|
+
line-length = 100
|
|
50
|
+
target-version = "py310"
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
asyncio_mode = "auto"
|
|
54
|
+
testpaths = ["tests"]
|
voxinfra-0.1.0/setup.cfg
ADDED
|
voxinfra-0.1.0/tests/test_memory.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from voxinfra.config import VoxConfig
|
|
7
|
+
from voxinfra.memory import VoxMemory
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_memory(mem0_api_key: str = "test-key") -> VoxMemory:
    """Return a VoxMemory for the mem0 backend whose client is a MagicMock.

    ``voxinfra.memory.MemoryClient`` is patched while the instance is
    constructed, and ``_client`` is then overwritten with the same mock so
    tests can stub its methods directly.
    """
    fake_client = MagicMock()
    cfg = VoxConfig(mem0_api_key=mem0_api_key, memory_backend="mem0")
    with patch("voxinfra.memory.MemoryClient", return_value=fake_client):
        memory = VoxMemory(cfg)
    # Force the mock in, in case the patch target was not the name VoxMemory resolved.
    memory._client = fake_client
    return memory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
async def test_recall_returns_empty_on_no_memories() -> None:
    """recall() gives an empty snippet and a zero count when search finds nothing."""
    memory = make_memory()
    memory._client.search = MagicMock(return_value=[])

    outcome = await memory.recall("user-1", "some context")

    text, elapsed_ms, hits = outcome
    assert text == ""
    assert elapsed_ms >= 0.0
    assert hits == 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.mark.asyncio
async def test_recall_formats_memories_correctly() -> None:
    """Every memory returned by search appears in the recalled snippet."""
    stored = [
        {"memory": "User prefers Hindi", "score": 0.95},
        {"memory": "Has diabetes", "score": 0.88},
    ]
    memory = make_memory()
    memory._client.search = MagicMock(return_value=stored)

    text, _elapsed_ms, hits = await memory.recall("user-1", "health preferences")

    for expected in ("User prefers Hindi", "Has diabetes"):
        assert expected in text
    assert hits == 2
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.asyncio
async def test_recall_never_raises_on_exception() -> None:
    """A failing search does not propagate: recall() returns ("", 0.0, 0)."""
    memory = make_memory()
    failure = RuntimeError("network error")
    memory._client.search = MagicMock(side_effect=failure)

    outcome = await memory.recall("user-1", "anything")

    text, elapsed_ms, hits = outcome
    assert text == ""
    assert elapsed_ms == 0.0
    assert hits == 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
async def test_store_is_fire_and_forget() -> None:
    """store() returns and the client's add() is still invoked in the background.

    The call is tracked by the MagicMock itself rather than by an
    ``asyncio.Event`` set inside a stub: ``asyncio.Event`` is not thread-safe,
    and this test cannot assume the stub runs on the event loop. A plain
    MagicMock also accepts positional as well as keyword arguments. The wait
    is a bounded poll rather than a single fixed 50 ms sleep, so a slow or
    loaded machine does not make the test fail spuriously.
    """
    mem = make_memory()
    mem._client.add = MagicMock(return_value=None)

    # store() should return immediately without blocking
    await mem.store("user-1", "some transcript")

    # Give the background work up to about one second to reach the client.
    for _ in range(100):
        if mem._client.add.called:
            break
        await asyncio.sleep(0.01)
    assert mem._client.add.called, "add() should have been called via background task"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@pytest.mark.asyncio
async def test_delete_user_calls_delete_all() -> None:
    """delete_user() calls the client's delete_all() exactly once with the user id."""
    memory = make_memory()
    delete_all = MagicMock(return_value=None)
    memory._client.delete_all = delete_all

    await memory.delete_user("user-123")

    delete_all.assert_called_once_with(user_id="user-123")
|
voxinfra-0.1.0/tests/test_obs.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from voxinfra.config import VoxConfig
|
|
4
|
+
from voxinfra.models import ToolResult, TurnTrace
|
|
5
|
+
from voxinfra.obs import VoxObs
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def make_obs() -> VoxObs:
    """Build a VoxObs from a config created with ``obs_enabled=False``."""
    return VoxObs(VoxConfig(mem0_api_key="test-key", obs_enabled=False))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def make_turn(call_id: str, turn_number: int, total_turn_ms: float = 0.0) -> TurnTrace:
    """Create a TurnTrace for ``call_id`` with the fixed transcript "hello"."""
    return TurnTrace(
        call_id=call_id,
        turn_number=turn_number,
        user_transcript="hello",
        total_turn_ms=total_turn_ms,
    )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_start_call_creates_trace() -> None:
    """start_call() registers a trace that get_call() returns with the user id."""
    observer = make_obs()
    observer.start_call("call-1", "user-1")

    active = observer.get_call("call-1")

    assert active is not None
    assert active.user_id == "user-1"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.asyncio
async def test_turns_accumulate_correctly() -> None:
    """Three started-and-finished turns are all present on the finished call."""
    observer = make_obs()
    observer.start_call("call-2", "user-2")

    for index in (1, 2, 3):
        started = observer.start_turn("call-2", f"turn {index}")
        observer.finish_turn("call-2", started)

    finished = await observer.finish_call("call-2")
    assert finished is not None
    assert finished.total_turns == 3
    assert len(finished.turns) == 3
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@pytest.mark.asyncio
async def test_avg_turn_ms_computed_correctly() -> None:
    """finish_call() reports the mean of the per-turn ``total_turn_ms`` values."""
    obs = make_obs()
    obs.start_call("call-3", "user-3")
    # Give each turn its own number (1, 2, 3) so the trace under test does not
    # contain duplicate turn numbers.
    for turn_number, ms in enumerate([100.0, 200.0, 300.0], start=1):
        turn = make_turn("call-3", turn_number, total_turn_ms=ms)
        obs.finish_turn("call-3", turn)
    call = await obs.finish_call("call-3")
    assert call is not None
    assert call.avg_turn_ms == pytest.approx(200.0)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.mark.asyncio
async def test_sla_breach_count_computed_correctly() -> None:
    """sla_breach_count totals the breached tool results across a call's turns."""
    observer = make_obs()
    observer.start_call("call-4", "user-4")

    def make_tool(breached: bool) -> ToolResult:
        # Identical results apart from the breach flag under test.
        return ToolResult(
            tool_name="t",
            success=True,
            latency_ms=10.0,
            sla_ms=300,
            sla_breached=breached,
            used_fallback=False,
        )

    # Turn 1 carries two breached results, turn 2 carries one that is not breached.
    breach_flags_by_turn = {1: [True, True], 2: [False]}
    for number, flags in breach_flags_by_turn.items():
        turn = make_turn("call-4", number)
        turn.tool_results = [make_tool(flag) for flag in flags]
        observer.finish_turn("call-4", turn)

    finished = await observer.finish_call("call-4")
    assert finished is not None
    assert finished.sla_breach_count == 2
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.asyncio
async def test_finish_call_removes_from_active() -> None:
    """Once finish_call() has run, get_call() no longer returns the call."""
    observer = make_obs()
    observer.start_call("call-5", "user-5")

    await observer.finish_call("call-5")

    remaining = observer.get_call("call-5")
    assert remaining is None
|
voxinfra-0.1.0/tests/test_tools.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from unittest.mock import MagicMock
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from voxinfra.config import VoxConfig
|
|
7
|
+
from voxinfra.tools import VoxTools
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_tools(default_sla_ms: int = 300) -> VoxTools:
    """Build a VoxTools instance whose config carries the given default SLA."""
    cfg = VoxConfig(mem0_api_key="test-key", default_tool_sla_ms=default_sla_ms)
    registry = VoxTools(cfg)
    return registry
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.mark.asyncio
async def test_fast_tool_returns_result() -> None:
    """A tool sleeping 10 ms under the default SLA returns its own value."""
    registry = make_tools()

    async def fast_tool(x: int) -> int:
        await asyncio.sleep(0.01)
        return x * 2

    outcome = await registry.call(fast_tool, 5)

    assert outcome.success is True
    assert outcome.sla_breached is False
    assert outcome.used_fallback is False
    assert outcome.result == 10
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.mark.asyncio
async def test_slow_tool_uses_fallback() -> None:
    """A tool sleeping past its 100 ms SLA is flagged and answers with the fallback."""
    registry = make_tools()
    fallback = {"status": "cached"}

    async def slow_tool() -> dict:
        await asyncio.sleep(0.5)
        return {"status": "ok"}

    registry.register(slow_tool, sla_ms=100, fallback=fallback)
    outcome = await registry.call(slow_tool)

    assert outcome.sla_breached is True
    assert outcome.used_fallback is True
    assert outcome.result == fallback
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@pytest.mark.asyncio
async def test_slow_tool_uses_cache_before_fallback() -> None:
    """On an SLA breach, a cached value is returned in preference to the fallback."""
    registry = make_tools()
    fallback = {"data": "fallback"}
    cached_value = {"data": "from_cache"}

    async def slow_tool() -> dict:
        await asyncio.sleep(0.5)
        return {"data": "fresh"}

    registry.register(slow_tool, sla_ms=100, fallback=fallback)
    # Seed the tool's cache directly through the private registry entry.
    tool_cache = registry._registry["slow_tool"]["cache"]
    tool_cache["slow_tool:some_key"] = cached_value

    outcome = await registry.call(slow_tool)

    assert outcome.sla_breached is True
    assert outcome.used_fallback is True
    assert outcome.result == cached_value  # cache wins over fallback
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.mark.asyncio
async def test_failing_tool_returns_error_result() -> None:
    """A tool that raises yields an unsuccessful result with an error recorded."""
    registry = make_tools()

    async def broken_tool() -> dict:
        raise ValueError("CRM is down")

    registry.register(broken_tool, sla_ms=300, fallback=None)
    outcome = await registry.call(broken_tool)

    assert outcome.success is False
    assert outcome.error is not None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pytest.mark.asyncio
async def test_call_never_raises() -> None:
    """Even a SystemExit raised inside a tool must not escape VoxTools.call()."""
    registry = make_tools()

    async def extreme_failure() -> None:
        raise SystemExit("chaos")

    registry.register(extreme_failure, sla_ms=300, fallback=None)

    # Must not propagate any exception
    outcome = await registry.call(extreme_failure)
    assert outcome.success is False
|
voxinfra-0.1.0/voxinfra/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from voxinfra.session import VoxSession
|
|
2
|
+
from voxinfra.memory import VoxMemory
|
|
3
|
+
from voxinfra.tools import VoxTools, vox_tool
|
|
4
|
+
from voxinfra.obs import VoxObs
|
|
5
|
+
from voxinfra.config import VoxConfig
|
|
6
|
+
from voxinfra.models import CallTrace, TurnTrace, ToolResult, MemoryEntry
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"VoxSession", "VoxMemory", "VoxTools", "vox_tool",
|
|
10
|
+
"VoxObs", "VoxConfig", "CallTrace", "TurnTrace", "ToolResult", "MemoryEntry",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
voxinfra-0.1.0/voxinfra/config.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import os
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
@dataclass
class VoxConfig:
    """Settings for the VoxInfra components.

    Credential and URL fields default to the matching environment variable,
    read when the instance is created; every field can also be passed to the
    constructor explicitly. The four credential fields are left out of the
    generated ``repr`` so that printing or logging a config does not expose
    API keys.
    """

    # VOXINFRA_API_KEY
    api_key: str = field(
        default_factory=lambda: os.getenv("VOXINFRA_API_KEY", ""), repr=False
    )
    # MEM0_API_KEY; validate() requires it for the "mem0" backend.
    mem0_api_key: str = field(
        default_factory=lambda: os.getenv("MEM0_API_KEY", ""), repr=False
    )
    memory_backend: Literal["mem0", "qdrant"] = "mem0"
    # QDRANT_URL, falling back to a local Qdrant instance.
    qdrant_url: str = field(
        default_factory=lambda: os.getenv("QDRANT_URL", "http://localhost:6333")
    )
    # QDRANT_API_KEY
    qdrant_api_key: str = field(
        default_factory=lambda: os.getenv("QDRANT_API_KEY", ""), repr=False
    )
    qdrant_collection: str = "voxinfra_memories"
    # NOTE(review): presumably the number of memories fetched per recall — confirm in memory.py.
    memory_top_k: int = 5
    # SUPABASE_URL
    supabase_url: str = field(default_factory=lambda: os.getenv("SUPABASE_URL", ""))
    # SUPABASE_SERVICE_KEY
    supabase_key: str = field(
        default_factory=lambda: os.getenv("SUPABASE_SERVICE_KEY", ""), repr=False
    )
    obs_enabled: bool = True
    default_tool_sla_ms: int = 300
    # NOTE(review): the retention settings are not read anywhere in this module — confirm usage.
    full_trace_retention_days: int = 7
    summary_retention_days: int = 90

    def validate(self) -> None:
        """Check that the selected memory backend is known and configured.

        Raises:
            ValueError: If ``memory_backend`` is neither ``"mem0"`` nor
                ``"qdrant"``, if the mem0 backend has no API key, or if the
                qdrant backend has an empty URL.
        """
        # The Literal annotation is not enforced at runtime, so reject typos here.
        if self.memory_backend not in ("mem0", "qdrant"):
            raise ValueError(
                f"Unknown memory_backend {self.memory_backend!r}; expected 'mem0' or 'qdrant'."
            )
        if self.memory_backend == "mem0" and not self.mem0_api_key:
            raise ValueError("MEM0_API_KEY is required when memory_backend='mem0'.")
        if self.memory_backend == "qdrant" and not self.qdrant_url:
            raise ValueError("qdrant_url is required when memory_backend='qdrant'.")

    @classmethod
    def from_env(cls) -> "VoxConfig":
        """Return a config in which every field takes its default value."""
        return cls()
|