voxarena 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voxarena-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Simkeyur
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,243 @@
1
+ Metadata-Version: 2.4
2
+ Name: voxarena
3
+ Version: 0.1.0
4
+ Summary: An evaluation arena for realtime voice agents.
5
+ Author: VoxArena contributors
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Simkeyur
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/simkeyur/vox-arena
29
+ Project-URL: Issues, https://github.com/simkeyur/vox-arena/issues
30
+ Keywords: voice-agents,realtime-llm,evaluation,benchmarking,gemini-live,openai-realtime,pipecat
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Topic :: Software Development :: Testing
38
+ Requires-Python: >=3.11
39
+ Description-Content-Type: text/markdown
40
+ License-File: LICENSE
41
+ Requires-Dist: fastapi>=0.110.0
42
+ Requires-Dist: uvicorn>=0.28.0
43
+ Requires-Dist: pydantic>=2.6.0
44
+ Requires-Dist: pydantic-settings>=2.2.0
45
+ Requires-Dist: pipecat-ai[google,openai]>=0.5.0
46
+ Requires-Dist: python-dotenv>=1.0.1
47
+ Requires-Dist: pyyaml>=6.0.1
48
+ Requires-Dist: loguru>=0.7.2
49
+ Provides-Extra: dev
50
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
51
+ Dynamic: license-file
52
+
53
+ <p align="center">
54
+ <picture>
55
+ <source media="(prefers-color-scheme: dark)" srcset="ui/src/assets/logo-dark.png" />
56
+ <img src="ui/src/assets/logo.png" alt="VoxArena" width="220" />
57
+ </picture>
58
+ </p>
59
+
60
+ <p align="center"><em>An evaluation arena for realtime voice agents.</em></p>
61
+
62
+ <p align="center">
63
+
64
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
65
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
66
+ [![Built with Pipecat](https://img.shields.io/badge/built%20with-pipecat-9cf.svg)](https://github.com/pipecat-ai/pipecat)
67
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](#contributing)
68
+
69
+ </p>
70
+
71
+ VoxArena is a reproducible benchmarking harness for realtime voice agents. Run the same scripted conversation across Gemini Live, OpenAI Realtime, and other [Pipecat](https://github.com/pipecat-ai/pipecat)-supported providers — and compare them apples-to-apples on latency, tool-call accuracy, and hallucinations.
72
+
73
+ Drop it into your CI pipeline, your dev loop, or the bundled control panel.
74
+
75
+ ---
76
+
77
+ ## 🚀 CI & Pipeline Integration
78
+
79
+ VoxArena ships a `voxarena` CLI designed for headless use in your build pipeline. It returns a non-zero exit code when any metric fails a threshold you define (a minimum not met or a maximum exceeded), and emits JUnit XML for native CI reporting.
80
+
81
+ ```bash
82
+ pip install voxarena
83
+
84
+ voxarena run \
85
+ --provider gemini \
86
+ --script ./script/utterances.yaml \
87
+ --min-tool-accuracy 0.9 \
88
+ --max-hallucinations 0 \
89
+ --max-avg-ttfa-ms 1500 \
90
+ --output result.json \
91
+ --junit voxarena.xml
92
+ # exit 0 if every threshold passes, 1 otherwise
93
+ ```
94
+
95
+ ### Compare two providers in one shot
96
+
97
+ ```bash
98
+ voxarena compare \
99
+ --gemini-model gemini-3.1-flash-live-preview \
100
+ --openai-model gpt-realtime-2 \
101
+ --num-turns 5 \
102
+ --min-tool-accuracy 0.9 \
103
+ --output compare.json
104
+ ```
105
+
106
+ ### GitHub Actions
107
+
108
+ ```yaml
109
+ - name: Voice agent regression check
110
+ env:
111
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
112
+ run: |
113
+ pip install voxarena
114
+ voxarena run --provider gemini \
115
+ --min-tool-accuracy 0.92 --max-hallucinations 0 \
116
+ --junit voxarena.xml --quiet
117
+
118
+ - uses: mikepenz/action-junit-report@v4
119
+ if: always()
120
+ with:
121
+ report_paths: voxarena.xml
122
+ ```
123
+
124
+ ### Subcommands
125
+
126
+ | Command | What it does |
127
+ | --- | --- |
128
+ | `voxarena run` | Single-provider scripted run; exits 0/1 against thresholds. |
129
+ | `voxarena compare` | Runs Gemini and OpenAI in parallel against the same script. |
130
+ | `voxarena report` | Generates a markdown comparison report from past runs. |
131
+
132
+ Run `voxarena <command> --help` for the full flag set.
133
+
134
+ ---
135
+
136
+ ## Features
137
+
138
+ - 🎙️ **Provider-agnostic agent** — one Pipecat pipeline drives every provider; swap models without re-implementing your agent
139
+ - 🔁 **Scripted conversations** — multi-turn YAML scripts with pre-recorded WAV inputs and expected tool calls / response content
140
+ - 📊 **Automated scoring** — tool-call correctness, response matching, hallucination counts, time-to-first-audio, interruption-stop latency
141
+ - 🆚 **Side-by-side comparisons** — run multiple providers in parallel against the same script
142
+ - 🗄️ **Persistent run history** — JSON manifests on disk, indexed in SQLite
143
+ - 🖥️ **Web control panel** — React UI to launch runs, watch live status, browse results, and edit scripts
144
+ - 🧩 **Extensible** — add a new provider by implementing one adapter class
145
+
146
+ ## Architecture
147
+
148
+ ```mermaid
149
+ flowchart TD
150
+ A["Recorded WAVs<br/>script/audio/*.wav"] --> B["Injection Harness<br/>voxarena/harness.py"]
151
+ B --> C
152
+
153
+ subgraph C ["Pipecat Pipeline"]
154
+ direction LR
155
+ C1["Audio Injector"] --> C2["Provider Adapter"]
156
+ C2 --> C3["Audio Capture"]
157
+ C3 --> C4["Metrics Collector"]
158
+ end
159
+
160
+ C2 <--> D{{"Provider Backend"}}
161
+ D --> D1["Gemini Live"]
162
+ D --> D2["OpenAI Realtime"]
163
+ D --> D3["...future providers"]
164
+
165
+ C4 --> E["Run Manifest<br/>results/PROVIDER/RUN_ID/manifest.json"]
166
+ E --> F[("SQLite Index<br/>runs.db")]
167
+
168
+ F <--> G["voxarena CLI<br/>+ FastAPI Backend"]
169
+ G <--> H["React Control Panel<br/>ui/"]
170
+
171
+ style D1 fill:#4285F4,color:#fff,stroke:#333
172
+ style D2 fill:#10A37F,color:#fff,stroke:#333
173
+ style D3 fill:#999,color:#fff,stroke:#333
174
+ style F fill:#f5f5f5,stroke:#333
175
+ style H fill:#fff7da,stroke:#333
176
+ ```
177
+
178
+ ## Local Dev (with UI)
179
+
180
+ ```bash
181
+ git clone https://github.com/simkeyur/vox-arena.git
182
+ cd vox-arena
183
+ cp .env.example .env # add GOOGLE_API_KEY / OPENAI_API_KEY
184
+
185
+ python3 -m venv .venv && source .venv/bin/activate
186
+ pip install -e .
187
+
188
+ uvicorn voxarena.main:app --reload --port 8000
189
+ ```
190
+
191
+ Then in another terminal:
192
+
193
+ ```bash
194
+ cd ui && npm install && npm run dev
195
+ ```
196
+
197
+ Open the control panel at `http://localhost:5173`.
198
+
199
+ ## Bring Your Own Agent
200
+
201
+ The demo ships with the "Saffron Leaf" restaurant agent so you can run end-to-end on day one. To evaluate your own:
202
+
203
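+ 1. Replace the system prompt in `voxarena/data/saffron_leaf/system_prompt.txt` (or pass your own `data_dir` to `Agent` in `voxarena/agent.py`) and update the tool schemas (`TOOL_SCHEMAS`) in `voxarena/tools.py`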
204
+ 2. Implement (or stub) your tools in `voxarena/tools.py`
205
+ 3. Re-record `script/audio/*.wav` and update `script/utterances.yaml` to reflect your real workload
206
+ 4. Run the arena as normal — every provider gets scored against your scripts
207
+
208
+ ## Scripted Conversations
209
+
210
+ Conversations live in [`script/utterances.yaml`](script/utterances.yaml). Each turn pairs an utterance id with an `expect` block describing the correct tool call and/or response content:
211
+
212
+ ```yaml
213
+ - id: u04
214
+ text: "Are you open on Sundays?"
215
+ expect:
216
+ tool: get_hours
217
+ args:
218
+ day: sunday
219
+ response_contains:
220
+ - "closed"
221
+ ```
222
+
223
+ The harness plays `script/audio/{id}.wav` into the pipeline and scores the agent's actual tool calls and transcript against `expect`.
224
+
225
+ ## Configuration
226
+
227
+ | Variable | Description |
228
+ | --- | --- |
229
+ | `GOOGLE_API_KEY` / `OPENAI_API_KEY` | Provider credentials |
230
+ | `GEMINI_MODEL` / `OPENAI_MODEL` | Realtime model under test |
231
+ | `GEMINI_EVAL_MODEL` / `OPENAI_EVAL_MODEL` | Cheaper text models for grading |
232
+ | `PORT` | FastAPI server port |
233
+ | `BASE_DIR` | Override workdir (CLI: `--workdir`) |
234
+
235
+ ## Contributing
236
+
237
+ To add a new provider: implement an adapter in `voxarena/providers/` following the pattern in `gemini.py` / `openai.py`, wire it into `voxarena/harness.py` and `voxarena/config.py`, and open a PR.
238
+
239
+ For bugs and feature requests, please open an issue.
240
+
241
+ ## License
242
+
243
+ [MIT](LICENSE).
@@ -0,0 +1,191 @@
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="ui/src/assets/logo-dark.png" />
4
+ <img src="ui/src/assets/logo.png" alt="VoxArena" width="220" />
5
+ </picture>
6
+ </p>
7
+
8
+ <p align="center"><em>An evaluation arena for realtime voice agents.</em></p>
9
+
10
+ <p align="center">
11
+
12
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
13
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
14
+ [![Built with Pipecat](https://img.shields.io/badge/built%20with-pipecat-9cf.svg)](https://github.com/pipecat-ai/pipecat)
15
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](#contributing)
16
+
17
+ </p>
18
+
19
+ VoxArena is a reproducible benchmarking harness for realtime voice agents. Run the same scripted conversation across Gemini Live, OpenAI Realtime, and other [Pipecat](https://github.com/pipecat-ai/pipecat)-supported providers — and compare them apples-to-apples on latency, tool-call accuracy, and hallucinations.
20
+
21
+ Drop it into your CI pipeline, your dev loop, or the bundled control panel.
22
+
23
+ ---
24
+
25
+ ## 🚀 CI & Pipeline Integration
26
+
27
+ VoxArena ships a `voxarena` CLI designed for headless use in your build pipeline. It returns a non-zero exit code when any metric fails a threshold you define (a minimum not met or a maximum exceeded), and emits JUnit XML for native CI reporting.
28
+
29
+ ```bash
30
+ pip install voxarena
31
+
32
+ voxarena run \
33
+ --provider gemini \
34
+ --script ./script/utterances.yaml \
35
+ --min-tool-accuracy 0.9 \
36
+ --max-hallucinations 0 \
37
+ --max-avg-ttfa-ms 1500 \
38
+ --output result.json \
39
+ --junit voxarena.xml
40
+ # exit 0 if every threshold passes, 1 otherwise
41
+ ```
42
+
43
+ ### Compare two providers in one shot
44
+
45
+ ```bash
46
+ voxarena compare \
47
+ --gemini-model gemini-3.1-flash-live-preview \
48
+ --openai-model gpt-realtime-2 \
49
+ --num-turns 5 \
50
+ --min-tool-accuracy 0.9 \
51
+ --output compare.json
52
+ ```
53
+
54
+ ### GitHub Actions
55
+
56
+ ```yaml
57
+ - name: Voice agent regression check
58
+ env:
59
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
60
+ run: |
61
+ pip install voxarena
62
+ voxarena run --provider gemini \
63
+ --min-tool-accuracy 0.92 --max-hallucinations 0 \
64
+ --junit voxarena.xml --quiet
65
+
66
+ - uses: mikepenz/action-junit-report@v4
67
+ if: always()
68
+ with:
69
+ report_paths: voxarena.xml
70
+ ```
71
+
72
+ ### Subcommands
73
+
74
+ | Command | What it does |
75
+ | --- | --- |
76
+ | `voxarena run` | Single-provider scripted run; exits 0/1 against thresholds. |
77
+ | `voxarena compare` | Runs Gemini and OpenAI in parallel against the same script. |
78
+ | `voxarena report` | Generates a markdown comparison report from past runs. |
79
+
80
+ Run `voxarena <command> --help` for the full flag set.
81
+
82
+ ---
83
+
84
+ ## Features
85
+
86
+ - 🎙️ **Provider-agnostic agent** — one Pipecat pipeline drives every provider; swap models without re-implementing your agent
87
+ - 🔁 **Scripted conversations** — multi-turn YAML scripts with pre-recorded WAV inputs and expected tool calls / response content
88
+ - 📊 **Automated scoring** — tool-call correctness, response matching, hallucination counts, time-to-first-audio, interruption-stop latency
89
+ - 🆚 **Side-by-side comparisons** — run multiple providers in parallel against the same script
90
+ - 🗄️ **Persistent run history** — JSON manifests on disk, indexed in SQLite
91
+ - 🖥️ **Web control panel** — React UI to launch runs, watch live status, browse results, and edit scripts
92
+ - 🧩 **Extensible** — add a new provider by implementing one adapter class
93
+
94
+ ## Architecture
95
+
96
+ ```mermaid
97
+ flowchart TD
98
+ A["Recorded WAVs<br/>script/audio/*.wav"] --> B["Injection Harness<br/>voxarena/harness.py"]
99
+ B --> C
100
+
101
+ subgraph C ["Pipecat Pipeline"]
102
+ direction LR
103
+ C1["Audio Injector"] --> C2["Provider Adapter"]
104
+ C2 --> C3["Audio Capture"]
105
+ C3 --> C4["Metrics Collector"]
106
+ end
107
+
108
+ C2 <--> D{{"Provider Backend"}}
109
+ D --> D1["Gemini Live"]
110
+ D --> D2["OpenAI Realtime"]
111
+ D --> D3["...future providers"]
112
+
113
+ C4 --> E["Run Manifest<br/>results/PROVIDER/RUN_ID/manifest.json"]
114
+ E --> F[("SQLite Index<br/>runs.db")]
115
+
116
+ F <--> G["voxarena CLI<br/>+ FastAPI Backend"]
117
+ G <--> H["React Control Panel<br/>ui/"]
118
+
119
+ style D1 fill:#4285F4,color:#fff,stroke:#333
120
+ style D2 fill:#10A37F,color:#fff,stroke:#333
121
+ style D3 fill:#999,color:#fff,stroke:#333
122
+ style F fill:#f5f5f5,stroke:#333
123
+ style H fill:#fff7da,stroke:#333
124
+ ```
125
+
126
+ ## Local Dev (with UI)
127
+
128
+ ```bash
129
+ git clone https://github.com/simkeyur/vox-arena.git
130
+ cd vox-arena
131
+ cp .env.example .env # add GOOGLE_API_KEY / OPENAI_API_KEY
132
+
133
+ python3 -m venv .venv && source .venv/bin/activate
134
+ pip install -e .
135
+
136
+ uvicorn voxarena.main:app --reload --port 8000
137
+ ```
138
+
139
+ Then in another terminal:
140
+
141
+ ```bash
142
+ cd ui && npm install && npm run dev
143
+ ```
144
+
145
+ Open the control panel at `http://localhost:5173`.
146
+
147
+ ## Bring Your Own Agent
148
+
149
+ The demo ships with the "Saffron Leaf" restaurant agent so you can run end-to-end on day one. To evaluate your own:
150
+
151
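+ 1. Replace the system prompt in `voxarena/data/saffron_leaf/system_prompt.txt` (or pass your own `data_dir` to `Agent` in `voxarena/agent.py`) and update the tool schemas (`TOOL_SCHEMAS`) in `voxarena/tools.py`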
152
+ 2. Implement (or stub) your tools in `voxarena/tools.py`
153
+ 3. Re-record `script/audio/*.wav` and update `script/utterances.yaml` to reflect your real workload
154
+ 4. Run the arena as normal — every provider gets scored against your scripts
155
+
156
+ ## Scripted Conversations
157
+
158
+ Conversations live in [`script/utterances.yaml`](script/utterances.yaml). Each turn pairs an utterance id with an `expect` block describing the correct tool call and/or response content:
159
+
160
+ ```yaml
161
+ - id: u04
162
+ text: "Are you open on Sundays?"
163
+ expect:
164
+ tool: get_hours
165
+ args:
166
+ day: sunday
167
+ response_contains:
168
+ - "closed"
169
+ ```
170
+
171
+ The harness plays `script/audio/{id}.wav` into the pipeline and scores the agent's actual tool calls and transcript against `expect`.
172
+
173
+ ## Configuration
174
+
175
+ | Variable | Description |
176
+ | --- | --- |
177
+ | `GOOGLE_API_KEY` / `OPENAI_API_KEY` | Provider credentials |
178
+ | `GEMINI_MODEL` / `OPENAI_MODEL` | Realtime model under test |
179
+ | `GEMINI_EVAL_MODEL` / `OPENAI_EVAL_MODEL` | Cheaper text models for grading |
180
+ | `PORT` | FastAPI server port |
181
+ | `BASE_DIR` | Override workdir (CLI: `--workdir`) |
182
+
183
+ ## Contributing
184
+
185
+ To add a new provider: implement an adapter in `voxarena/providers/` following the pattern in `gemini.py` / `openai.py`, wire it into `voxarena/harness.py` and `voxarena/config.py`, and open a PR.
186
+
187
+ For bugs and feature requests, please open an issue.
188
+
189
+ ## License
190
+
191
+ [MIT](LICENSE).
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "voxarena"
7
+ version = "0.1.0"
8
+ description = "An evaluation arena for realtime voice agents."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "VoxArena contributors" }]
13
+ keywords = [
14
+ "voice-agents",
15
+ "realtime-llm",
16
+ "evaluation",
17
+ "benchmarking",
18
+ "gemini-live",
19
+ "openai-realtime",
20
+ "pipecat",
21
+ ]
22
+ classifiers = [
23
+ "Development Status :: 3 - Alpha",
24
+ "Intended Audience :: Developers",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Software Development :: Testing",
30
+ ]
31
+ dependencies = [
32
+ "fastapi>=0.110.0",
33
+ "uvicorn>=0.28.0",
34
+ "pydantic>=2.6.0",
35
+ "pydantic-settings>=2.2.0",
36
+ "pipecat-ai[openai,google]>=0.5.0",
37
+ "python-dotenv>=1.0.1",
38
+ "pyyaml>=6.0.1",
39
+ "loguru>=0.7.2",
40
+ ]
41
+
42
+ [project.optional-dependencies]
43
+ dev = ["pytest>=8.0.0"]
44
+
45
+ [project.scripts]
46
+ voxarena = "voxarena.cli:main"
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/simkeyur/vox-arena"
50
+ Issues = "https://github.com/simkeyur/vox-arena/issues"
51
+
52
+ [tool.setuptools.packages.find]
53
+ where = ["."]
54
+ include = ["voxarena*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,91 @@
1
+ """Agent definition — system prompt + tool schemas.
2
+
3
+ The bundled defaults ship the "Saffron Leaf" restaurant assistant under
4
+ ``voxarena/data/saffron_leaf/`` so VoxArena runs end-to-end out of the box.
5
+
6
+ To evaluate your own agent, point ``data_dir`` at a directory containing a
7
+ ``system_prompt.txt`` and edit ``voxarena/tools.py`` to register your tools.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import json
13
+ import os
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from voxarena.tools import TOOL_SCHEMAS
17
+
18
# Directory of the bundled example agent, resolved relative to this module.
DEFAULT_DATA_DIR = os.path.join(os.path.dirname(__file__), "data", "saffron_leaf")


class Agent:
    """A versioned, hashable bundle of (system prompt + tool schemas).

    On construction the system prompt is read from ``system_prompt.txt`` inside
    ``data_dir`` and SHA-256 digests of the prompt and of the JSON-serialized
    tool schemas are computed, so manifests can record exactly what was run.

    Args:
        prompt_version: Manifest tag for the prompt revision.
        tool_schema_version: Manifest tag for the tool schema revision.
        data_dir: Directory containing ``system_prompt.txt``. Defaults to the
            bundled Saffron Leaf example agent.

    Raises:
        FileNotFoundError: If ``system_prompt.txt`` does not exist in
            ``data_dir``.
    """

    def __init__(
        self,
        prompt_version: str = "v1.0",
        tool_schema_version: str = "v1.0",
        data_dir: Optional[str] = None,
    ):
        self.prompt_version = prompt_version
        self.tool_schema_version = tool_schema_version
        # A falsy ``data_dir`` (None or "") falls back to the bundled example.
        self.data_dir = data_dir or DEFAULT_DATA_DIR

        self.prompt_path = os.path.join(self.data_dir, "system_prompt.txt")
        self.system_prompt = self._load_system_prompt()

        self.tool_schemas = TOOL_SCHEMAS

        self.prompt_hash = self._sha256(self.system_prompt)
        # ``sort_keys=True`` keeps the digest independent of dict key order.
        self.tool_schema_hash = self._sha256(json.dumps(self.tool_schemas, sort_keys=True))

    def _load_system_prompt(self) -> str:
        """Return the contents of ``self.prompt_path`` with surrounding whitespace stripped.

        The file is always decoded as UTF-8 so that the loaded text (and the
        hash derived from it, which is computed over UTF-8 bytes) does not
        depend on the platform's locale encoding.

        Raises:
            FileNotFoundError: If the prompt file does not exist.
        """
        # Open directly instead of checking existence first: this avoids a
        # check-then-use race while raising the same exception type/message.
        try:
            with open(self.prompt_path, "r", encoding="utf-8") as f:
                return f.read().strip()
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"System prompt file not found at {self.prompt_path}"
            ) from err

    @staticmethod
    def _sha256(content: str) -> str:
        """Return the hex SHA-256 digest of ``content`` encoded as UTF-8."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def get_agent_metadata(self) -> Dict[str, Any]:
        """Versioning and checksum info recorded in manifests."""
        return {
            "prompt_version": self.prompt_version,
            "prompt_hash": self.prompt_hash,
            "tool_schema_version": self.tool_schema_version,
            "tool_schema_hash": self.tool_schema_hash,
        }

    def get_openai_tools(self) -> List[Dict[str, Any]]:
        """Return the tool schemas as a flat list of ``"type": "function"`` entries."""
        return [
            {
                "type": "function",
                "name": s["name"],
                "description": s["description"],
                "parameters": s["parameters"],
            }
            for s in self.tool_schemas
        ]

    def get_gemini_tools(self) -> List[Dict[str, Any]]:
        """Return one ``function_declarations`` wrapper per tool schema.

        Each schema is emitted as its own single-element
        ``function_declarations`` list rather than one combined list.
        """
        return [
            {
                "function_declarations": [
                    {
                        "name": s["name"],
                        "description": s["description"],
                        "parameters": s["parameters"],
                    }
                ]
            }
            for s in self.tool_schemas
        ]