tooldrift 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tooldrift-0.1.0/.gitignore +34 -0
- tooldrift-0.1.0/LICENSE +21 -0
- tooldrift-0.1.0/PKG-INFO +256 -0
- tooldrift-0.1.0/README.md +225 -0
- tooldrift-0.1.0/examples/contract.yaml +74 -0
- tooldrift-0.1.0/examples/suite.weather.yaml +80 -0
- tooldrift-0.1.0/pyproject.toml +63 -0
- tooldrift-0.1.0/src/tooldrift/__init__.py +15 -0
- tooldrift-0.1.0/src/tooldrift/cli.py +422 -0
- tooldrift-0.1.0/src/tooldrift/contract.py +253 -0
- tooldrift-0.1.0/src/tooldrift/diff.py +266 -0
- tooldrift-0.1.0/src/tooldrift/probe.py +221 -0
- tooldrift-0.1.0/src/tooldrift/providers.py +135 -0
- tooldrift-0.1.0/src/tooldrift/report.py +114 -0
- tooldrift-0.1.0/src/tooldrift/table.py +133 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
.venv/
|
|
7
|
+
venv/
|
|
8
|
+
env/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
.pytest_cache/
|
|
12
|
+
.mypy_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.coverage
|
|
15
|
+
htmlcov/
|
|
16
|
+
|
|
17
|
+
# Snapshots produced by `tooldrift snapshot`
|
|
18
|
+
snapshots/
|
|
19
|
+
|
|
20
|
+
# Editor / OS
|
|
21
|
+
.DS_Store
|
|
22
|
+
.idea/
|
|
23
|
+
.vscode/
|
|
24
|
+
|
|
25
|
+
# build scratch
|
|
26
|
+
PUBLISH_BLOCKED.md
|
|
27
|
+
BUILD_SETUP_NEXT_STEPS.md
|
|
28
|
+
.spawn.*
|
|
29
|
+
.builder_*
|
|
30
|
+
.build_metadata*
|
|
31
|
+
.stage_*
|
|
32
|
+
CLAUDE.md
|
|
33
|
+
.claude/
|
|
34
|
+
.builder_prompt_common.md
|
tooldrift-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 SuperMarioYL
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tooldrift-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tooldrift
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Per-provider tool-call contract regression sentinel for Chinese LLMs (DeepSeek/Qwen/Kimi/GLM/MiniMax).
|
|
5
|
+
Project-URL: Homepage, https://github.com/SuperMarioYL/tooldrift
|
|
6
|
+
Project-URL: Repository, https://github.com/SuperMarioYL/tooldrift
|
|
7
|
+
Project-URL: Issues, https://github.com/SuperMarioYL/tooldrift/issues
|
|
8
|
+
Author: SuperMarioYL
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,ci,deepseek,function-calling,glm,kimi,llm,minimax,qwen,regression,tool-calling
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
+
Classifier: Topic :: Software Development :: Testing
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: httpx>=0.27
|
|
22
|
+
Requires-Dist: pydantic>=2.7
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
24
|
+
Requires-Dist: rich>=13.7
|
|
25
|
+
Requires-Dist: typer>=0.12
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
<div align="right"><sub><a href="./README.en.md">English</a> ⇄ <b>简体中文</b></sub></div>
|
|
33
|
+
|
|
34
|
+
<p align="center">
|
|
35
|
+
<picture>
|
|
36
|
+
<source media="(prefers-color-scheme: dark)" srcset="./assets/hero-dark.svg">
|
|
37
|
+
<source media="(prefers-color-scheme: light)" srcset="./assets/hero-light.svg">
|
|
38
|
+
<img src="./assets/hero-light.svg" width="880" alt="ToolDrift — 国产模型 tool-call 契约回归哨兵">
|
|
39
|
+
</picture>
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
<p align="center"><sub>国产模型 tool-call 契约回归哨兵:换 DeepSeek / Qwen / Kimi / GLM / MiniMax 前,CI 先红灯告诉你哪个工具的 schema 不再等价。</sub></p>
|
|
43
|
+
|
|
44
|
+
<p align="center">
|
|
45
|
+
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-0071E3.svg" alt="License: MIT"></a>
|
|
46
|
+
<a href="https://github.com/SuperMarioYL/tooldrift/releases"><img src="https://img.shields.io/github/v/release/SuperMarioYL/tooldrift?color=5E5CE6" alt="Latest release"></a>
|
|
47
|
+
<a href="https://github.com/SuperMarioYL/tooldrift/actions/workflows/tooldrift.yml"><img src="https://img.shields.io/github/actions/workflow/status/SuperMarioYL/tooldrift/tooldrift.yml?branch=main&label=CI" alt="CI status"></a>
|
|
48
|
+
<img src="https://img.shields.io/badge/python-3.12-3776AB.svg" alt="Python 3.12">
|
|
49
|
+
<img src="https://img.shields.io/badge/Agent-tool--call_sentinel-10A37F.svg" alt="Agent tool-call sentinel">
|
|
50
|
+
<img src="https://img.shields.io/badge/CI-red%2Fgreen-E5484D.svg" alt="red/green CI gate">
|
|
51
|
+
</p>
|
|
52
|
+
|
|
53
|
+
> **把「换模型上线后才发现 function-calling 静默坏掉」提前成一次切换前的 `diff`。** 同一份工具套件分别打两家国产模型的 OpenAI-兼容 `/chat/completions`,把各自的 `tool_calls` 归一化成可对账的契约快照,逐字段比对——等价亮绿,不等价亮红并精确指出差异点,drift 时进程非零退出,直接挂进 CI。
|
|
54
|
+
|
|
55
|
+
ToolDrift 不是「Promptfoo 的中文版」,也不是统一 API / 路由。它接的是一块**没人守护的地**:[DeepSeek-Reasonix](https://github.com/esengine/DeepSeek-Reasonix)、[DeepSeek-TUI](https://github.com/Hmbown/DeepSeek-TUI) 这类**绑死单一国产模型**的 Coding Agent 很火,但它们结构上不做「换走时 tool-call 契约是否还等价」的交叉校验;通用 eval 框架(Promptfoo)只断言文本输出,刻意不内化任一家的协议怪癖。当 MiniMax-M2 这类被 [sermakarevich](https://twitter.com/sermakarevich) 反复讨论的 agent 模型把「tool-calling 好」当卖点推、而国产模型价格战让「换供应商砍成本」成为每月运维动作时,「换模型 → function-calling 静默坏掉」就从偶发变成系统性风险。ToolDrift 命名并守护「跨国产模型 tool-call 契约等价性」这个新原语——它是绑死方叙事的对偶:站在**迁移方**,帮你安全地换走。
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## 目录
|
|
60
|
+
|
|
61
|
+
- [架构](#架构)
|
|
62
|
+
- [为什么需要它](#为什么需要它)
|
|
63
|
+
- [安装](#安装)
|
|
64
|
+
- [快速开始](#快速开始)
|
|
65
|
+
- [用法](#用法)
|
|
66
|
+
- [Demo](#demo)
|
|
67
|
+
- [五家 `tool_calls` 对比表](#五家-tool_calls-对比表)
|
|
68
|
+
- [配置](#配置)
|
|
69
|
+
- [付费层 · 托管契约漂移看板](#付费层--托管契约漂移看板)
|
|
70
|
+
- [路线图](#路线图)
|
|
71
|
+
- [对比 DeepSeek-Reasonix](#对比-deepseek-reasonix)
|
|
72
|
+
- [License](#license)
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## <img src="https://api.iconify.design/tabler:topology-star-3.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 架构
|
|
77
|
+
|
|
78
|
+
单进程 CLI,无服务、无数据库、**永不代理业务流量**——只读各家端点、归一化、比对。
|
|
79
|
+
|
|
80
|
+
<p align="center">
|
|
81
|
+
<picture>
|
|
82
|
+
<source media="(prefers-color-scheme: dark)" srcset="./assets/atlas-dark.svg">
|
|
83
|
+
<source media="(prefers-color-scheme: light)" srcset="./assets/atlas-light.svg">
|
|
84
|
+
<img src="./assets/atlas-light.svg" width="880" alt="架构:suite.yaml → probe 两家 provider → 归一化契约快照 → diff → 红绿 CI 退出码">
|
|
85
|
+
</picture>
|
|
86
|
+
</p>
|
|
87
|
+
|
|
88
|
+
核心原语是 **`ContractSnapshot`**:把每家模型吐 `tool_calls` 的「形状」提炼成一份可 diff 的快照——
|
|
89
|
+
|
|
90
|
+
```text
|
|
91
|
+
ContractSnapshot
|
|
92
|
+
├─ provider / model_id # 契约绑定到具体模型/版本
|
|
93
|
+
└─ tools: { tool_name -> ToolCallShape }
|
|
94
|
+
ToolCallShape
|
|
95
|
+
├─ emitted 该工具是否被调出
|
|
96
|
+
├─ arg_keys arguments 顶层键集合(排序后)
|
|
97
|
+
├─ arg_nesting 每个参数的 JSON 类型/嵌套形状
|
|
98
|
+
├─ arguments_encoding object | json_string
|
|
99
|
+
├─ parallel_arity 并行 tool_calls 的数量语义
|
|
100
|
+
├─ tool_call_id_format openai | custom | absent
|
|
101
|
+
└─ finish_reason "tool_calls" vs 其它取值
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
`diff(a, b)` 在 `tool_name` 上对齐两份快照、逐字段判等价,产出 `[ToolDelta]`——这正是 Promptfoo(断文本)和 DeepSeek-Reasonix(绑死一家)结构上都**不会**做的那块。
|
|
105
|
+
|
|
106
|
+
## <img src="https://api.iconify.design/tabler:bulb.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 为什么需要它
|
|
107
|
+
|
|
108
|
+
每家国产模型把 `tool_calls` **吐得都不一样**、且不向后兼容:参数名变了、`arguments` 从对象变字符串、并行调用的数组语义不同、`finish_reason` 取值不同。改一行 `base_url`/`model_id`、跑通几个聊天 prompt 就上线,结果某个工具的 schema 静默漂移,agent 在生产里调错工具或调不出工具。这是**逐模型(per-model)的契约漂移**,通用文本 eval 覆盖不到。ToolDrift 把它前移成切换前 CI 里的一盏红绿灯。
|
|
109
|
+
|
|
110
|
+
## <img src="https://api.iconify.design/tabler:rocket.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 安装
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
pip install tooldrift # 或: uv tool install tooldrift
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
国内访问 PyPI 不畅?可改用镜像源:`pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tooldrift`;或克隆仓库后 `pip install -e .`(仅 5 个直接依赖,仍需从 PyPI 或镜像下载)。
|
|
117
|
+
|
|
118
|
+
## <img src="https://api.iconify.design/tabler:player-play.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 快速开始
|
|
119
|
+
|
|
120
|
+
**零 API key 先看到红绿**——所有命令都支持 `--from-fixtures`,回放 `tests/fixtures/` 里的离线样本:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
tooldrift snapshot --base deepseek --from-fixtures # 抓一份契约快照
|
|
124
|
+
tooldrift run --old deepseek --new qwen --from-fixtures # 跨两家比对,drift 时非零退出
|
|
125
|
+
echo "CI exit code: $?" # → 1(捕获到漂移)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
<details><summary>样例输出(DeepSeek → Qwen 切换:2 个工具均出现契约漂移)</summary>
|
|
129
|
+
|
|
130
|
+
```text
|
|
131
|
+
ToolDrift deepseek/deepseek-chat → qwen/qwen-plus suite=weather
|
|
132
|
+
✗ get_forecast contract drift
|
|
133
|
+
arg_keys days, include, location, unit → days, location
|
|
134
|
+
arg_nesting:days integer → string
|
|
135
|
+
arguments_encoding json_string → object
|
|
136
|
+
tool_call_id_format openai → custom
|
|
137
|
+
finish_reason tool_calls → stop
|
|
138
|
+
✗ get_weather contract drift
|
|
139
|
+
arguments_encoding json_string → object
|
|
140
|
+
tool_call_id_format openai → custom
|
|
141
|
+
finish_reason tool_calls → stop
|
|
142
|
+
╭──────────────────────────────────────────────────╮
|
|
143
|
+
│ FAIL — BREAKING drift in 2 of 2 tool(s). Exit 1. │
|
|
144
|
+
╰──────────────────────────────────────────────────╯
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
</details>
|
|
148
|
+
|
|
149
|
+
接真实端点:把各家 key 写进环境变量(`DEEPSEEK_API_KEY`、`DASHSCOPE_API_KEY`…,见 [`examples/contract.yaml`](./examples/contract.yaml)),去掉 `--from-fixtures` 即可。
|
|
150
|
+
|
|
151
|
+
## <img src="https://api.iconify.design/tabler:terminal-2.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 用法
|
|
152
|
+
|
|
153
|
+
四个子命令,对应 OSS 核心:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
# 1) snapshot —— 抓一家的契约快照,落 JSON(契约绑定到具体版本)
|
|
157
|
+
tooldrift snapshot --base deepseek --suite examples/suite.weather.yaml -o snapshots/deepseek.json
|
|
158
|
+
|
|
159
|
+
# 2) diff —— 纯离线比对两份已落地的快照(无网络),drift 即非零退出
|
|
160
|
+
tooldrift diff snapshots/deepseek.json snapshots/qwen.json
|
|
161
|
+
|
|
162
|
+
# 3) run —— 一行式 CI 入口:探测 old vs new、比对、红绿报告 + 退出码
|
|
163
|
+
tooldrift run --old deepseek --new qwen --suite examples/suite.weather.yaml
|
|
164
|
+
|
|
165
|
+
# 4) compare-table —— 跨五家产出可传播的 Markdown 对比表
|
|
166
|
+
tooldrift compare-table --from-fixtures -o COMPARISON.md
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
更多示例见 [`examples/`](./examples/)。把第 3 条命令(`tooldrift run`)直接写进 CI(见下方路线图里的 `.github/workflows/tooldrift.yml`),换模型的 PR 一旦 schema 不等价就会 fail。
|
|
170
|
+
|
|
171
|
+
## <img src="https://api.iconify.design/tabler:photo.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> Demo
|
|
172
|
+
|
|
173
|
+

|
|
174
|
+
|
|
175
|
+
> 同样的 30 秒流程也有 asciinema 录像:[`assets/demo.cast`](./assets/demo.cast)。
|
|
176
|
+
|
|
177
|
+
## <img src="https://api.iconify.design/tabler:table.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 五家 `tool_calls` 对比表
|
|
178
|
+
|
|
179
|
+
`tooldrift compare-table` 跑一次的副产品——这张表本身就是最好的传播钩子(下表为离线 fixture 实测,标 `Δ` 的行即跨家不等价点):
|
|
180
|
+
|
|
181
|
+
### `tool_calls` contract comparison — suite `weather`
|
|
182
|
+
|
|
183
|
+
| tool | field | deepseek | qwen |
|
|
184
|
+
|---|---|---|---|
|
|
185
|
+
| **get_forecast** | emitted | ✓ | ✓ |
|
|
186
|
+
| | **Δ arg_keys** | `days, include, location, unit` | `days, location` |
|
|
187
|
+
| | **Δ args_encoding** | `json_string` | `object` |
|
|
188
|
+
| | parallel_arity | 1 | 1 |
|
|
189
|
+
| | **Δ id_format** | `openai` | `custom` |
|
|
190
|
+
| | **Δ finish_reason** | `tool_calls` | `stop` |
|
|
191
|
+
| **get_weather** | emitted | ✓ | ✓ |
|
|
192
|
+
| | arg_keys | `location, unit` | `location, unit` |
|
|
193
|
+
| | **Δ args_encoding** | `json_string` | `object` |
|
|
194
|
+
| | parallel_arity | 1 | 1 |
|
|
195
|
+
| | **Δ id_format** | `openai` | `custom` |
|
|
196
|
+
| | **Δ finish_reason** | `tool_calls` | `stop` |
|
|
197
|
+
|
|
198
|
+
> 接上五家真实 key 后,`tooldrift compare-table` 会把 kimi / glm / minimax 三列也填满。
|
|
199
|
+
|
|
200
|
+
## <img src="https://api.iconify.design/tabler:adjustments.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 配置
|
|
201
|
+
|
|
202
|
+
`contract.yaml` 顶层键(完整示例见 [`examples/contract.yaml`](./examples/contract.yaml)):
|
|
203
|
+
|
|
204
|
+
| 键 | 类型 | 默认 | 含义 |
|
|
205
|
+
|---|---|---|---|
|
|
206
|
+
| `version` | int | `1` | 契约文件格式版本 |
|
|
207
|
+
| `suite` | path | — | 引用的工具套件 YAML(prompt + 工具定义) |
|
|
208
|
+
| `providers` | map | — | 受测 provider 列表:每家 `base_url` / `model_id` / `api_key_env` |
|
|
209
|
+
| `providers.<p>.api_key_env` | str | — | 读 key 的环境变量名——key **从不**写进文件或快照 |
|
|
210
|
+
| `expected` | map | *(可选)* | 钉死一份「已知良好」契约,让 `run` 回归每家是否仍满足它 |
|
|
211
|
+
|
|
212
|
+
## <img src="https://api.iconify.design/tabler:building-bank.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 付费层 · 托管契约漂移看板
|
|
213
|
+
|
|
214
|
+
OSS 核心(`snapshot / diff / run / compare-table` CLI)**永久免费**,护城河是开放的契约快照格式。商业层是**托管的「契约漂移看板」**——持续对五家最新 API 定时回归,新版本一发就推送告警(如「GLM 又改了 `arguments` 字符串化」),按团队订阅:
|
|
215
|
+
|
|
216
|
+
| 档位 | 价格 | 内容 |
|
|
217
|
+
|---|---|---|
|
|
218
|
+
| **Team** | ¥499/月 | 托管定时回归 + 五家变更告警(邮件 / 飞书 / 钉钉 webhook)+ 私有 contract 托管 |
|
|
219
|
+
| **Enterprise** | ¥2,999/月起 | 私有部署、报告留痕(信创 / 政企合规交付)、五家之外按需适配(豆包 / 百川) |
|
|
220
|
+
|
|
221
|
+
首付费客户来自**承诺「支持多家国产模型」的 agent 中间件 / 框架团队**——他们每接一家新模型都是盲跳,最有动机为现成等价性测试 + 协议变更订阅付费。看板本身不在本仓库范围内(CLI 已埋好开关与文档接缝);想试用托管层请提 issue 联系。
|
|
222
|
+
|
|
223
|
+
## <img src="https://api.iconify.design/tabler:map-2.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 路线图
|
|
224
|
+
|
|
225
|
+
- [x] **m1** · `snapshot` 探测一家、归一化 `ContractSnapshot`、落 JSON
|
|
226
|
+
- [x] **m2** · `diff` 纯函数 + `run` 红绿报告 + CI 非零退出码
|
|
227
|
+
- [x] **m3** · `compare-table` 跨五家产出对比 Markdown 表
|
|
228
|
+
- [x] **m4** · CI 模板(`.github/workflows/tooldrift.yml`)+ demo + 双语 polished README + 货币化接缝
|
|
229
|
+
- [ ] 流式 tool-call(SSE delta)重组
|
|
230
|
+
- [ ] 五家之外的模型适配(豆包 / 百川…,付费按需)
|
|
231
|
+
- [ ] 托管「契约漂移看板」SaaS(付费层)
|
|
232
|
+
|
|
233
|
+
## <img src="https://api.iconify.design/tabler:git-compare.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 对比 DeepSeek-Reasonix
|
|
234
|
+
|
|
235
|
+
诚实定位——ToolDrift 站在迁移方,是绑死方的对偶,不踩对方赛道:
|
|
236
|
+
|
|
237
|
+
| 维度 | ToolDrift | [DeepSeek-Reasonix](https://github.com/esengine/DeepSeek-Reasonix) |
|
|
238
|
+
|---|---|---|
|
|
239
|
+
| 目标 | 跨国产模型 tool-call 契约**等价性回归** | 围绕 DeepSeek **一家**把 agent 工程化到极致 |
|
|
240
|
+
| 单模型深度 / prefix-cache 工程 | — | ✓(这是它 2.5 万星的护城河) |
|
|
241
|
+
| 跨模型迁移安全(换走时是否等价) | ✓ | —(结构上不做,做了会消解 DeepSeek-native 卖点) |
|
|
242
|
+
| 即开即用的 agent 终端体验 | partial(CLI 工具,非 agent) | ✓ |
|
|
243
|
+
| 进 CI 的红绿退出码 | ✓ | — |
|
|
244
|
+
|
|
245
|
+
## <img src="https://api.iconify.design/tabler:license.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> License
|
|
246
|
+
|
|
247
|
+
[MIT](./LICENSE)。欢迎提 [issue](https://github.com/SuperMarioYL/tooldrift/issues) 描述你的真实迁移场景,或 PR 贡献一家新 provider 的适配。
|
|
248
|
+
|
|
249
|
+
## Share this
|
|
250
|
+
|
|
251
|
+
```
|
|
252
|
+
ToolDrift — 国产模型 tool-call 契约回归哨兵。换 DeepSeek/Qwen/Kimi/GLM/MiniMax 前,
|
|
253
|
+
CI 先红灯告诉你哪个工具 schema 不再等价。Agent 迁移方的对偶。 https://github.com/SuperMarioYL/tooldrift
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
<p align="center"><sub><a href="./LICENSE">MIT</a> © 2026 SuperMarioYL</sub></p>
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
<div align="right"><sub><a href="./README.en.md">English</a> ⇄ <b>简体中文</b></sub></div>
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<picture>
|
|
5
|
+
<source media="(prefers-color-scheme: dark)" srcset="./assets/hero-dark.svg">
|
|
6
|
+
<source media="(prefers-color-scheme: light)" srcset="./assets/hero-light.svg">
|
|
7
|
+
<img src="./assets/hero-light.svg" width="880" alt="ToolDrift — 国产模型 tool-call 契约回归哨兵">
|
|
8
|
+
</picture>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center"><sub>国产模型 tool-call 契约回归哨兵:换 DeepSeek / Qwen / Kimi / GLM / MiniMax 前,CI 先红灯告诉你哪个工具的 schema 不再等价。</sub></p>
|
|
12
|
+
|
|
13
|
+
<p align="center">
|
|
14
|
+
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-0071E3.svg" alt="License: MIT"></a>
|
|
15
|
+
<a href="https://github.com/SuperMarioYL/tooldrift/releases"><img src="https://img.shields.io/github/v/release/SuperMarioYL/tooldrift?color=5E5CE6" alt="Latest release"></a>
|
|
16
|
+
<a href="https://github.com/SuperMarioYL/tooldrift/actions/workflows/tooldrift.yml"><img src="https://img.shields.io/github/actions/workflow/status/SuperMarioYL/tooldrift/tooldrift.yml?branch=main&label=CI" alt="CI status"></a>
|
|
17
|
+
<img src="https://img.shields.io/badge/python-3.12-3776AB.svg" alt="Python 3.12">
|
|
18
|
+
<img src="https://img.shields.io/badge/Agent-tool--call_sentinel-10A37F.svg" alt="Agent tool-call sentinel">
|
|
19
|
+
<img src="https://img.shields.io/badge/CI-red%2Fgreen-E5484D.svg" alt="red/green CI gate">
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
> **把「换模型上线后才发现 function-calling 静默坏掉」提前成一次切换前的 `diff`。** 同一份工具套件分别打两家国产模型的 OpenAI-兼容 `/chat/completions`,把各自的 `tool_calls` 归一化成可对账的契约快照,逐字段比对——等价亮绿,不等价亮红并精确指出差异点,drift 时进程非零退出,直接挂进 CI。
|
|
23
|
+
|
|
24
|
+
ToolDrift 不是「Promptfoo 的中文版」,也不是统一 API / 路由。它接的是一块**没人守护的地**:[DeepSeek-Reasonix](https://github.com/esengine/DeepSeek-Reasonix)、[DeepSeek-TUI](https://github.com/Hmbown/DeepSeek-TUI) 这类**绑死单一国产模型**的 Coding Agent 很火,但它们结构上不做「换走时 tool-call 契约是否还等价」的交叉校验;通用 eval 框架(Promptfoo)只断言文本输出,刻意不内化任一家的协议怪癖。当 MiniMax-M2 这类被 [sermakarevich](https://twitter.com/sermakarevich) 反复讨论的 agent 模型把「tool-calling 好」当卖点推、而国产模型价格战让「换供应商砍成本」成为每月运维动作时,「换模型 → function-calling 静默坏掉」就从偶发变成系统性风险。ToolDrift 命名并守护「跨国产模型 tool-call 契约等价性」这个新原语——它是绑死方叙事的对偶:站在**迁移方**,帮你安全地换走。
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 目录
|
|
29
|
+
|
|
30
|
+
- [架构](#架构)
|
|
31
|
+
- [为什么需要它](#为什么需要它)
|
|
32
|
+
- [安装](#安装)
|
|
33
|
+
- [快速开始](#快速开始)
|
|
34
|
+
- [用法](#用法)
|
|
35
|
+
- [Demo](#demo)
|
|
36
|
+
- [五家 `tool_calls` 对比表](#五家-tool_calls-对比表)
|
|
37
|
+
- [配置](#配置)
|
|
38
|
+
- [付费层 · 托管契约漂移看板](#付费层--托管契约漂移看板)
|
|
39
|
+
- [路线图](#路线图)
|
|
40
|
+
- [对比 DeepSeek-Reasonix](#对比-deepseek-reasonix)
|
|
41
|
+
- [License](#license)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## <img src="https://api.iconify.design/tabler:topology-star-3.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 架构
|
|
46
|
+
|
|
47
|
+
单进程 CLI,无服务、无数据库、**永不代理业务流量**——只读各家端点、归一化、比对。
|
|
48
|
+
|
|
49
|
+
<p align="center">
|
|
50
|
+
<picture>
|
|
51
|
+
<source media="(prefers-color-scheme: dark)" srcset="./assets/atlas-dark.svg">
|
|
52
|
+
<source media="(prefers-color-scheme: light)" srcset="./assets/atlas-light.svg">
|
|
53
|
+
<img src="./assets/atlas-light.svg" width="880" alt="架构:suite.yaml → probe 两家 provider → 归一化契约快照 → diff → 红绿 CI 退出码">
|
|
54
|
+
</picture>
|
|
55
|
+
</p>
|
|
56
|
+
|
|
57
|
+
核心原语是 **`ContractSnapshot`**:把每家模型吐 `tool_calls` 的「形状」提炼成一份可 diff 的快照——
|
|
58
|
+
|
|
59
|
+
```text
|
|
60
|
+
ContractSnapshot
|
|
61
|
+
├─ provider / model_id # 契约绑定到具体模型/版本
|
|
62
|
+
└─ tools: { tool_name -> ToolCallShape }
|
|
63
|
+
ToolCallShape
|
|
64
|
+
├─ emitted 该工具是否被调出
|
|
65
|
+
├─ arg_keys arguments 顶层键集合(排序后)
|
|
66
|
+
├─ arg_nesting 每个参数的 JSON 类型/嵌套形状
|
|
67
|
+
├─ arguments_encoding object | json_string
|
|
68
|
+
├─ parallel_arity 并行 tool_calls 的数量语义
|
|
69
|
+
├─ tool_call_id_format openai | custom | absent
|
|
70
|
+
└─ finish_reason "tool_calls" vs 其它取值
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`diff(a, b)` 在 `tool_name` 上对齐两份快照、逐字段判等价,产出 `[ToolDelta]`——这正是 Promptfoo(断文本)和 DeepSeek-Reasonix(绑死一家)结构上都**不会**做的那块。
|
|
74
|
+
|
|
75
|
+
## <img src="https://api.iconify.design/tabler:bulb.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 为什么需要它
|
|
76
|
+
|
|
77
|
+
每家国产模型把 `tool_calls` **吐得都不一样**、且不向后兼容:参数名变了、`arguments` 从对象变字符串、并行调用的数组语义不同、`finish_reason` 取值不同。改一行 `base_url`/`model_id`、跑通几个聊天 prompt 就上线,结果某个工具的 schema 静默漂移,agent 在生产里调错工具或调不出工具。这是**逐模型(per-model)的契约漂移**,通用文本 eval 覆盖不到。ToolDrift 把它前移成切换前 CI 里的一盏红绿灯。
|
|
78
|
+
|
|
79
|
+
## <img src="https://api.iconify.design/tabler:rocket.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 安装
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install tooldrift # 或: uv tool install tooldrift
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
国内访问 PyPI 不畅?可改用镜像源:`pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tooldrift`;或克隆仓库后 `pip install -e .`(仅 5 个直接依赖,仍需从 PyPI 或镜像下载)。
|
|
86
|
+
|
|
87
|
+
## <img src="https://api.iconify.design/tabler:player-play.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 快速开始
|
|
88
|
+
|
|
89
|
+
**零 API key 先看到红绿**——所有命令都支持 `--from-fixtures`,回放 `tests/fixtures/` 里的离线样本:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
tooldrift snapshot --base deepseek --from-fixtures # 抓一份契约快照
|
|
93
|
+
tooldrift run --old deepseek --new qwen --from-fixtures # 跨两家比对,drift 时非零退出
|
|
94
|
+
echo "CI exit code: $?" # → 1(捕获到漂移)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
<details><summary>样例输出(DeepSeek → Qwen 切换:2 个工具均出现契约漂移)</summary>
|
|
98
|
+
|
|
99
|
+
```text
|
|
100
|
+
ToolDrift deepseek/deepseek-chat → qwen/qwen-plus suite=weather
|
|
101
|
+
✗ get_forecast contract drift
|
|
102
|
+
arg_keys days, include, location, unit → days, location
|
|
103
|
+
arg_nesting:days integer → string
|
|
104
|
+
arguments_encoding json_string → object
|
|
105
|
+
tool_call_id_format openai → custom
|
|
106
|
+
finish_reason tool_calls → stop
|
|
107
|
+
✗ get_weather contract drift
|
|
108
|
+
arguments_encoding json_string → object
|
|
109
|
+
tool_call_id_format openai → custom
|
|
110
|
+
finish_reason tool_calls → stop
|
|
111
|
+
╭──────────────────────────────────────────────────╮
|
|
112
|
+
│ FAIL — BREAKING drift in 2 of 2 tool(s). Exit 1. │
|
|
113
|
+
╰──────────────────────────────────────────────────╯
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
</details>
|
|
117
|
+
|
|
118
|
+
接真实端点:把各家 key 写进环境变量(`DEEPSEEK_API_KEY`、`DASHSCOPE_API_KEY`…,见 [`examples/contract.yaml`](./examples/contract.yaml)),去掉 `--from-fixtures` 即可。
|
|
119
|
+
|
|
120
|
+
## <img src="https://api.iconify.design/tabler:terminal-2.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 用法
|
|
121
|
+
|
|
122
|
+
四个子命令,对应 OSS 核心:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# 1) snapshot —— 抓一家的契约快照,落 JSON(契约绑定到具体版本)
|
|
126
|
+
tooldrift snapshot --base deepseek --suite examples/suite.weather.yaml -o snapshots/deepseek.json
|
|
127
|
+
|
|
128
|
+
# 2) diff —— 纯离线比对两份已落地的快照(无网络),drift 即非零退出
|
|
129
|
+
tooldrift diff snapshots/deepseek.json snapshots/qwen.json
|
|
130
|
+
|
|
131
|
+
# 3) run —— 一行式 CI 入口:探测 old vs new、比对、红绿报告 + 退出码
|
|
132
|
+
tooldrift run --old deepseek --new qwen --suite examples/suite.weather.yaml
|
|
133
|
+
|
|
134
|
+
# 4) compare-table —— 跨五家产出可传播的 Markdown 对比表
|
|
135
|
+
tooldrift compare-table --from-fixtures -o COMPARISON.md
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
更多示例见 [`examples/`](./examples/)。把第 3 条命令(`tooldrift run`)直接写进 CI(见下方路线图里的 `.github/workflows/tooldrift.yml`),换模型的 PR 一旦 schema 不等价就会 fail。
|
|
139
|
+
|
|
140
|
+
## <img src="https://api.iconify.design/tabler:photo.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> Demo
|
|
141
|
+
|
|
142
|
+

|
|
143
|
+
|
|
144
|
+
> 同样的 30 秒流程也有 asciinema 录像:[`assets/demo.cast`](./assets/demo.cast)。
|
|
145
|
+
|
|
146
|
+
## <img src="https://api.iconify.design/tabler:table.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 五家 `tool_calls` 对比表
|
|
147
|
+
|
|
148
|
+
`tooldrift compare-table` 跑一次的副产品——这张表本身就是最好的传播钩子(下表为离线 fixture 实测,标 `Δ` 的行即跨家不等价点):
|
|
149
|
+
|
|
150
|
+
### `tool_calls` contract comparison — suite `weather`
|
|
151
|
+
|
|
152
|
+
| tool | field | deepseek | qwen |
|
|
153
|
+
|---|---|---|---|
|
|
154
|
+
| **get_forecast** | emitted | ✓ | ✓ |
|
|
155
|
+
| | **Δ arg_keys** | `days, include, location, unit` | `days, location` |
|
|
156
|
+
| | **Δ args_encoding** | `json_string` | `object` |
|
|
157
|
+
| | parallel_arity | 1 | 1 |
|
|
158
|
+
| | **Δ id_format** | `openai` | `custom` |
|
|
159
|
+
| | **Δ finish_reason** | `tool_calls` | `stop` |
|
|
160
|
+
| **get_weather** | emitted | ✓ | ✓ |
|
|
161
|
+
| | arg_keys | `location, unit` | `location, unit` |
|
|
162
|
+
| | **Δ args_encoding** | `json_string` | `object` |
|
|
163
|
+
| | parallel_arity | 1 | 1 |
|
|
164
|
+
| | **Δ id_format** | `openai` | `custom` |
|
|
165
|
+
| | **Δ finish_reason** | `tool_calls` | `stop` |
|
|
166
|
+
|
|
167
|
+
> 接上五家真实 key 后,`tooldrift compare-table` 会把 kimi / glm / minimax 三列也填满。
|
|
168
|
+
|
|
169
|
+
## <img src="https://api.iconify.design/tabler:adjustments.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 配置
|
|
170
|
+
|
|
171
|
+
`contract.yaml` 顶层键(完整示例见 [`examples/contract.yaml`](./examples/contract.yaml)):
|
|
172
|
+
|
|
173
|
+
| 键 | 类型 | 默认 | 含义 |
|
|
174
|
+
|---|---|---|---|
|
|
175
|
+
| `version` | int | `1` | 契约文件格式版本 |
|
|
176
|
+
| `suite` | path | — | 引用的工具套件 YAML(prompt + 工具定义) |
|
|
177
|
+
| `providers` | map | — | 受测 provider 列表:每家 `base_url` / `model_id` / `api_key_env` |
|
|
178
|
+
| `providers.<p>.api_key_env` | str | — | 读 key 的环境变量名——key **从不**写进文件或快照 |
|
|
179
|
+
| `expected` | map | *(可选)* | 钉死一份「已知良好」契约,让 `run` 回归每家是否仍满足它 |
|
|
180
|
+
|
|
181
|
+
## <img src="https://api.iconify.design/tabler:building-bank.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 付费层 · 托管契约漂移看板
|
|
182
|
+
|
|
183
|
+
OSS 核心(`snapshot / diff / run / compare-table` CLI)**永久免费**,护城河是开放的契约快照格式。商业层是**托管的「契约漂移看板」**——持续对五家最新 API 定时回归,新版本一发就推送告警(如「GLM 又改了 `arguments` 字符串化」),按团队订阅:
|
|
184
|
+
|
|
185
|
+
| 档位 | 价格 | 内容 |
|
|
186
|
+
|---|---|---|
|
|
187
|
+
| **Team** | ¥499/月 | 托管定时回归 + 五家变更告警(邮件 / 飞书 / 钉钉 webhook)+ 私有 contract 托管 |
|
|
188
|
+
| **Enterprise** | ¥2,999/月起 | 私有部署、报告留痕(信创 / 政企合规交付)、五家之外按需适配(豆包 / 百川) |
|
|
189
|
+
|
|
190
|
+
首付费客户来自**承诺「支持多家国产模型」的 agent 中间件 / 框架团队**——他们每接一家新模型都是盲跳,最有动机为现成等价性测试 + 协议变更订阅付费。看板本身不在本仓库范围内(CLI 已埋好开关与文档接缝);想试用托管层请提 issue 联系。
|
|
191
|
+
|
|
192
|
+
## <img src="https://api.iconify.design/tabler:map-2.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 路线图
|
|
193
|
+
|
|
194
|
+
- [x] **m1** · `snapshot` 探测一家、归一化 `ContractSnapshot`、落 JSON
|
|
195
|
+
- [x] **m2** · `diff` 纯函数 + `run` 红绿报告 + CI 非零退出码
|
|
196
|
+
- [x] **m3** · `compare-table` 跨五家产出对比 Markdown 表
|
|
197
|
+
- [x] **m4** · CI 模板(`.github/workflows/tooldrift.yml`)+ demo + 双语 polished README + 货币化接缝
|
|
198
|
+
- [ ] 流式 tool-call(SSE delta)重组
|
|
199
|
+
- [ ] 五家之外的模型适配(豆包 / 百川…,付费按需)
|
|
200
|
+
- [ ] 托管「契约漂移看板」SaaS(付费层)
|
|
201
|
+
|
|
202
|
+
## <img src="https://api.iconify.design/tabler:git-compare.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> 对比 DeepSeek-Reasonix
|
|
203
|
+
|
|
204
|
+
诚实定位——ToolDrift 站在迁移方,是绑死方的对偶,不踩对方赛道:
|
|
205
|
+
|
|
206
|
+
| 维度 | ToolDrift | [DeepSeek-Reasonix](https://github.com/esengine/DeepSeek-Reasonix) |
|
|
207
|
+
|---|---|---|
|
|
208
|
+
| 目标 | 跨国产模型 tool-call 契约**等价性回归** | 围绕 DeepSeek **一家**把 agent 工程化到极致 |
|
|
209
|
+
| 单模型深度 / prefix-cache 工程 | — | ✓(这是它 2.5 万星的护城河) |
|
|
210
|
+
| 跨模型迁移安全(换走时是否等价) | ✓ | —(结构上不做,做了会消解 DeepSeek-native 卖点) |
|
|
211
|
+
| 即开即用的 agent 终端体验 | partial(CLI 工具,非 agent) | ✓ |
|
|
212
|
+
| 进 CI 的红绿退出码 | ✓ | — |
|
|
213
|
+
|
|
214
|
+
## <img src="https://api.iconify.design/tabler:license.svg?color=%230071E3&width=24" height="22" align="absmiddle" alt=""> License
|
|
215
|
+
|
|
216
|
+
[MIT](./LICENSE)。欢迎提 [issue](https://github.com/SuperMarioYL/tooldrift/issues) 描述你的真实迁移场景,或 PR 贡献一家新 provider 的适配。
|
|
217
|
+
|
|
218
|
+
## Share this
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
ToolDrift — 国产模型 tool-call 契约回归哨兵。换 DeepSeek/Qwen/Kimi/GLM/MiniMax 前,
|
|
222
|
+
CI 先红灯告诉你哪个工具 schema 不再等价。Agent 迁移方的对偶。 https://github.com/SuperMarioYL/tooldrift
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
<p align="center"><sub><a href="./LICENSE">MIT</a> © 2026 SuperMarioYL</sub></p>
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# contract.yaml
|
|
2
|
+
# A ToolDrift contract definition. It declares the providers under test
|
|
3
|
+
# (base_url + model_id), points at the tool suite to probe with, and pins the
|
|
4
|
+
# normalized tool-call contract that each provider is expected to honour.
|
|
5
|
+
#
|
|
6
|
+
# tooldrift run --contract examples/contract.yaml --old deepseek --new qwen
|
|
7
|
+
#
|
|
8
|
+
# `tooldrift run` probes the --old and --new providers, normalizes their tool_calls
|
|
9
|
+
# into a ContractSnapshot, and diffs them against each other (and, when
|
|
10
|
+
# `expected` is present below, against the pinned contract). Drift => non-zero
|
|
11
|
+
# exit so it red-lights CI.
|
|
12
|
+
#
|
|
13
|
+
# No API key on hand? Every command also accepts --from-fixtures to replay the
|
|
14
|
+
# offline samples under tests/fixtures/ so you can see red/green with zero setup.
|
|
15
|
+
|
|
16
|
+
version: 1
|
|
17
|
+
suite: examples/suite.weather.yaml
|
|
18
|
+
|
|
19
|
+
# Providers under test. API keys are read from the env var named in `api_key_env`
|
|
20
|
+
# — keys are NEVER written into this file or any snapshot. ToolDrift only reads
|
|
21
|
+
# these endpoints; it never proxies traffic through them.
|
|
22
|
+
providers:
|
|
23
|
+
deepseek:
|
|
24
|
+
base_url: https://api.deepseek.com/v1
|
|
25
|
+
model_id: deepseek-chat
|
|
26
|
+
api_key_env: DEEPSEEK_API_KEY
|
|
27
|
+
|
|
28
|
+
qwen:
|
|
29
|
+
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
|
30
|
+
model_id: qwen-plus
|
|
31
|
+
api_key_env: DASHSCOPE_API_KEY
|
|
32
|
+
|
|
33
|
+
kimi:
|
|
34
|
+
base_url: https://api.moonshot.cn/v1
|
|
35
|
+
model_id: moonshot-v1-8k
|
|
36
|
+
api_key_env: MOONSHOT_API_KEY
|
|
37
|
+
|
|
38
|
+
glm:
|
|
39
|
+
base_url: https://open.bigmodel.cn/api/paas/v4
|
|
40
|
+
model_id: glm-4-plus
|
|
41
|
+
api_key_env: ZHIPUAI_API_KEY
|
|
42
|
+
|
|
43
|
+
minimax:
|
|
44
|
+
base_url: https://api.minimaxi.com/v1
|
|
45
|
+
model_id: MiniMax-M2
|
|
46
|
+
api_key_env: MINIMAX_API_KEY
|
|
47
|
+
|
|
48
|
+
# Optional: the pinned contract each tool's tool_calls is expected to satisfy.
|
|
49
|
+
# This is the same shape `tooldrift snapshot` emits, so you can freeze a known-
|
|
50
|
+
# good snapshot here and let `tooldrift run` regress every provider against it.
|
|
51
|
+
# Omit this block to run a pure cross-provider equivalence check instead.
|
|
52
|
+
expected:
|
|
53
|
+
get_weather:
|
|
54
|
+
emitted: true
|
|
55
|
+
arg_keys: [location, unit]
|
|
56
|
+
arg_nesting:
|
|
57
|
+
location: string
|
|
58
|
+
unit: string
|
|
59
|
+
arguments_encoding: json_string # OpenAI-style: arguments is a JSON-encoded string
|
|
60
|
+
parallel_arity: 1
|
|
61
|
+
tool_call_id_format: openai
|
|
62
|
+
finish_reason: tool_calls
|
|
63
|
+
get_forecast:
|
|
64
|
+
emitted: true
|
|
65
|
+
arg_keys: [days, include, location, unit]
|
|
66
|
+
arg_nesting:
|
|
67
|
+
location: string
|
|
68
|
+
days: integer
|
|
69
|
+
unit: string
|
|
70
|
+
include: object
|
|
71
|
+
arguments_encoding: json_string
|
|
72
|
+
parallel_arity: 1
|
|
73
|
+
tool_call_id_format: openai
|
|
74
|
+
finish_reason: tool_calls
|