xrouter-llm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. xrouter_llm-0.1.0/LICENSE +159 -0
  2. xrouter_llm-0.1.0/PKG-INFO +351 -0
  3. xrouter_llm-0.1.0/README.md +161 -0
  4. xrouter_llm-0.1.0/pyproject.toml +59 -0
  5. xrouter_llm-0.1.0/setup.cfg +4 -0
  6. xrouter_llm-0.1.0/src/xrouter_llm/__init__.py +97 -0
  7. xrouter_llm-0.1.0/src/xrouter_llm/agentic.py +95 -0
  8. xrouter_llm-0.1.0/src/xrouter_llm/catalog.py +48 -0
  9. xrouter_llm-0.1.0/src/xrouter_llm/cli.py +451 -0
  10. xrouter_llm-0.1.0/src/xrouter_llm/data.py +102 -0
  11. xrouter_llm-0.1.0/src/xrouter_llm/encoders.py +281 -0
  12. xrouter_llm-0.1.0/src/xrouter_llm/evaluation.py +814 -0
  13. xrouter_llm-0.1.0/src/xrouter_llm/features.py +78 -0
  14. xrouter_llm-0.1.0/src/xrouter_llm/fusion.py +53 -0
  15. xrouter_llm-0.1.0/src/xrouter_llm/irt_router.py +265 -0
  16. xrouter_llm-0.1.0/src/xrouter_llm/llmrouterbench.py +568 -0
  17. xrouter_llm-0.1.0/src/xrouter_llm/paths.py +42 -0
  18. xrouter_llm-0.1.0/src/xrouter_llm/policy.py +129 -0
  19. xrouter_llm-0.1.0/src/xrouter_llm/profiles.py +215 -0
  20. xrouter_llm-0.1.0/src/xrouter_llm/prompt_conditioned_irt.py +324 -0
  21. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/claude-opus-4-8.yaml +20 -0
  22. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/claude-sonnet-4-6.yaml +22 -0
  23. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/deepseek-v4-flash.yaml +25 -0
  24. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/deepseek-v4-pro.yaml +28 -0
  25. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/google-gemini-2.5-flash-lite.yaml +20 -0
  26. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/google-gemini-3-flash-preview.yaml +23 -0
  27. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/minimax-minimax-m3.yaml +25 -0
  28. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/openai-gpt-5.4-mini.yaml +21 -0
  29. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/openai-gpt-5.5.yaml +21 -0
  30. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/z-ai-glm-4.7.yaml +16 -0
  31. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/models/z-ai-glm-5.2.yaml +18 -0
  32. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/routers/auto.yaml +20 -0
  33. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/routers/cheap-pair.yaml +9 -0
  34. xrouter_llm-0.1.0/src/xrouter_llm/resources/config/routers/single-opus.yaml +9 -0
  35. xrouter_llm-0.1.0/src/xrouter_llm/resources/models/irt_router_350k.joblib +0 -0
  36. xrouter_llm-0.1.0/src/xrouter_llm/resources/routerbench_public_benchmarks.json +165 -0
  37. xrouter_llm-0.1.0/src/xrouter_llm/router.py +70 -0
  38. xrouter_llm-0.1.0/src/xrouter_llm/routerbench.py +158 -0
  39. xrouter_llm-0.1.0/src/xrouter_llm/score.py +33 -0
  40. xrouter_llm-0.1.0/src/xrouter_llm/server.py +205 -0
  41. xrouter_llm-0.1.0/src/xrouter_llm/serving.py +193 -0
  42. xrouter_llm-0.1.0/src/xrouter_llm/store.py +109 -0
  43. xrouter_llm-0.1.0/src/xrouter_llm/types.py +74 -0
  44. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/PKG-INFO +351 -0
  45. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/SOURCES.txt +58 -0
  46. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/dependency_links.txt +1 -0
  47. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/entry_points.txt +2 -0
  48. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/requires.txt +10 -0
  49. xrouter_llm-0.1.0/src/xrouter_llm.egg-info/top_level.txt +1 -0
  50. xrouter_llm-0.1.0/tests/test_agentic.py +47 -0
  51. xrouter_llm-0.1.0/tests/test_encoders.py +51 -0
  52. xrouter_llm-0.1.0/tests/test_irt_router.py +90 -0
  53. xrouter_llm-0.1.0/tests/test_llmrouterbench.py +138 -0
  54. xrouter_llm-0.1.0/tests/test_model_holdout_and_cost.py +88 -0
  55. xrouter_llm-0.1.0/tests/test_policy.py +40 -0
  56. xrouter_llm-0.1.0/tests/test_profiles.py +33 -0
  57. xrouter_llm-0.1.0/tests/test_router_and_evaluation.py +158 -0
  58. xrouter_llm-0.1.0/tests/test_routerbench.py +32 -0
  59. xrouter_llm-0.1.0/tests/test_serving.py +107 -0
  60. xrouter_llm-0.1.0/tests/test_yaml_profiles.py +60 -0
@@ -0,0 +1,159 @@
1
+ # Xagent Source License
2
+
3
+ **Effective Date:** February 15, 2026
4
+
5
+ Copyright © 2026 Xorbits Inc.
6
+
7
+ ---
8
+
9
+ ## 1. Overview
10
+
11
+ The Xagent software, source code, and associated materials (the **“Software”**) are provided under this Xagent Source License (the **“License”**).
12
+
13
+ This License provides source-available rights for use, modification, and internal commercial deployment, while restricting certain hosted/service and competitive uses.
14
+
15
+ > **Note:** This License is **not** an OSI-approved open source license.
16
+
17
+ ---
18
+
19
+ ## 2. Acceptance
20
+
21
+ By using, copying, modifying, distributing, or making available the Software, you agree to be bound by this License.
22
+
23
+ ---
24
+
25
+ ## 3. Grant of Rights
26
+
27
+ Subject to the terms and conditions of this License, the licensor (**“Licensor”**) grants you a non-exclusive, worldwide, royalty-free, non-transferable, non-sublicensable license to:
28
+
29
+ 1. **Use** the Software;
30
+ 2. **Copy** the Software;
31
+ 3. **Modify** the Software and create derivative works;
32
+ 4. **Distribute** the Software (including derivative works) in source and/or object form; and
33
+ 5. **Deploy** the Software for internal business purposes.
34
+
35
+ All rights not expressly granted are reserved.
36
+
37
+ ---
38
+
39
+ ## 4. Restrictions
40
+
41
+ ### 4.1 Hosted / Managed Service Restriction
42
+
43
+ Except as expressly permitted below, you may not provide the Software, or any **Restricted Functionality** of the Software, to any **Third Party** as a hosted service, managed service, or otherwise make it available for use over a network.
44
+
45
+ This prohibition includes (without limitation):
46
+
47
+ * offering the Software as “Xagent-as-a-Service” or a shared agent execution platform for multiple Third Parties;
48
+ * providing multiple Third Parties access to a shared runtime, orchestration, execution, scheduling, workflow, or UI environment powered by the Software; or
49
+ * operating a multi-tenant service in which Third Parties can create, run, manage, or monitor agents or workflows using the Software.
50
+
51
+ ### Permitted Single-Tenant Deployment
52
+
53
+ You may deploy and operate the Software on behalf of a single Third Party customer, provided that:
54
+
55
+ 1. the deployment is dedicated to that customer (single-tenant);
56
+ 2. the customer does not share access with other Third Parties;
57
+ 3. the Software is not offered as a generalized or reusable platform service to multiple customers;
58
+ 4. such deployment is limited to that specific customer’s internal use; and
59
+ 5. all Xagent trademarks, product names, copyright notices, and branding elements remain visible and unaltered within the Software and related user interfaces.
60
+
61
+ Removal, replacement, white-labeling, or obscuring of Xagent branding in a single-tenant deployment is prohibited unless you have obtained a separate commercial license or written authorization from the Licensor.
62
+
63
+ For clarity, internal deployment within your own organization and your Affiliated Entities is permitted.
64
+
65
+ ### 4.2 Competitive Use Restriction
66
+
67
+ You may not use the Software to develop, offer, or operate a product or service whose primary purpose is to provide an agent orchestration runtime or agent execution platform that competes directly with the Licensor’s commercial Xagent offering.
68
+
69
+ ### 4.3 License Protection / Technical Restrictions
70
+
71
+ You may not remove, disable, circumvent, or materially alter any license verification, usage limitation, feature gating, entitlement checking, or similar functionality included in the Software that is intended to enforce this License or commercial terms.
72
+
73
+ ### 4.4 Notice and Attribution
74
+
75
+ You may not alter, remove, or obscure any licensing, copyright, attribution, or other notices included in the Software.
76
+
77
+ If you distribute a modified version of the Software, you must include prominent notices stating that you have modified the Software.
78
+
79
+ ---
80
+
81
+ ## 5. Trademarks
82
+
83
+ This License does not grant you any rights to use the Licensor’s trademarks, service marks, trade names, logos, or product names (including **“Xagent”**), except as required for reasonable and customary use in describing the origin of the Software.
84
+
85
+ ---
86
+
87
+ ## 6. Patents
88
+
89
+ The Licensor grants you a license under any patent claims the Licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import, and have imported the Software, subject to the restrictions in this License.
90
+
91
+ This patent license does not apply to any patent claims infringed by your modifications or additions.
92
+
93
+ If you or your company make any written claim (including in a lawsuit or administrative proceeding) that the Software infringes or contributes to infringement of any patent, then your patent license under this License terminates immediately.
94
+
95
+ ---
96
+
97
+ ## 7. Distribution Conditions
98
+
99
+ If you distribute any copy of the Software (modified or unmodified), you must ensure that recipients receive a copy of this License.
100
+
101
+ ---
102
+
103
+ ## 8. Termination and Reinstatement
104
+
105
+ If you violate this License, your rights under this License terminate automatically.
106
+
107
+ If the Licensor provides notice of the violation and you cure the violation within **30 days** of receiving notice, your rights will be reinstated retroactively.
108
+
109
+ If you violate this License after reinstatement, your rights terminate automatically and permanently.
110
+
111
+ ---
112
+
113
+ ## 9. Disclaimer of Warranty
114
+
115
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED **“AS IS”**, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.
116
+
117
+ ---
118
+
119
+ ## 10. Limitation of Liability
120
+
121
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, IN NO EVENT WILL THE LICENSOR BE LIABLE FOR ANY DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR THE SOFTWARE, WHETHER IN CONTRACT, TORT, OR OTHERWISE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
122
+
123
+ ---
124
+
125
+ ## 11. Definitions
126
+
127
+ **“Affiliated Entities”** means any entity that controls, is controlled by, or is under common control with you.
128
+
129
+ **“Control”** means ownership of more than 50% of the voting power or equity interests, or the power to direct management or policies.
130
+
131
+ **“Restricted Functionality”** means the core runtime and orchestration capabilities of the Software, including (without limitation):
132
+
133
+ * agent orchestration and task execution runtime;
134
+ * multi-agent coordination and scheduling;
135
+ * workflow execution and planning engine;
136
+ * tool integration runtime and connectors;
137
+ * management UI used to create, run, manage, or monitor agents/workflows.
138
+
139
+ **“Third Party”** means any person or entity other than you and your Affiliated Entities.
140
+
141
+ **“You”** means the individual or entity exercising rights under this License.
142
+
143
+ ---
144
+
145
+ ## 12. Commercial Licensing
146
+
147
+ If you wish to use the Software in a way not permitted under this License (including offering a hosted or managed service), you may obtain a commercial license from the Licensor.
148
+
149
+ ---
150
+
151
+ ## 13. Miscellaneous
152
+
153
+ If any provision of this License is held unenforceable, the remaining provisions will remain in effect.
154
+
155
+ This License is the entire agreement regarding the Software and supersedes any prior or contemporaneous agreements relating to the Software.
156
+
157
+ ---
158
+
159
+ **Version 1.0 — Effective February 15, 2026**
@@ -0,0 +1,351 @@
1
+ Metadata-Version: 2.4
2
+ Name: xrouter-llm
3
+ Version: 0.1.0
4
+ Summary: Prompt-aware LLM routing-decision service: predicts which model can complete a prompt and picks the cheapest one.
5
+ Author: Xorbits Inc.
6
+ License: # Xagent Source License
7
+
8
+ **Effective Date:** February 15, 2026
9
+
10
+ Copyright © 2026 Xorbits Inc.
11
+
12
+ ---
13
+
14
+ ## 1. Overview
15
+
16
+ The Xagent software, source code, and associated materials (the **“Software”**) are provided under this Xagent Source License (the **“License”**).
17
+
18
+ This License provides source-available rights for use, modification, and internal commercial deployment, while restricting certain hosted/service and competitive uses.
19
+
20
+ > **Note:** This License is **not** an OSI-approved open source license.
21
+
22
+ ---
23
+
24
+ ## 2. Acceptance
25
+
26
+ By using, copying, modifying, distributing, or making available the Software, you agree to be bound by this License.
27
+
28
+ ---
29
+
30
+ ## 3. Grant of Rights
31
+
32
+ Subject to the terms and conditions of this License, the licensor (**“Licensor”**) grants you a non-exclusive, worldwide, royalty-free, non-transferable, non-sublicensable license to:
33
+
34
+ 1. **Use** the Software;
35
+ 2. **Copy** the Software;
36
+ 3. **Modify** the Software and create derivative works;
37
+ 4. **Distribute** the Software (including derivative works) in source and/or object form; and
38
+ 5. **Deploy** the Software for internal business purposes.
39
+
40
+ All rights not expressly granted are reserved.
41
+
42
+ ---
43
+
44
+ ## 4. Restrictions
45
+
46
+ ### 4.1 Hosted / Managed Service Restriction
47
+
48
+ Except as expressly permitted below, you may not provide the Software, or any **Restricted Functionality** of the Software, to any **Third Party** as a hosted service, managed service, or otherwise make it available for use over a network.
49
+
50
+ This prohibition includes (without limitation):
51
+
52
+ * offering the Software as “Xagent-as-a-Service” or a shared agent execution platform for multiple Third Parties;
53
+ * providing multiple Third Parties access to a shared runtime, orchestration, execution, scheduling, workflow, or UI environment powered by the Software; or
54
+ * operating a multi-tenant service in which Third Parties can create, run, manage, or monitor agents or workflows using the Software.
55
+
56
+ ### Permitted Single-Tenant Deployment
57
+
58
+ You may deploy and operate the Software on behalf of a single Third Party customer, provided that:
59
+
60
+ 1. the deployment is dedicated to that customer (single-tenant);
61
+ 2. the customer does not share access with other Third Parties;
62
+ 3. the Software is not offered as a generalized or reusable platform service to multiple customers;
63
+ 4. such deployment is limited to that specific customer’s internal use; and
64
+ 5. all Xagent trademarks, product names, copyright notices, and branding elements remain visible and unaltered within the Software and related user interfaces.
65
+
66
+ Removal, replacement, white-labeling, or obscuring of Xagent branding in a single-tenant deployment is prohibited unless you have obtained a separate commercial license or written authorization from the Licensor.
67
+
68
+ For clarity, internal deployment within your own organization and your Affiliated Entities is permitted.
69
+
70
+ ### 4.2 Competitive Use Restriction
71
+
72
+ You may not use the Software to develop, offer, or operate a product or service whose primary purpose is to provide an agent orchestration runtime or agent execution platform that competes directly with the Licensor’s commercial Xagent offering.
73
+
74
+ ### 4.3 License Protection / Technical Restrictions
75
+
76
+ You may not remove, disable, circumvent, or materially alter any license verification, usage limitation, feature gating, entitlement checking, or similar functionality included in the Software that is intended to enforce this License or commercial terms.
77
+
78
+ ### 4.4 Notice and Attribution
79
+
80
+ You may not alter, remove, or obscure any licensing, copyright, attribution, or other notices included in the Software.
81
+
82
+ If you distribute a modified version of the Software, you must include prominent notices stating that you have modified the Software.
83
+
84
+ ---
85
+
86
+ ## 5. Trademarks
87
+
88
+ This License does not grant you any rights to use the Licensor’s trademarks, service marks, trade names, logos, or product names (including **“Xagent”**), except as required for reasonable and customary use in describing the origin of the Software.
89
+
90
+ ---
91
+
92
+ ## 6. Patents
93
+
94
+ The Licensor grants you a license under any patent claims the Licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import, and have imported the Software, subject to the restrictions in this License.
95
+
96
+ This patent license does not apply to any patent claims infringed by your modifications or additions.
97
+
98
+ If you or your company make any written claim (including in a lawsuit or administrative proceeding) that the Software infringes or contributes to infringement of any patent, then your patent license under this License terminates immediately.
99
+
100
+ ---
101
+
102
+ ## 7. Distribution Conditions
103
+
104
+ If you distribute any copy of the Software (modified or unmodified), you must ensure that recipients receive a copy of this License.
105
+
106
+ ---
107
+
108
+ ## 8. Termination and Reinstatement
109
+
110
+ If you violate this License, your rights under this License terminate automatically.
111
+
112
+ If the Licensor provides notice of the violation and you cure the violation within **30 days** of receiving notice, your rights will be reinstated retroactively.
113
+
114
+ If you violate this License after reinstatement, your rights terminate automatically and permanently.
115
+
116
+ ---
117
+
118
+ ## 9. Disclaimer of Warranty
119
+
120
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED **“AS IS”**, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.
121
+
122
+ ---
123
+
124
+ ## 10. Limitation of Liability
125
+
126
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, IN NO EVENT WILL THE LICENSOR BE LIABLE FOR ANY DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR THE SOFTWARE, WHETHER IN CONTRACT, TORT, OR OTHERWISE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
127
+
128
+ ---
129
+
130
+ ## 11. Definitions
131
+
132
+ **“Affiliated Entities”** means any entity that controls, is controlled by, or is under common control with you.
133
+
134
+ **“Control”** means ownership of more than 50% of the voting power or equity interests, or the power to direct management or policies.
135
+
136
+ **“Restricted Functionality”** means the core runtime and orchestration capabilities of the Software, including (without limitation):
137
+
138
+ * agent orchestration and task execution runtime;
139
+ * multi-agent coordination and scheduling;
140
+ * workflow execution and planning engine;
141
+ * tool integration runtime and connectors;
142
+ * management UI used to create, run, manage, or monitor agents/workflows.
143
+
144
+ **“Third Party”** means any person or entity other than you and your Affiliated Entities.
145
+
146
+ **“You”** means the individual or entity exercising rights under this License.
147
+
148
+ ---
149
+
150
+ ## 12. Commercial Licensing
151
+
152
+ If you wish to use the Software in a way not permitted under this License (including offering a hosted or managed service), you may obtain a commercial license from the Licensor.
153
+
154
+ ---
155
+
156
+ ## 13. Miscellaneous
157
+
158
+ If any provision of this License is held unenforceable, the remaining provisions will remain in effect.
159
+
160
+ This License is the entire agreement regarding the Software and supersedes any prior or contemporaneous agreements relating to the Software.
161
+
162
+ ---
163
+
164
+ **Version 1.0 — Effective February 15, 2026**
165
+
166
+ Project-URL: Homepage, https://github.com/xorbitsai/xrouter-llm
167
+ Project-URL: Repository, https://github.com/xorbitsai/xrouter-llm
168
+ Keywords: llm,router,routing,model-selection,irt,openrouter
169
+ Classifier: License :: Other/Proprietary License
170
+ Classifier: Programming Language :: Python :: 3
171
+ Classifier: Programming Language :: Python :: 3.10
172
+ Classifier: Programming Language :: Python :: 3.11
173
+ Classifier: Programming Language :: Python :: 3.12
174
+ Classifier: Programming Language :: Python :: 3.13
175
+ Classifier: Intended Audience :: Developers
176
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
177
+ Requires-Python: >=3.10
178
+ Description-Content-Type: text/markdown
179
+ License-File: LICENSE
180
+ Requires-Dist: huggingface-hub>=0.23
181
+ Requires-Dist: joblib>=1.3
182
+ Requires-Dist: numpy>=1.24
183
+ Requires-Dist: pandas>=2.0
184
+ Requires-Dist: pyyaml>=6.0
185
+ Requires-Dist: scikit-learn>=1.3
186
+ Requires-Dist: scipy>=1.10
187
+ Provides-Extra: dev
188
+ Requires-Dist: pytest>=7.4; extra == "dev"
189
+ Dynamic: license-file
190
+
191
+ <div align="center">
192
+ <img src="./assets/xorbits-logo.png" width="180px" alt="xorbits" />
193
+
194
+ # xrouter-llm
195
+
196
+ </div>
197
+
198
+ `xrouter-llm` is a prompt-aware LLM **routing-decision** service. It answers
199
+ "which model should serve this prompt?" and records the choice — it does NOT
200
+ call the underlying LLMs.
201
+
202
+ ## Invariant
203
+
204
+ ```text
205
+ Do not train: prompt -> selected model
206
+ Train: prompt + model -> probability the model completes the prompt
207
+ Decide: predicted completion + cost -> cheapest model that can complete
208
+ ```
209
+
210
+ Completion is factored into two decoupled axes (an IRT-style model):
211
+
212
+ ```text
213
+ P(complete) = sigmoid(a * capability(model) + b * difficulty(prompt) + c)
214
+ ```
215
+
216
+ - **capability(model)** = the mean of the model's published `gpqa_diamond` and
217
+ `livecodebench` (both full-coverage on the training side). Going wider doesn't
218
+ help at this data scale — a flat mean dilutes and learned weights overfit at
219
+ 37 profiled models; see AGENTS.md "Capability benchmarks". The mean is used directly, so a
220
+ brand-new model's benchmarks drive its ranking.
221
+ - **difficulty(prompt)** = a Ridge regressor on a multilingual embedding
222
+ (`Qwen/Qwen3-Embedding-0.6B`), trained on each prompt's empirical pass-rate.
223
+ It is multilingual (Chinese transfers from English training data). This embedding was picked over
224
+ `bge-m3` by a controlled probe (`scripts/probe_qwen_difficulty.py`): it has a higher
225
+ held-out Pearson correlation, and it no longer rates trivial prompts ("1+1=?") as maximally
226
+ hard.
227
+
228
+ This factoring is the key lesson: a single joint classifier could not rank
229
+ unseen models by their benchmarks (on this data, model capability barely
230
+ explains completion *marginally* — but it does once difficulty is controlled for,
231
+ which is exactly what the factored model exploits).
232
+
233
+ ## Components
234
+
235
+ - `IRTRouter` (`irt_router.py`): the predictor (difficulty x capability).
236
+ - `RoutingPolicy` (`policy.py`): "cheapest model whose predicted completion
237
+ clears `completion_threshold`; else the highest predicted completion".
238
+ - `serving.py` / `server.py`: HTTP routing-decision API + single-page web UI.
239
+ - `resources/config/models/`: a per-model YAML registry of capability profiles
240
+ (bundled in the package; resolve with `default_models_dir()`).
241
+ - `resources/config/routers/`: named "auto configs" — a candidate model set +
242
+ policy (bundled; `default_routers_dir()`).
243
+ - `resources/models/irt_router_350k.joblib`: the trained router shipped with the
244
+ package (`default_model_path()`).
245
+
246
+ ## Install
247
+
248
+ ```bash
249
+ pip install xrouter-llm # ships a trained router + model registry
250
+ # or, for development:
251
+ pip install -e ".[dev]"
252
+ ```
253
+
254
+ The wheel bundles a trained router artifact, the model-profile registry, and the
255
+ router configs, so a fresh install can serve immediately with no extra files.
256
+
257
+ ## Datasets
258
+
259
+ The production difficulty model is trained on **multiple datasets combined**
260
+ (all feed the difficulty axis; only profiled models feed the capability axis):
261
+
262
+ | Source | Type | Scale | In production train? |
263
+ | --- | --- | --- | --- |
264
+ | `NPULH/LLMRouterBench` (350k stream sample) | single-turn QA / code / math (22 tasks) | 37 models x ~13.8k prompts | ✅ |
265
+ | agent-psychometrics — Terminal-Bench 2.0 | terminal agent | 89 tasks x 112 subjects | ✅ `--dataset agentic:agentic/terminalbench` |
266
+ | agent-psychometrics — SWE-bench Verified | coding agent | 500 tasks x 134 subjects | ✅ task text joined from `princeton-nlp/SWE-bench_Verified` |
267
+ | agent-psychometrics — SWE-bench Pro / GSO | coding agent | 730x14 / 102x15 | ⛔ ship no local task text, external join needed |
268
+
269
+ The current artifact trains on LLMRouterBench 350k **+ Terminal-Bench +
270
+ SWE-bench Verified** (377,997 rows / ~14,364 prompts / 283 subjects). The
271
+ agentic matrices come from
272
+ [agent-psychometrics](https://github.com/dariakryvosheieva/agent-psychometrics)
273
+ (MIT) via `agentic.py`. Only the 37 profiled llmrouterbench models feed the
274
+ capability axis; agentic subjects feed difficulty only. RouterBench
275
+ (`withmartian/routerbench`) remains a smaller legacy baseline. Local datasets and
276
+ trained artifacts are not committed (`data/`, `artifacts/` are gitignored).
277
+
278
+ Adding more agentic prompt types (e.g. your own traffic) is the only way to make
279
+ difficulty accurate for task mixes outside coding/terminal — see AGENTS.md.
280
+
281
+ ## Train
282
+
283
+ ```bash
284
+ xrouter-llm train-irt \
285
+ --dataset llmrouterbench:data/raw/llmrouterbench_stream_sample_350k \
286
+ --dataset agentic:agentic/terminalbench \
287
+ --dataset agentic:agentic/swebench_verified \
288
+ --benchmark-profiles artifacts/profiles/llmrouterbench_350k_profiles_priority_collected.json \
289
+ --output artifacts/models/irt_router_350k.joblib
290
+ ```
291
+
292
+ Diagnostics: `sweep-thresholds` (cost/completion frontier + calibration) and
293
+ `eval-model-holdout` (leave-one-model-out generalization).
294
+
295
+ ## Serve
296
+
297
+ The bundled router, registry, and configs are the defaults, so a bare invocation
298
+ works out of the box:
299
+
300
+ ```bash
301
+ xrouter-llm serve --port 8080
302
+ ```
303
+
304
+ Override any of them to use your own trained model or registry:
305
+
306
+ ```bash
307
+ xrouter-llm serve \
308
+ --model artifacts/models/irt_router_350k.joblib \
309
+ --models-dir config/models --routers-dir config/routers \
310
+ --db artifacts/calls.db --port 8080
311
+ ```
312
+
313
+ - `GET /` — single-page UI (prompt box, config picker, decision table, history)
314
+ - `GET /api/configs`, `POST /api/route` (`{prompt, config, task?}`),
315
+ `GET /api/history?limit=N`
316
+ - Every decision is logged to SQLite (`*.db`/`*.sqlite` are gitignored — the log
317
+ holds user prompts).
318
+
319
+ ## Model registry
320
+
321
+ One YAML per supported model, bundled under
322
+ `src/xrouter_llm/resources/config/models/` (capability profile: provider, costs,
323
+ context, published benchmarks as 0-100 percentages). `model_id` is the model's
324
+ canonical OpenRouter slug (e.g. `anthropic/claude-opus-4.8`). The bundled
325
+ registry is the default for `--benchmark-profiles`; point that flag at your own
326
+ directory or file to extend it. Add a model = add a file.
327
+
328
+ ```python
329
+ from xrouter_llm import IRTRouter, default_model_path, default_models_dir, load_benchmark_profiles
330
+
331
+ router = IRTRouter.load(default_model_path())
332
+ for profile in load_benchmark_profiles(default_models_dir()).profiles():
333
+ router.add_benchmark_profile(profile)
334
+
335
+ preds = router.predict("实现一个分布式一致性算法", model_ids=["claude-opus-4-8", "deepseek-v4-pro"])
336
+ print({p.model_id: round(p.mu, 3) for p in preds})
337
+ ```
338
+
339
+ ## License
340
+
341
+ `xrouter-llm` is released under the **Xagent Source License** (© Xorbits Inc.) —
342
+ see [LICENSE](LICENSE). It is source-available, **not** an OSI-approved open
343
+ source license.
344
+
345
+ The license text is shared verbatim with [Xagent](https://github.com/xorbitsai/xagent);
346
+ for this project the licensed "Software" is `xrouter-llm`, and the
347
+ "Restricted Functionality" / hosted-service and competitive-use clauses apply to
348
+ its routing-decision and model-selection capabilities. In short: use,
349
+ modification, and internal/single-tenant deployment are permitted; offering it as
350
+ a multi-tenant hosted/managed service, or a directly competing service, is not.
351
+ See [LICENSE](LICENSE) for the controlling terms.
@@ -0,0 +1,161 @@
1
+ <div align="center">
2
+ <img src="./assets/xorbits-logo.png" width="180px" alt="xorbits" />
3
+
4
+ # xrouter-llm
5
+
6
+ </div>
7
+
8
+ `xrouter-llm` is a prompt-aware LLM **routing-decision** service. It answers
9
+ "which model should serve this prompt?" and records the choice — it does NOT
10
+ call the underlying LLMs.
11
+
12
+ ## Invariant
13
+
14
+ ```text
15
+ Do not train: prompt -> selected model
16
+ Train: prompt + model -> probability the model completes the prompt
17
+ Decide: predicted completion + cost -> cheapest model that can complete
18
+ ```
19
+
20
+ Completion is factored into two decoupled axes (an IRT-style model):
21
+
22
+ ```text
23
+ P(complete) = sigmoid(a * capability(model) + b * difficulty(prompt) + c)
24
+ ```
25
+
26
+ - **capability(model)** = the mean of the model's published `gpqa_diamond` and
27
+ `livecodebench` (both full-coverage on the training side). Going wider doesn't
28
+ help at this data scale — a flat mean dilutes the signal, and learned weights overfit at
29
+ 37 profiled models; see AGENTS.md "Capability benchmarks". Used directly, so a
30
+ brand-new model's benchmarks drive its ranking.
31
+ - **difficulty(prompt)** = a Ridge regressor on a multilingual embedding
32
+ (`Qwen/Qwen3-Embedding-0.6B`), trained on each prompt's empirical pass-rate.
33
+ The embedding is multilingual (Chinese transfers from English training data). It was picked over
34
+ `bge-m3` by a controlled probe (`scripts/probe_qwen_difficulty.py`): higher
35
+ held-out Pearson correlation, and it no longer rates trivial prompts ("1+1=?") as maximally
36
+ hard.
37
+
38
+ This factoring is the key lesson: a single joint classifier could not rank
39
+ unseen models by their benchmarks (on this data, model capability barely
40
+ explains completion *marginally* — but it does once difficulty is controlled,
41
+ which is exactly what the factored model exploits).
42
+
43
+ ## Components
44
+
45
+ - `IRTRouter` (`irt_router.py`): the predictor (difficulty x capability).
46
+ - `RoutingPolicy` (`policy.py`): "cheapest model whose predicted completion
47
+ clears `completion_threshold`; else the highest predicted completion".
48
+ - `serving.py` / `server.py`: HTTP routing-decision API + single-page web UI.
49
+ - `resources/config/models/`: a per-model YAML registry of capability profiles
50
+ (bundled in the package; resolve with `default_models_dir()`).
51
+ - `resources/config/routers/`: named "auto configs" — a candidate model set +
52
+ policy (bundled; `default_routers_dir()`).
53
+ - `resources/models/irt_router_350k.joblib`: the trained router shipped with the
54
+ package (`default_model_path()`).
55
+
56
+ ## Install
57
+
58
+ ```bash
59
+ pip install xrouter-llm # ships a trained router + model registry
60
+ # or, for development:
61
+ pip install -e ".[dev]"
62
+ ```
63
+
64
+ The wheel bundles a trained router artifact, the model-profile registry, and the
65
+ router configs, so a fresh install can serve immediately with no extra files.
66
+
67
+ ## Datasets
68
+
69
+ The production difficulty model is trained on **multiple datasets combined**
70
+ (all feed the difficulty axis; only profiled models feed the capability axis):
71
+
72
+ | Source | Type | Scale | In production train? |
73
+ | --- | --- | --- | --- |
74
+ | `NPULH/LLMRouterBench` (350k stream sample) | single-turn QA / code / math (22 tasks) | 37 models x ~13.8k prompts | ✅ |
75
+ | agent-psychometrics — Terminal-Bench 2.0 | terminal agent | 89 tasks x 112 subjects | ✅ `--dataset agentic:agentic/terminalbench` |
76
+ | agent-psychometrics — SWE-bench Verified | coding agent | 500 tasks x 134 subjects | ✅ task text joined from `princeton-nlp/SWE-bench_Verified` |
77
+ | agent-psychometrics — SWE-bench Pro / GSO | coding agent | 730x14 / 102x15 | ⛔ ships no local task text; external join needed |
78
+
79
+ The current artifact trains on LLMRouterBench 350k **+ Terminal-Bench +
80
+ SWE-bench Verified** (377,997 rows / ~14,364 prompts / 283 subjects). The
81
+ agentic matrices come from
82
+ [agent-psychometrics](https://github.com/dariakryvosheieva/agent-psychometrics)
83
+ (MIT) via `agentic.py`. Only the 37 profiled LLMRouterBench models feed the
84
+ capability axis; agentic subjects feed difficulty only. RouterBench
85
+ (`withmartian/routerbench`) remains a smaller legacy baseline. Local datasets and
86
+ trained artifacts are not committed (`data/`, `artifacts/` are gitignored).
87
+
88
+ Adding more agentic prompt types (e.g. your own traffic) is the only way to make
89
+ difficulty accurate for task mixes outside coding/terminal — see AGENTS.md.
90
+
91
+ ## Train
92
+
93
+ ```bash
94
+ xrouter-llm train-irt \
95
+ --dataset llmrouterbench:data/raw/llmrouterbench_stream_sample_350k \
96
+ --dataset agentic:agentic/terminalbench \
97
+ --dataset agentic:agentic/swebench_verified \
98
+ --benchmark-profiles artifacts/profiles/llmrouterbench_350k_profiles_priority_collected.json \
99
+ --output artifacts/models/irt_router_350k.joblib
100
+ ```
101
+
102
+ Diagnostics: `sweep-thresholds` (cost/completion frontier + calibration) and
103
+ `eval-model-holdout` (leave-one-model-out generalization).
104
+
105
+ ## Serve
106
+
107
+ The bundled router, registry, and configs are the defaults, so a bare invocation
108
+ works out of the box:
109
+
110
+ ```bash
111
+ xrouter-llm serve --port 8080
112
+ ```
113
+
114
+ Override any of them to use your own trained model or registry:
115
+
116
+ ```bash
117
+ xrouter-llm serve \
118
+ --model artifacts/models/irt_router_350k.joblib \
119
+ --models-dir config/models --routers-dir config/routers \
120
+ --db artifacts/calls.db --port 8080
121
+ ```
122
+
123
+ - `GET /` — single-page UI (prompt box, config picker, decision table, history)
124
+ - `GET /api/configs`, `POST /api/route` (`{prompt, config, task?}`),
125
+ `GET /api/history?limit=N`
126
+ - Every decision is logged to SQLite (`*.db`/`*.sqlite` are gitignored — the log
127
+ holds user prompts).
128
+
129
+ ## Model registry
130
+
131
+ One YAML per supported model, bundled under
132
+ `src/xrouter_llm/resources/config/models/` (capability profile: provider, costs,
133
+ context, published benchmarks as 0-100 percentages). `model_id` is the model's
134
+ canonical OpenRouter slug (e.g. `anthropic/claude-opus-4.8`). The bundled
135
+ registry is the default for `--benchmark-profiles`; point it at your own
136
+ directory or file to extend it. To add a model, add a YAML file.
137
+
138
+ ```python
139
+ from xrouter_llm import IRTRouter, default_model_path, default_models_dir, load_benchmark_profiles
140
+
141
+ router = IRTRouter.load(default_model_path())
142
+ for profile in load_benchmark_profiles(default_models_dir()).profiles():
143
+ router.add_benchmark_profile(profile)
144
+
145
+ preds = router.predict("实现一个分布式一致性算法", model_ids=["claude-opus-4-8", "deepseek-v4-pro"])
146
+ print({p.model_id: round(p.mu, 3) for p in preds})
147
+ ```
148
+
149
+ ## License
150
+
151
+ `xrouter-llm` is released under the **Xagent Source License** (© Xorbits Inc.) —
152
+ see [LICENSE](LICENSE). It is source-available, **not** an OSI-approved open
153
+ source license.
154
+
155
+ The license text is shared verbatim with [Xagent](https://github.com/xorbitsai/xagent);
156
+ for this project the licensed "Software" is `xrouter-llm`, and the
157
+ "Restricted Functionality" / hosted-service and competitive-use clauses apply to
158
+ its routing-decision and model-selection capabilities. In short: use,
159
+ modification, and internal/single-tenant deployment are permitted; offering it as
160
+ a multi-tenant hosted/managed service, or a directly competing service, is not.
161
+ See [LICENSE](LICENSE) for the controlling terms.