thinkrouter 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thinkrouter-0.1.0/LICENSE +21 -0
- thinkrouter-0.1.0/PKG-INFO +322 -0
- thinkrouter-0.1.0/README.md +277 -0
- thinkrouter-0.1.0/pyproject.toml +84 -0
- thinkrouter-0.1.0/setup.cfg +4 -0
- thinkrouter-0.1.0/tests/test_thinkrouter.py +401 -0
- thinkrouter-0.1.0/thinkrouter/__init__.py +57 -0
- thinkrouter-0.1.0/thinkrouter/classifier.py +321 -0
- thinkrouter-0.1.0/thinkrouter/constants.py +48 -0
- thinkrouter-0.1.0/thinkrouter/py.typed +0 -0
- thinkrouter-0.1.0/thinkrouter/router.py +429 -0
- thinkrouter-0.1.0/thinkrouter/usage.py +182 -0
- thinkrouter-0.1.0/thinkrouter.egg-info/PKG-INFO +322 -0
- thinkrouter-0.1.0/thinkrouter.egg-info/SOURCES.txt +15 -0
- thinkrouter-0.1.0/thinkrouter.egg-info/dependency_links.txt +1 -0
- thinkrouter-0.1.0/thinkrouter.egg-info/requires.txt +23 -0
- thinkrouter-0.1.0/thinkrouter.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 ThinkRouter Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thinkrouter
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cut LLM reasoning-token costs by 60% with one line of code — pre-inference query difficulty routing.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/thinkrouter/thinkrouter
|
|
7
|
+
Project-URL: Documentation, https://github.com/thinkrouter/thinkrouter#readme
|
|
8
|
+
Project-URL: Repository, https://github.com/thinkrouter/thinkrouter
|
|
9
|
+
Project-URL: Issues, https://github.com/thinkrouter/thinkrouter/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/thinkrouter/thinkrouter/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,openai,anthropic,inference,optimization,routing,reasoning,cost-reduction,chain-of-thought,distilbert,nlp,machine-learning
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: httpx>=0.25.0
|
|
27
|
+
Requires-Dist: pydantic>=2.0.0
|
|
28
|
+
Requires-Dist: typing-extensions>=4.5.0
|
|
29
|
+
Provides-Extra: classifier
|
|
30
|
+
Requires-Dist: transformers>=4.40.0; extra == "classifier"
|
|
31
|
+
Requires-Dist: torch>=2.0.0; extra == "classifier"
|
|
32
|
+
Provides-Extra: openai
|
|
33
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
34
|
+
Provides-Extra: anthropic
|
|
35
|
+
Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: thinkrouter[anthropic,classifier,openai]; extra == "all"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
41
|
+
Requires-Dist: black>=24.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
43
|
+
Requires-Dist: mypy>=1.9.0; extra == "dev"
|
|
44
|
+
Dynamic: license-file
|
|
45
|
+
|
|
46
|
+
# ThinkRouter
|
|
47
|
+
|
|
48
|
+
[](https://github.com/thinkrouter/thinkrouter/actions)
|
|
49
|
+
[](https://pypi.org/project/thinkrouter)
|
|
50
|
+
[](https://pypi.org/project/thinkrouter)
|
|
51
|
+
[](LICENSE)
|
|
52
|
+
[](https://colab.research.google.com/YOUR_DEMO_LINK)
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
**Reasoning models charge you 8,000 thinking tokens for "What is 2+3?"**
|
|
57
|
+
**ThinkRouter fixes that with one line of code.**
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from thinkrouter import ThinkRouter
|
|
61
|
+
|
|
62
|
+
client = ThinkRouter(provider="openai")
|
|
63
|
+
response = client.chat("What is the capital of France?")
|
|
64
|
+
# → routed to NO_THINK — 50 tokens used, not 8,000
|
|
65
|
+
|
|
66
|
+
client.usage.print_dashboard()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
ThinkRouter — Usage Dashboard
|
|
71
|
+
────────────────────────────────────────────
|
|
72
|
+
Total calls : 1,247
|
|
73
|
+
Tokens saved : 8,734,750
|
|
74
|
+
Compute savings : 61.3%
|
|
75
|
+
Avg classifier time : 0.4 ms
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Validated on 1,319 real queries (GSM8K benchmark):**
|
|
79
|
+
|
|
80
|
+
| Threshold | Savings | Quality retained |
|
|
81
|
+
|-----------|---------|-----------------|
|
|
82
|
+
| 0.65 | 59% | 91% |
|
|
83
|
+
| **0.75** | **55%** | **93%** ← recommended |
|
|
84
|
+
| 0.85 | 44% | 96% |
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## How it works
|
|
89
|
+
|
|
90
|
+
Every reasoning model call pays for a fixed extended thinking budget regardless
|
|
91
|
+
of the question's complexity. ThinkRouter intercepts each query, runs a
|
|
92
|
+
lightweight classifier (<5ms), and applies the minimum budget needed:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
"What is 2+3?" → NO_THINK → 50 tokens
|
|
96
|
+
"How does TCP work?" → SHORT → 800 tokens
|
|
97
|
+
"Prove sqrt(2) is irrational" → FULL → 8,000 tokens
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
The classifier adds under 5ms per query. At production scale that overhead
|
|
101
|
+
is negligible compared to the token savings it generates.
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Installation
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Base install — works immediately, zero ML dependencies
|
|
109
|
+
pip install thinkrouter
|
|
110
|
+
|
|
111
|
+
# With fine-tuned DistilBERT classifier (higher accuracy)
|
|
112
|
+
pip install thinkrouter[classifier]
|
|
113
|
+
|
|
114
|
+
# With OpenAI client
|
|
115
|
+
pip install thinkrouter[openai]
|
|
116
|
+
|
|
117
|
+
# With Anthropic client
|
|
118
|
+
pip install thinkrouter[anthropic]
|
|
119
|
+
|
|
120
|
+
# Everything
|
|
121
|
+
pip install thinkrouter[all]
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Quick start
|
|
127
|
+
|
|
128
|
+
### OpenAI
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from thinkrouter import ThinkRouter
|
|
132
|
+
|
|
133
|
+
client = ThinkRouter(
|
|
134
|
+
provider="openai",
|
|
135
|
+
api_key="sk-...", # or set OPENAI_API_KEY
|
|
136
|
+
model="gpt-4o",
|
|
137
|
+
verbose=True, # prints routing decision per call
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
response = client.chat("Explain how merge sort works.")
|
|
141
|
+
print(response.content)
|
|
142
|
+
print(response.routing)
|
|
143
|
+
# ClassifierResult(tier=FULL, confidence=0.87, budget=8000 tokens, latency=1.2ms)
|
|
144
|
+
|
|
145
|
+
client.usage.print_dashboard()
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Anthropic
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
client = ThinkRouter(
|
|
152
|
+
provider="anthropic",
|
|
153
|
+
api_key="sk-ant-...", # or set ANTHROPIC_API_KEY
|
|
154
|
+
model="claude-haiku-4-5-20251001",
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
response = client.chat("What is 144 divided by 12?")
|
|
158
|
+
# → NO_THINK — 50 tokens, not 8,000
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Streaming
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
for chunk in client.stream("Explain quantum entanglement step by step."):
|
|
165
|
+
print(chunk, end="", flush=True)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Classify without an API call
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
results = client.classify_batch([
|
|
172
|
+
"What is 7 * 8?",
|
|
173
|
+
"Design a distributed caching system.",
|
|
174
|
+
"How many days are in a leap year?",
|
|
175
|
+
])
|
|
176
|
+
for r in results:
|
|
177
|
+
print(f"{r.tier.name:<12} conf={r.confidence:.2f} budget={r.token_budget}")
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
NO_THINK conf=0.88 budget=50
|
|
182
|
+
FULL conf=0.85 budget=8000
|
|
183
|
+
NO_THINK conf=0.80 budget=50
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Classifier backends
|
|
189
|
+
|
|
190
|
+
### Heuristic (default)
|
|
191
|
+
|
|
192
|
+
Zero external dependencies. Regex patterns + word-count heuristics.
|
|
193
|
+
Runs in under 1ms. Recommended for development and latency-sensitive production.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
client = ThinkRouter(classifier_backend="heuristic")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### DistilBERT (production accuracy)
|
|
200
|
+
|
|
201
|
+
Fine-tuned on the GSM8K mathematical reasoning dataset.
|
|
202
|
+
Achieves 93%+ quality retention at 60% compute savings.
|
|
203
|
+
Requires `pip install thinkrouter[classifier]`.
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
client = ThinkRouter(
|
|
207
|
+
classifier_backend="distilbert",
|
|
208
|
+
confidence_threshold=0.75,
|
|
209
|
+
)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## Confidence threshold
|
|
215
|
+
|
|
216
|
+
Queries where the classifier's confidence is below the threshold fall back
|
|
217
|
+
conservatively to FULL — the safe default that never degrades output quality.
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
client = ThinkRouter(confidence_threshold=0.80) # more conservative
|
|
221
|
+
client = ThinkRouter(confidence_threshold=0.65) # more aggressive savings
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## API reference
|
|
227
|
+
|
|
228
|
+
### ThinkRouter
|
|
229
|
+
|
|
230
|
+
```python
|
|
231
|
+
ThinkRouter(
|
|
232
|
+
provider = "openai", # "openai" | "anthropic" | "generic"
|
|
233
|
+
api_key = None, # falls back to env var
|
|
234
|
+
model = None, # default model for all calls
|
|
235
|
+
classifier_backend = "heuristic", # "heuristic" | "distilbert"
|
|
236
|
+
confidence_threshold = 0.75,
|
|
237
|
+
max_records = 10_000, # usage tracker record limit
|
|
238
|
+
verbose = False,
|
|
239
|
+
**client_kwargs, # passed to provider SDK client
|
|
240
|
+
)
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### ThinkRouter.chat()
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
response = client.chat(
|
|
247
|
+
query, # str — the user query
|
|
248
|
+
model = None, # override default model
|
|
249
|
+
messages = None, # full message history (list of dicts)
|
|
250
|
+
system = None, # system prompt
|
|
251
|
+
temperature = 0.7,
|
|
252
|
+
**extra, # forwarded to provider API
|
|
253
|
+
)
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### RouterResponse
|
|
257
|
+
|
|
258
|
+
```python
|
|
259
|
+
response.content # str — generated text
|
|
260
|
+
response.routing # ClassifierResult
|
|
261
|
+
response.raw # original provider response object
|
|
262
|
+
response.provider # "openai" | "anthropic"
|
|
263
|
+
response.model # model identifier string
|
|
264
|
+
response.usage_tokens # {"prompt_tokens": N, "completion_tokens": M, ...}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### ClassifierResult
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
result.tier # Tier.NO_THINK | Tier.SHORT | Tier.FULL
|
|
271
|
+
result.confidence # float in [0, 1]
|
|
272
|
+
result.token_budget # int — thinking tokens assigned
|
|
273
|
+
result.latency_ms # classifier wall-clock time
|
|
274
|
+
result.backend # "heuristic" | "distilbert:cuda" | "distilbert:cpu"
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## Running the tests
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
pip install thinkrouter[dev]
|
|
283
|
+
pytest tests/ -v
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## Roadmap
|
|
289
|
+
|
|
290
|
+
- [x] Heuristic classifier (v0.1)
|
|
291
|
+
- [x] OpenAI and Anthropic adapters
|
|
292
|
+
- [x] Streaming support
|
|
293
|
+
- [x] Usage dashboard
|
|
294
|
+
- [x] GitHub Actions CI (Python 3.9–3.12)
|
|
295
|
+
- [ ] DistilBERT model on HuggingFace Hub
|
|
296
|
+
- [ ] Multi-domain training (MMLU, HumanEval, ARC-Challenge)
|
|
297
|
+
- [ ] Async support (`achat()`, `astream()`)
|
|
298
|
+
- [ ] Continuous budget regression
|
|
299
|
+
- [ ] Hosted API proxy (api.thinkrouter.ai)
|
|
300
|
+
|
|
301
|
+
---
|
|
302
|
+
|
|
303
|
+
## Research basis
|
|
304
|
+
|
|
305
|
+
ThinkRouter is grounded in published research:
|
|
306
|
+
|
|
307
|
+
- Zhao et al. (2025). *SelfBudgeter*. arXiv:2505.11274 — 74.47% savings validated
|
|
308
|
+
- Wang et al. (2025). *TALE-EP*. ACL Findings 2025 — 67% output token reduction
|
|
309
|
+
- Sanh et al. (2019). *DistilBERT*. arXiv:1910.01108 — classifier backbone
|
|
310
|
+
- Cobbe et al. (2021). *GSM8K*. arXiv:2110.14168 — training dataset
|
|
311
|
+
|
|
312
|
+
---
|
|
313
|
+
|
|
314
|
+
## Contributing
|
|
315
|
+
|
|
316
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). Issues and PRs welcome.
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## License
|
|
321
|
+
|
|
322
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
# ThinkRouter
|
|
2
|
+
|
|
3
|
+
[](https://github.com/thinkrouter/thinkrouter/actions)
|
|
4
|
+
[](https://pypi.org/project/thinkrouter)
|
|
5
|
+
[](https://pypi.org/project/thinkrouter)
|
|
6
|
+
[](LICENSE)
|
|
7
|
+
[](https://colab.research.google.com/YOUR_DEMO_LINK)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
**Reasoning models charge you 8,000 thinking tokens for "What is 2+3?"**
|
|
12
|
+
**ThinkRouter fixes that with one line of code.**
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from thinkrouter import ThinkRouter
|
|
16
|
+
|
|
17
|
+
client = ThinkRouter(provider="openai")
|
|
18
|
+
response = client.chat("What is the capital of France?")
|
|
19
|
+
# → routed to NO_THINK — 50 tokens used, not 8,000
|
|
20
|
+
|
|
21
|
+
client.usage.print_dashboard()
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
ThinkRouter — Usage Dashboard
|
|
26
|
+
────────────────────────────────────────────
|
|
27
|
+
Total calls : 1,247
|
|
28
|
+
Tokens saved : 8,734,750
|
|
29
|
+
Compute savings : 61.3%
|
|
30
|
+
Avg classifier time : 0.4 ms
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
**Validated on 1,319 real queries (GSM8K benchmark):**
|
|
34
|
+
|
|
35
|
+
| Threshold | Savings | Quality retained |
|
|
36
|
+
|-----------|---------|-----------------|
|
|
37
|
+
| 0.65 | 59% | 91% |
|
|
38
|
+
| **0.75** | **55%** | **93%** ← recommended |
|
|
39
|
+
| 0.85 | 44% | 96% |
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## How it works
|
|
44
|
+
|
|
45
|
+
Every reasoning model call pays for a fixed extended thinking budget regardless
|
|
46
|
+
of the question's complexity. ThinkRouter intercepts each query, runs a
|
|
47
|
+
lightweight classifier (<5ms), and applies the minimum budget needed:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
"What is 2+3?" → NO_THINK → 50 tokens
|
|
51
|
+
"How does TCP work?" → SHORT → 800 tokens
|
|
52
|
+
"Prove sqrt(2) is irrational" → FULL → 8,000 tokens
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The classifier adds under 5ms per query. At production scale that overhead
|
|
56
|
+
is negligible compared to the token savings it generates.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Base install — works immediately, zero ML dependencies
|
|
64
|
+
pip install thinkrouter
|
|
65
|
+
|
|
66
|
+
# With fine-tuned DistilBERT classifier (higher accuracy)
|
|
67
|
+
pip install thinkrouter[classifier]
|
|
68
|
+
|
|
69
|
+
# With OpenAI client
|
|
70
|
+
pip install thinkrouter[openai]
|
|
71
|
+
|
|
72
|
+
# With Anthropic client
|
|
73
|
+
pip install thinkrouter[anthropic]
|
|
74
|
+
|
|
75
|
+
# Everything
|
|
76
|
+
pip install thinkrouter[all]
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick start
|
|
82
|
+
|
|
83
|
+
### OpenAI
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from thinkrouter import ThinkRouter
|
|
87
|
+
|
|
88
|
+
client = ThinkRouter(
|
|
89
|
+
provider="openai",
|
|
90
|
+
api_key="sk-...", # or set OPENAI_API_KEY
|
|
91
|
+
model="gpt-4o",
|
|
92
|
+
verbose=True, # prints routing decision per call
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
response = client.chat("Explain how merge sort works.")
|
|
96
|
+
print(response.content)
|
|
97
|
+
print(response.routing)
|
|
98
|
+
# ClassifierResult(tier=FULL, confidence=0.87, budget=8000 tokens, latency=1.2ms)
|
|
99
|
+
|
|
100
|
+
client.usage.print_dashboard()
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Anthropic
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
client = ThinkRouter(
|
|
107
|
+
provider="anthropic",
|
|
108
|
+
api_key="sk-ant-...", # or set ANTHROPIC_API_KEY
|
|
109
|
+
model="claude-haiku-4-5-20251001",
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
response = client.chat("What is 144 divided by 12?")
|
|
113
|
+
# → NO_THINK — 50 tokens, not 8,000
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Streaming
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
for chunk in client.stream("Explain quantum entanglement step by step."):
|
|
120
|
+
print(chunk, end="", flush=True)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Classify without an API call
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
results = client.classify_batch([
|
|
127
|
+
"What is 7 * 8?",
|
|
128
|
+
"Design a distributed caching system.",
|
|
129
|
+
"How many days are in a leap year?",
|
|
130
|
+
])
|
|
131
|
+
for r in results:
|
|
132
|
+
print(f"{r.tier.name:<12} conf={r.confidence:.2f} budget={r.token_budget}")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
NO_THINK conf=0.88 budget=50
|
|
137
|
+
FULL conf=0.85 budget=8000
|
|
138
|
+
NO_THINK conf=0.80 budget=50
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Classifier backends
|
|
144
|
+
|
|
145
|
+
### Heuristic (default)
|
|
146
|
+
|
|
147
|
+
Zero external dependencies. Regex patterns + word-count heuristics.
|
|
148
|
+
Runs in under 1ms. Recommended for development and latency-sensitive production.
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
client = ThinkRouter(classifier_backend="heuristic")
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### DistilBERT (production accuracy)
|
|
155
|
+
|
|
156
|
+
Fine-tuned on the GSM8K mathematical reasoning dataset.
|
|
157
|
+
Achieves 93%+ quality retention at 60% compute savings.
|
|
158
|
+
Requires `pip install thinkrouter[classifier]`.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
client = ThinkRouter(
|
|
162
|
+
classifier_backend="distilbert",
|
|
163
|
+
confidence_threshold=0.75,
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Confidence threshold
|
|
170
|
+
|
|
171
|
+
Queries where the classifier's confidence is below the threshold fall back
|
|
172
|
+
conservatively to FULL — the safe default that never degrades output quality.
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
client = ThinkRouter(confidence_threshold=0.80) # more conservative
|
|
176
|
+
client = ThinkRouter(confidence_threshold=0.65) # more aggressive savings
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## API reference
|
|
182
|
+
|
|
183
|
+
### ThinkRouter
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
ThinkRouter(
|
|
187
|
+
provider = "openai", # "openai" | "anthropic" | "generic"
|
|
188
|
+
api_key = None, # falls back to env var
|
|
189
|
+
model = None, # default model for all calls
|
|
190
|
+
classifier_backend = "heuristic", # "heuristic" | "distilbert"
|
|
191
|
+
confidence_threshold = 0.75,
|
|
192
|
+
max_records = 10_000, # usage tracker record limit
|
|
193
|
+
verbose = False,
|
|
194
|
+
**client_kwargs, # passed to provider SDK client
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### ThinkRouter.chat()
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
response = client.chat(
|
|
202
|
+
query, # str — the user query
|
|
203
|
+
model = None, # override default model
|
|
204
|
+
messages = None, # full message history (list of dicts)
|
|
205
|
+
system = None, # system prompt
|
|
206
|
+
temperature = 0.7,
|
|
207
|
+
**extra, # forwarded to provider API
|
|
208
|
+
)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### RouterResponse
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
response.content # str — generated text
|
|
215
|
+
response.routing # ClassifierResult
|
|
216
|
+
response.raw # original provider response object
|
|
217
|
+
response.provider # "openai" | "anthropic"
|
|
218
|
+
response.model # model identifier string
|
|
219
|
+
response.usage_tokens # {"prompt_tokens": N, "completion_tokens": M, ...}
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### ClassifierResult
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
result.tier # Tier.NO_THINK | Tier.SHORT | Tier.FULL
|
|
226
|
+
result.confidence # float in [0, 1]
|
|
227
|
+
result.token_budget # int — thinking tokens assigned
|
|
228
|
+
result.latency_ms # classifier wall-clock time
|
|
229
|
+
result.backend # "heuristic" | "distilbert:cuda" | "distilbert:cpu"
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Running the tests
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
pip install thinkrouter[dev]
|
|
238
|
+
pytest tests/ -v
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Roadmap
|
|
244
|
+
|
|
245
|
+
- [x] Heuristic classifier (v0.1)
|
|
246
|
+
- [x] OpenAI and Anthropic adapters
|
|
247
|
+
- [x] Streaming support
|
|
248
|
+
- [x] Usage dashboard
|
|
249
|
+
- [x] GitHub Actions CI (Python 3.9–3.12)
|
|
250
|
+
- [ ] DistilBERT model on HuggingFace Hub
|
|
251
|
+
- [ ] Multi-domain training (MMLU, HumanEval, ARC-Challenge)
|
|
252
|
+
- [ ] Async support (`achat()`, `astream()`)
|
|
253
|
+
- [ ] Continuous budget regression
|
|
254
|
+
- [ ] Hosted API proxy (api.thinkrouter.ai)
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Research basis
|
|
259
|
+
|
|
260
|
+
ThinkRouter is grounded in published research:
|
|
261
|
+
|
|
262
|
+
- Zhao et al. (2025). *SelfBudgeter*. arXiv:2505.11274 — 74.47% savings validated
|
|
263
|
+
- Wang et al. (2025). *TALE-EP*. ACL Findings 2025 — 67% output token reduction
|
|
264
|
+
- Sanh et al. (2019). *DistilBERT*. arXiv:1910.01108 — classifier backbone
|
|
265
|
+
- Cobbe et al. (2021). *GSM8K*. arXiv:2110.14168 — training dataset
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## Contributing
|
|
270
|
+
|
|
271
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). Issues and PRs welcome.
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## License
|
|
276
|
+
|
|
277
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "thinkrouter"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Cut LLM reasoning-token costs by 60% with one line of code — pre-inference query difficulty routing."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
keywords = [
|
|
13
|
+
"llm", "openai", "anthropic", "inference", "optimization",
|
|
14
|
+
"routing", "reasoning", "cost-reduction", "chain-of-thought",
|
|
15
|
+
"distilbert", "nlp", "machine-learning"
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Intended Audience :: Science/Research",
|
|
21
|
+
"License :: OSI Approved :: MIT License",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"httpx>=0.25.0",
|
|
32
|
+
"pydantic>=2.0.0",
|
|
33
|
+
"typing-extensions>=4.5.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
classifier = [
|
|
38
|
+
"transformers>=4.40.0",
|
|
39
|
+
"torch>=2.0.0",
|
|
40
|
+
]
|
|
41
|
+
openai = [
|
|
42
|
+
"openai>=1.0.0",
|
|
43
|
+
]
|
|
44
|
+
anthropic = [
|
|
45
|
+
"anthropic>=0.25.0",
|
|
46
|
+
]
|
|
47
|
+
all = [
|
|
48
|
+
"thinkrouter[classifier,openai,anthropic]",
|
|
49
|
+
]
|
|
50
|
+
dev = [
|
|
51
|
+
"pytest>=8.0.0",
|
|
52
|
+
"pytest-asyncio>=0.23.0",
|
|
53
|
+
"black>=24.0.0",
|
|
54
|
+
"ruff>=0.4.0",
|
|
55
|
+
"mypy>=1.9.0",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[project.urls]
|
|
59
|
+
Homepage = "https://github.com/thinkrouter/thinkrouter"
|
|
60
|
+
Documentation = "https://github.com/thinkrouter/thinkrouter#readme"
|
|
61
|
+
Repository = "https://github.com/thinkrouter/thinkrouter"
|
|
62
|
+
Issues = "https://github.com/thinkrouter/thinkrouter/issues"
|
|
63
|
+
Changelog = "https://github.com/thinkrouter/thinkrouter/blob/main/CHANGELOG.md"
|
|
64
|
+
|
|
65
|
+
[tool.setuptools.packages.find]
|
|
66
|
+
where = ["."]
|
|
67
|
+
include = ["thinkrouter*"]
|
|
68
|
+
|
|
69
|
+
[tool.setuptools.package-data]
|
|
70
|
+
thinkrouter = ["py.typed"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff]
|
|
73
|
+
line-length = 100
|
|
74
|
+
target-version = "py39"
|
|
75
|
+
lint.select = ["E", "F", "I", "N", "W"]
|
|
76
|
+
|
|
77
|
+
[tool.mypy]
|
|
78
|
+
python_version = "3.9"
|
|
79
|
+
strict = true
|
|
80
|
+
ignore_missing_imports = true
|
|
81
|
+
|
|
82
|
+
[tool.pytest.ini_options]
|
|
83
|
+
testpaths = ["tests"]
|
|
84
|
+
python_files = ["test_*.py"]
|