tokenable 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenable/__init__.py +3 -0
- tokenable/__main__.py +5 -0
- tokenable/calibration/__init__.py +129 -0
- tokenable/calibration/providers.py +202 -0
- tokenable/cli/__init__.py +695 -0
- tokenable/config/__init__.py +155 -0
- tokenable/enforcer/__init__.py +124 -0
- tokenable/estimator/__init__.py +192 -0
- tokenable/fixer/__init__.py +101 -0
- tokenable/mcp/__init__.py +249 -0
- tokenable/models/__init__.py +308 -0
- tokenable/pricing_sync.py +145 -0
- tokenable/providers/__init__.py +485 -0
- tokenable/providers/data/anthropic.json +452 -0
- tokenable/providers/data/benchmarks.json +324 -0
- tokenable/providers/data/google.json +318 -0
- tokenable/providers/data/openai.json +507 -0
- tokenable/providers/data/perplexity.json +88 -0
- tokenable/providers/data/xai.json +263 -0
- tokenable/py.typed +1 -0
- tokenable/recommender/__init__.py +209 -0
- tokenable/scanner/__init__.py +303 -0
- tokenable/telemetry/__init__.py +92 -0
- tokenable/utils/__init__.py +19 -0
- tokenable-1.0.0.dist-info/METADATA +196 -0
- tokenable-1.0.0.dist-info/RECORD +29 -0
- tokenable-1.0.0.dist-info/WHEEL +4 -0
- tokenable-1.0.0.dist-info/entry_points.txt +2 -0
- tokenable-1.0.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""MCP Server — Model Context Protocol integration for AI agents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from tokenable.models import Provider
|
|
10
|
+
from tokenable.providers import (
|
|
11
|
+
calculate_cost,
|
|
12
|
+
get_all_models,
|
|
13
|
+
get_all_providers,
|
|
14
|
+
get_min_quality_score,
|
|
15
|
+
get_model,
|
|
16
|
+
infer_required_capabilities,
|
|
17
|
+
suggest_alternatives,
|
|
18
|
+
suggest_model_for_task,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# ── Tool handlers ────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def handle_suggest_model(
    task: str, provider: str | None = None, max_cost_per_million_tokens: float | None = None
) -> dict[str, Any]:
    """Suggest the cheapest capable model for a task.

    Args:
        task: Task description, passed to ``suggest_model_for_task``.
        provider: Optional provider name. A non-empty value must be a valid
            ``Provider`` value; an empty value means "no provider given".
        max_cost_per_million_tokens: Optional value passed on as
            ``max_cost_per_million``.

    Returns:
        A dict with ``recommended``, ``alternatives``, ``inferredCapabilities``
        and ``reasoning`` keys, or ``{"error": ...}`` when the provider name is
        unknown or no suggestion is produced.
    """
    prov = None
    if provider:
        try:
            prov = Provider(provider)
        except ValueError:
            # Report an unknown provider the same way handle_estimate_cost does
            # instead of letting the ValueError escape to the caller.
            return {
                "error": f"Unknown provider '{provider}'. Valid: {[p.value for p in get_all_providers()]}"
            }

    suggestion = suggest_model_for_task(
        task, provider=prov, max_cost_per_million=max_cost_per_million_tokens
    )

    if not suggestion:
        return {"error": "No model found matching the inferred capabilities for this task."}

    alts = suggest_alternatives(
        suggestion.model.id, suggestion.model.provider, suggestion.inferred_capabilities
    )

    rec_quality = get_min_quality_score(
        suggestion.model.provider, suggestion.model.id, suggestion.inferred_capabilities
    )

    return {
        "recommended": {
            "provider": suggestion.model.provider.value,
            "model": suggestion.model.id,
            "name": suggestion.model.name,
            "inputCostPerMillion": suggestion.model.input_cost_per_million,
            "outputCostPerMillion": suggestion.model.output_cost_per_million,
            "capabilities": [c.value for c in suggestion.model.capabilities],
            # qualityScore is only emitted when a score is available.
            **({"qualityScore": rec_quality} if rec_quality is not None else {}),
        },
        "alternatives": [
            {
                "provider": a.model.provider.value,
                "model": a.model.id,
                "name": a.model.name,
                "outputCostPerMillion": a.model.output_cost_per_million,
                "savingsPercent": a.savings_percent,
                **({"qualityScore": a.quality_score} if a.quality_score is not None else {}),
            }
            for a in alts
        ],
        "inferredCapabilities": [c.value for c in suggestion.inferred_capabilities],
        "reasoning": suggestion.reasoning,
    }
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def handle_estimate_cost(
    provider: str,
    model: str,
    input_tokens: int,
    output_tokens: int,
    requests_per_day: int | None = None,
    use_batch: bool = False,
    use_cache: bool = False,
) -> dict[str, Any]:
    """Estimate cost for an LLM API call.

    Args:
        provider: Provider name; must be a valid ``Provider`` value.
        model: Model identifier looked up with ``get_model``.
        input_tokens: Input token count for one call (non-negative).
        output_tokens: Output token count for one call (non-negative).
        requests_per_day: Optional daily request volume (non-negative). When
            truthy, a 30-day ``monthlyCost`` is derived from it.
        use_batch: Passed through to ``calculate_cost`` as ``use_batch``.
        use_cache: When true, every input token is passed to
            ``calculate_cost`` as a cached input token.

    Returns:
        A dict describing the estimate, or ``{"error": ...}`` when an argument
        is negative or the provider/model is unknown.
    """
    # Negative counts would silently produce negative costs, so reject them.
    if input_tokens < 0 or output_tokens < 0:
        return {"error": "input_tokens and output_tokens must be non-negative."}
    if requests_per_day is not None and requests_per_day < 0:
        return {"error": "requests_per_day must be non-negative."}

    try:
        prov = Provider(provider)
    except ValueError:
        return {
            "error": f"Unknown provider '{provider}'. Valid: {[p.value for p in get_all_providers()]}"
        }

    pricing = get_model(prov, model)
    if not pricing:
        return {"error": f"Unknown model '{model}' for provider '{provider}'."}

    cost = calculate_cost(
        pricing,
        input_tokens,
        output_tokens,
        cached_input_tokens=input_tokens if use_cache else 0,
        use_batch=use_batch,
    )
    # A missing or zero volume yields no monthly figure at all.
    monthly = cost * requests_per_day * 30 if requests_per_day else None

    return {
        "provider": provider,
        "model": pricing.id,
        "modelName": pricing.name,
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "costPerCall": cost,
        "monthlyCost": monthly,
        "requestsPerDay": requests_per_day,
        "pricing": {
            "inputCostPerMillion": pricing.input_cost_per_million,
            "outputCostPerMillion": pricing.output_cost_per_million,
        },
    }
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def handle_audit(directory: str, volume: int = 1000) -> dict[str, Any]:
    """Scan ``directory`` and report cost estimates plus swap recommendations.

    Returns a dict with the keys ``directory``, ``volume``,
    ``totalMonthlyCost``, ``callSites``, ``unknownModels`` and
    ``recommendations``.
    """
    from tokenable.config import load_config
    from tokenable.estimator import build_estimate_rows
    from tokenable.recommender import detect_smart_alternatives
    from tokenable.scanner import scan_directory

    cfg = load_config()
    scan_results = scan_directory(directory, cfg.ignore)
    estimate_rows, unknown = build_estimate_rows(scan_results, cfg, volume, False, None, None)
    total_cost = sum(row.monthly_cost for row in estimate_rows)

    # Get recommendations
    recommendations = []
    for finding in detect_smart_alternatives(scan_results, volume):
        # Avoid dividing by zero when the current cost is not positive.
        if finding.current_monthly_cost > 0:
            savings_pct = round((finding.monthly_savings / finding.current_monthly_cost) * 100)
        else:
            savings_pct = 0
        recommendations.append(
            {
                "file": finding.file,
                "line": finding.line,
                "currentProvider": finding.current_provider.value,
                "currentModel": finding.current_model,
                "suggestedProvider": finding.suggested_provider.value,
                "suggestedModel": finding.suggested_model,
                "reasoning": finding.reasoning,
                "savingsPercent": savings_pct,
            }
        )

    return {
        "directory": directory,
        "volume": volume,
        "totalMonthlyCost": total_cost,
        "callSites": len(estimate_rows),
        "unknownModels": list(unknown),
        "recommendations": recommendations,
    }
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def handle_apply_recommendations(
    directory: str,
    volume: int = 1000,
    dry_run: bool = False,
    recommendations: list[dict] | None = None,
) -> dict[str, Any]:
    """Apply model swap recommendations to source files.

    When ``recommendations`` is non-empty, each entry must provide the keys
    ``file``, ``line``, ``currentModel`` and ``suggestedModel``. Otherwise the
    recommendations are taken from ``handle_audit(directory, volume)``.

    Returns the ``model_dump()`` of the result of ``apply_recommendations``.
    """
    from tokenable.fixer import apply_recommendations
    from tokenable.models import ModelSwap

    # NOTE(review): an explicit empty list is falsy and therefore also falls
    # back to auto-detection — confirm that this is intended.
    if recommendations:
        entries = recommendations
    else:
        # Auto-detect from audit
        entries = handle_audit(directory, volume).get("recommendations", [])

    swaps = [
        ModelSwap(
            file=entry["file"],
            line=entry["line"],
            current_model=entry["currentModel"],
            suggested_model=entry["suggestedModel"],
        )
        for entry in entries
    ]

    return apply_recommendations(swaps, directory, dry_run).model_dump()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ── MCP Server (stdio JSON-RPC) ─────────────────────────────────────
|
|
194
|
+
|
|
195
|
+
# Registry used by run_server: tool name -> {"description", "handler"}.
TOOLS = {
    tool_name: {"description": summary, "handler": handler}
    for tool_name, summary, handler in (
        (
            "suggest_model",
            "Suggest the cheapest LLM model capable of handling a given task.",
            handle_suggest_model,
        ),
        (
            "estimate_cost",
            "Estimate the cost of an LLM API call.",
            handle_estimate_cost,
        ),
        (
            "audit",
            "Scan a directory for LLM API calls and suggest cheaper models.",
            handle_audit,
        ),
        (
            "apply_recommendations",
            "Apply model swap recommendations to source files.",
            handle_apply_recommendations,
        ),
    )
}
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def run_server() -> None:
    """Run MCP server on stdio (JSON-RPC).

    Reads one JSON document per non-blank stdin line and writes exactly one
    JSON line to stdout for each of them.

    * ``tools/list`` returns the name and description of every ``TOOLS`` entry.
    * ``tools/call`` invokes the named handler with ``params["arguments"]`` as
      keyword arguments.
    * Unknown tools and unknown methods are reported inside ``result`` as
      ``{"error": ...}``.

    Any exception raised while handling a line is turned into a JSON-RPC
    ``error`` object (with ``code`` and ``message``) and the loop continues.
    """
    sys.stderr.write("TokEnable MCP server running on stdio\n")

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Reset per line so an error reply can echo the id once it is known.
        req_id = None
        # -32700 (parse error) until the line decodes, -32603 (internal) after.
        error_code = -32700
        try:
            request = json.loads(line)
            error_code = -32603
            req_id = request.get("id")
            method = request.get("method", "")
            # "params": null is treated like an absent params member.
            params = request.get("params") or {}

            if method == "tools/list":
                result = [
                    {"name": name, "description": info["description"]}
                    for name, info in TOOLS.items()
                ]
            elif method == "tools/call":
                tool_name = params.get("name", "")
                tool_args = params.get("arguments") or {}
                if tool_name in TOOLS:
                    result = TOOLS[tool_name]["handler"](**tool_args)
                else:
                    result = {"error": f"Unknown tool: {tool_name}"}
            else:
                result = {"error": f"Unknown method: {method}"}

            # Serialize inside the try: an unserializable handler result must
            # become an error reply rather than terminate the server.
            payload = json.dumps({"jsonrpc": "2.0", "id": req_id, "result": result})
        except Exception as e:
            payload = json.dumps(
                {
                    "jsonrpc": "2.0",
                    "id": req_id,
                    "error": {"code": error_code, "message": str(e)},
                }
            )

        sys.stdout.write(payload + "\n")
        sys.stdout.flush()
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""Pydantic data models for TokEnable."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import Enum, StrEnum
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Provider(StrEnum):
|
|
12
|
+
"""Supported LLM providers."""
|
|
13
|
+
|
|
14
|
+
ANTHROPIC = "anthropic"
|
|
15
|
+
OPENAI = "openai"
|
|
16
|
+
GOOGLE = "google"
|
|
17
|
+
XAI = "xai"
|
|
18
|
+
PERPLEXITY = "perplexity"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ModelTier(StrEnum):
|
|
22
|
+
"""Cost tier derived from output pricing."""
|
|
23
|
+
|
|
24
|
+
BUDGET = "budget"
|
|
25
|
+
MID = "mid"
|
|
26
|
+
PREMIUM = "premium"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ModelStatus(StrEnum):
|
|
30
|
+
"""Model availability status."""
|
|
31
|
+
|
|
32
|
+
CURRENT = "current"
|
|
33
|
+
LEGACY = "legacy"
|
|
34
|
+
DEPRECATED = "deprecated"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Capability(StrEnum):
|
|
38
|
+
"""Model capabilities."""
|
|
39
|
+
|
|
40
|
+
CODE = "code"
|
|
41
|
+
REASONING = "reasoning"
|
|
42
|
+
GENERAL = "general"
|
|
43
|
+
CREATIVE = "creative"
|
|
44
|
+
VISION = "vision"
|
|
45
|
+
SEARCH = "search"
|
|
46
|
+
AUDIO = "audio"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ModelPricing(BaseModel):
    """Pricing and metadata record for one model of one provider."""

    # Identity
    id: str
    name: str
    provider: Provider
    aliases: list[str] = Field(default_factory=list)
    status: ModelStatus = ModelStatus.CURRENT

    # Base prices per million tokens (required)
    input_cost_per_million: float
    output_cost_per_million: float

    # Optional price variants; None when no value is recorded
    cache_read_input_cost_per_million: float | None = None
    cache_write_input_cost_per_million: float | None = None
    batch_input_cost_per_million: float | None = None
    batch_output_cost_per_million: float | None = None
    fast_input_cost_per_million: float | None = None
    fast_output_cost_per_million: float | None = None
    input_cost_above_200k_per_million: float | None = None
    output_cost_above_200k_per_million: float | None = None

    # Token limits (required)
    context_window: int
    max_output_tokens: int

    # Feature flags, all off unless stated
    supports_vision: bool = False
    supports_tools: bool = False
    supports_prompt_caching: bool = False
    supports_reasoning: bool = False
    supports_computer_use: bool = False

    # Classification
    tier: ModelTier = ModelTier.MID
    capabilities: list[Capability] = Field(default_factory=list)
    knowledge_cutoff: str | None = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ScanResult(BaseModel):
    """One LLM API call site found by the scanner."""

    # Location of the call site
    file_path: str
    line_number: int

    # What was detected; optional values default to None when not extracted
    provider: Provider
    model: str | None = None
    system_prompt: str | None = None
    user_prompt: str | None = None
    max_output_tokens: int | None = None
    is_dynamic: bool = False
    framework: str = ""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class TokenSource(StrEnum):
|
|
94
|
+
"""How token counts were determined."""
|
|
95
|
+
|
|
96
|
+
CODE = "code"
|
|
97
|
+
MODEL_LIMIT = "model_limit"
|
|
98
|
+
TYPICAL = "typical"
|
|
99
|
+
PRODUCTION = "production"
|
|
100
|
+
CALIBRATED = "calibrated"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class EstimateRow(BaseModel):
    """Cost estimate computed for one call site."""

    # Call site
    file: str
    line: int
    provider: str
    model: str

    # Token counts, each paired with how it was determined
    input_tokens: int
    input_token_source: TokenSource
    output_tokens: int
    output_token_source: TokenSource

    # Resulting costs
    cost_per_call: float
    monthly_cost: float

    # Prompts, when available
    system_prompt: str | None = None
    user_prompt: str | None = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class EstimateSummary(BaseModel):
    """Aggregate of all per-call-site estimates."""

    rows: list[EstimateRow]
    total_monthly_cost: float
    volume: int
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ModelCalibration(BaseModel):
    """Calibration factors stored for one model."""

    # Ratios are validated to lie within [0.001, 100].
    input_ratio: float = Field(ge=0.001, le=100)
    output_ratio: float = Field(ge=0.001, le=100)

    sample_size: int = Field(ge=0)
    confidence: Literal["low", "medium", "high"]

    # Averages behind the ratios; all validated as non-negative.
    actual_avg_input: float = Field(ge=0)
    actual_avg_output: float = Field(ge=0)
    estimated_avg_input: float = Field(ge=0)
    estimated_avg_output: float = Field(ge=0)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class CalibrationData(BaseModel):
    """Persisted calibration data: a map of ModelCalibration entries."""

    # Only version 1 is accepted.
    version: Literal[1] = 1
    calibrated_at: str
    models: dict[str, ModelCalibration] = Field(default_factory=dict)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class ModelStats(BaseModel):
    """Usage statistics of one model observed in production."""

    provider: str
    model: str
    request_count: int

    # Token statistics: averages and p50 values
    avg_input_tokens: float
    avg_output_tokens: float
    p50_input_tokens: float
    p50_output_tokens: float

    # Cost figures default to 0.0 when not supplied
    avg_cost_per_request: float = 0.0
    total_cost: float = 0.0
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ProviderUsageRecord(BaseModel):
    """Per-model usage figures reported by a provider API."""

    model: str
    request_count: int

    # Totals and averages are all integer token counts here.
    total_input_tokens: int
    total_output_tokens: int
    avg_input_tokens: int
    avg_output_tokens: int
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class ProviderUsageResult(BaseModel):
    """Usage records of one provider for a reporting period."""

    provider: Provider
    records: list[ProviderUsageRecord]

    # Period boundaries are stored as plain strings.
    period_start: str
    period_end: str
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Accepted values for TelemetryConfig.backend.
# NOTE(review): "inferwise-cloud" does not match this package's name — confirm
# the identifier is still the intended one.
TelemetryBackend = Literal["otlp", "grafana-tempo", "inferwise-cloud"]
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class TelemetryConfig(BaseModel):
    """Settings that select and address a telemetry backend."""

    backend: TelemetryBackend
    endpoint: str

    # Optional extras: request headers and an API key
    headers: dict[str, str] = Field(default_factory=dict)
    api_key: str | None = None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class BudgetConfig(BaseModel):
    """Budget thresholds used for enforcement; every threshold is optional."""

    warn: float | None = None
    block: float | None = None
    require_approval: float | None = None
    approvers: list[str] = Field(default_factory=list)
    max_monthly_cost: float | None = None
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class OverrideConfig(BaseModel):
    """Volume override that applies to files matching a pattern."""

    pattern: str
    volume: int | None = None
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class TokEnableConfig(BaseModel):
    """Schema of the main configuration file; every field is optional."""

    default_volume: int | None = None
    ignore: list[str] = Field(default_factory=list)
    overrides: list[OverrideConfig] = Field(default_factory=list)
    budgets: BudgetConfig | None = None
    telemetry: TelemetryConfig | None = None

    # Deprecated fields
    api_url: str | None = None
    api_key: str | None = None
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class ModelSwap(BaseModel):
    """A recommended replacement of one model by another at a call site."""

    # Where the swap applies
    file: str
    line: int

    # What to replace and with what
    current_model: str
    suggested_model: str

    # Defaults to 0.0 when the caller supplies no savings figure
    monthly_savings: float = 0.0
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class AppliedSwap(BaseModel):
    """Record of a model swap that was applied."""

    file: str
    line: int
    from_model: str
    to_model: str
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class SkippedSwap(BaseModel):
    """Record of a model swap that was skipped, including the reason."""

    file: str
    line: int
    from_model: str
    to_model: str
    reason: str
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class ApplyResult(BaseModel):
    """Outcome of applying a batch of model swaps."""

    # Per-swap records
    applied: list[AppliedSwap] = Field(default_factory=list)
    skipped: list[SkippedSwap] = Field(default_factory=list)

    # Summary figures; all default to zero
    total_applied: int = 0
    total_skipped: int = 0
    estimated_monthly_savings: float = 0.0
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class SmartAlternativeFinding(BaseModel):
    """Audit finding that recommends an alternative model for a call site."""

    # Discriminator; fixed to "smart-alternative"
    type: Literal["smart-alternative"] = "smart-alternative"

    # Call site
    file: str
    line: int

    # Current model and its monthly cost
    current_provider: Provider
    current_model: str
    current_monthly_cost: float

    # Suggested model and its monthly cost
    suggested_provider: Provider
    suggested_model: str
    suggested_monthly_cost: float

    monthly_savings: float
    required_capabilities: list[Capability]
    confidence: Literal["high", "medium", "low"]
    reasoning: str

    # Quality scores are optional
    quality_score: float | None = None
    current_quality_score: float | None = None
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class CachingFinding(BaseModel):
    """Audit finding that describes a prompt caching opportunity."""

    # Discriminator; fixed to "caching"
    type: Literal["caching"] = "caching"

    system_prompt: str
    # Each location is a dict with string keys and str or int values.
    locations: list[dict[str, str | int]]
    provider: Provider
    model: str
    current_monthly_cost: float
    monthly_savings: float
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
class BatchFinding(BaseModel):
    """Audit finding that describes a batch API opportunity."""

    # Discriminator; fixed to "batch"
    type: Literal["batch"] = "batch"

    file: str
    model: str
    provider: Provider
    call_count: int
    current_monthly_cost: float
    monthly_savings: float
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# Union alias covering the three finding models defined in this module.
AuditFinding = SmartAlternativeFinding | CachingFinding | BatchFinding
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Runtime pricing sync from LiteLLM's community database."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import date
|
|
7
|
+
from importlib.resources import files
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
# URL of the LiteLLM price table (JSON) that sync_pricing() downloads.
LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
|
14
|
+
|
|
15
|
+
# Models we track per provider — LiteLLM key → our model ID.
# Anthropic and OpenAI keys equal our IDs; the other providers' keys carry a
# "<prefix>/" in front of our ID.
TRACKED_MODELS: dict[str, dict[str, str]] = {
    "anthropic": {
        model_id: model_id
        for model_id in (
            "claude-opus-4-6",
            "claude-sonnet-4-6",
            "claude-haiku-4-5-20251001",
            "claude-3-5-haiku-20241022",
        )
    },
    "openai": {
        model_id: model_id
        for model_id in (
            "gpt-4o",
            "gpt-4o-mini",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4.1-nano",
            "o3",
            "o3-mini",
            "o4-mini",
        )
    },
    "google": {
        f"gemini/{model_id}": model_id
        for model_id in (
            "gemini-2.5-pro",
            "gemini-2.5-flash",
            "gemini-2.0-flash",
            "gemini-2.0-flash-lite",
        )
    },
    "xai": {f"xai/{model_id}": model_id for model_id in ("grok-3", "grok-3-mini")},
    "perplexity": {
        f"perplexity/{model_id}": model_id for model_id in ("sonar-pro", "sonar")
    },
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _get_providers_dir() -> Path:
    """Return the ``data`` directory of the installed ``tokenable.providers`` package."""
    package_root = files("tokenable.providers")
    # files() may hand back a Traversable rather than a Path, so the result is
    # converted through its string form.
    return Path(str(package_root / "data"))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _find_entry(prices: dict, key: str) -> dict | None:
|
|
58
|
+
"""Find a model in LiteLLM data."""
|
|
59
|
+
if key in prices:
|
|
60
|
+
return prices[key]
|
|
61
|
+
for full_key in prices:
|
|
62
|
+
if full_key.endswith(f"/{key}") or full_key == key:
|
|
63
|
+
return prices[full_key]
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _per_million(cost_per_token: float | None) -> float:
    """Convert a per-token price (or None) into a per-million-token price."""
    # Six decimals remove float noise from the multiplication while keeping
    # sub-cent prices such as 0.075, which two decimals cannot represent.
    return round((cost_per_token or 0) * 1_000_000, 6)


def _assign_if_changed(model: dict[str, Any], field: str, value: Any) -> bool:
    """Store ``value`` under ``field``; return True only if it differed before."""
    if model.get(field) == value:
        return False
    model[field] = value
    return True


def sync_pricing(
    provider: str | None = None,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Fetch latest pricing from LiteLLM and update local JSON files.

    Args:
        provider: Key of ``TRACKED_MODELS`` to sync; all tracked providers are
            synced when omitted.
        dry_run: When true, changes are computed and reported but no file is
            written.

    Returns:
        Dict with ``total_changes`` and a ``changes`` list. Only input/output
        price changes are listed; limit and cache-price updates are applied
        without being reported there.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status
            (raised by ``raise_for_status``).
    """
    resp = httpx.get(LITELLM_URL, timeout=30, follow_redirects=True)
    resp.raise_for_status()
    prices = resp.json()

    providers_dir = _get_providers_dir()
    providers_to_sync = [provider] if provider else list(TRACKED_MODELS.keys())
    all_changes: list[dict[str, Any]] = []
    today = date.today().isoformat()

    for prov in providers_to_sync:
        tracked = TRACKED_MODELS.get(prov, {})
        file_path = providers_dir / f"{prov}.json"
        if not file_path.exists():
            continue

        data = json.loads(file_path.read_text(encoding="utf-8"))
        models = data.get("models", [])
        file_modified = False

        for litellm_key, our_id in tracked.items():
            entry = _find_entry(prices, litellm_key)
            if not entry:
                continue

            our_model = next((m for m in models if m["id"] == our_id), None)
            if not our_model:
                continue

            # Convert per-token → per-million
            new_input = _per_million(entry.get("input_cost_per_token"))
            new_output = _per_million(entry.get("output_cost_per_token"))

            # An entry without any price carries nothing to sync.
            if new_input == 0 and new_output == 0:
                continue

            for field, label, new_value in (
                ("input_cost_per_million", "input/M", new_input),
                ("output_cost_per_million", "output/M", new_output),
            ):
                old_value = our_model.get(field, 0)
                if new_value != old_value:
                    all_changes.append(
                        {"model": f"{prov}/{our_id}", "field": label, "old": old_value, "new": new_value}
                    )
                    our_model[field] = new_value
                    file_modified = True

            # Update context/output limits
            max_in = entry.get("max_input_tokens")
            if max_in and _assign_if_changed(our_model, "context_window", max_in):
                file_modified = True

            max_out = entry.get("max_output_tokens") or entry.get("max_tokens")
            if max_out and _assign_if_changed(our_model, "max_output_tokens", max_out):
                file_modified = True

            # Cache pricing — only an actual change marks the file as modified,
            # so an unchanged file is not rewritten and re-dated on every sync.
            cache_read = entry.get("cache_read_input_token_cost")
            if cache_read and _assign_if_changed(
                our_model, "cache_read_input_cost_per_million", _per_million(cache_read)
            ):
                file_modified = True

            cache_write = entry.get("cache_creation_input_token_cost")
            if cache_write and _assign_if_changed(
                our_model, "cache_write_input_cost_per_million", _per_million(cache_write)
            ):
                file_modified = True

        if file_modified and not dry_run:
            data["last_updated"] = today
            data["last_verified"] = today
            file_path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")

    return {"total_changes": len(all_changes), "changes": all_changes}
|