vllm-sr 0.1.0b2.dev20260128023742__tar.gz → 0.1.0b2.dev20260128175701__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_sr-0.1.0b2.dev20260128023742/vllm_sr.egg-info → vllm_sr-0.1.0b2.dev20260128175701}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/merger.py +39 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/models.py +23 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/config.template.yaml +56 -2
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/pyproject.toml +1 -1
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701/vllm_sr.egg-info}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/MANIFEST.in +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/README.md +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/generate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/init.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/serve.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/show_config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/show_defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/validate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/config_generator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/consts.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/core.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/docker_cli.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/logo.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/main.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/parser.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/envoy.template.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/generate_dashboard.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/grafana-dashboard.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/grafana-datasource-jaeger.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/grafana-datasource.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/grafana.serve.ini +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/llm-router-dashboard.serve.json +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/prometheus.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/router-defaults.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/tools_db.json +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/utils.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/validator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/requirements.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/setup.cfg +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/tests/test_plugin_parsing.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/tests/test_plugin_yaml_generation.py +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/SOURCES.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/dependency_links.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/entry_points.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/requires.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/top_level.txt +0 -0
|
@@ -185,6 +185,37 @@ def translate_context_signals(context_rules: list) -> list:
|
|
|
185
185
|
return rules
|
|
186
186
|
|
|
187
187
|
|
|
188
|
+
def translate_complexity_signals(complexity_rules: list) -> list:
    """
    Translate complexity signals to router format.

    Args:
        complexity_rules: List of ComplexityRule objects. ``None`` or an
            empty list produces an empty result.

    Returns:
        list: Router complexity rules, one dict per input signal. Each dict
            always carries ``name``, ``threshold``, ``hard`` and ``easy``;
            ``description`` and ``composer`` are only present when the
            signal defines a truthy value for them.
    """
    # The complexity signal list is optional in the user config, so treat
    # None like an empty list instead of raising TypeError on iteration.
    if not complexity_rules:
        return []

    rules = []
    for signal in complexity_rules:
        rule = {
            "name": signal.name,
            "threshold": signal.threshold,
            "hard": {"candidates": signal.hard.candidates},
            "easy": {"candidates": signal.easy.candidates},
        }
        # Optional keys are omitted rather than emitted as null values.
        if signal.description:
            rule["description"] = signal.description
        if signal.composer:
            rule["composer"] = {
                "operator": signal.composer.operator,
                "conditions": [
                    {"type": c.type, "name": c.name}
                    for c in signal.composer.conditions
                ],
            }
        rules.append(rule)
    return rules
|
|
217
|
+
|
|
218
|
+
|
|
188
219
|
def translate_external_models(external_models: list) -> list:
|
|
189
220
|
"""
|
|
190
221
|
Translate external models to router format.
|
|
@@ -441,6 +472,14 @@ def merge_configs(user_config: UserConfig, defaults: Dict[str, Any]) -> Dict[str
|
|
|
441
472
|
)
|
|
442
473
|
log.info(f" Added {len(user_config.signals.context)} context signals")
|
|
443
474
|
|
|
475
|
+
if user_config.signals.complexity and len(user_config.signals.complexity) > 0:
|
|
476
|
+
merged["complexity_rules"] = translate_complexity_signals(
|
|
477
|
+
user_config.signals.complexity
|
|
478
|
+
)
|
|
479
|
+
log.info(
|
|
480
|
+
f" Added {len(user_config.signals.complexity)} complexity signals"
|
|
481
|
+
)
|
|
482
|
+
|
|
444
483
|
# Translate domains to categories
|
|
445
484
|
if user_config.signals.domains:
|
|
446
485
|
merged["categories"] = translate_domains_to_categories(
|
|
@@ -85,6 +85,28 @@ class ContextRule(BaseModel):
|
|
|
85
85
|
description: Optional[str] = None
|
|
86
86
|
|
|
87
87
|
|
|
88
|
+
class ComplexityCandidates(BaseModel):
    """Candidate phrases for one side (hard or easy) of a complexity rule."""

    # Required list of candidate strings; no default is provided.
    candidates: List[str]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ComplexityRule(BaseModel):
    """Configuration of a complexity signal based on embedding similarity.

    An optional composer restricts the rule using other signals, for example
    applying code_complexity only when the domain is "computer science".
    The composer is evaluated once all signals have been computed in
    parallel, which is what allows one signal to depend on another.
    """

    # Rule identifier.
    name: str
    # Defaults to 0.1 when omitted.
    # NOTE(review): exact semantics of the threshold are defined by the
    # router, not by this model - confirm before documenting further.
    threshold: float = 0.1
    # Candidate sets for the "hard" and "easy" sides; both are required.
    hard: ComplexityCandidates
    easy: ComplexityCandidates
    description: Optional[str] = None
    composer: Optional["Rules"] = None  # Forward reference, defined below
|
|
108
|
+
|
|
109
|
+
|
|
88
110
|
class Signals(BaseModel):
|
|
89
111
|
"""All signal configurations."""
|
|
90
112
|
|
|
@@ -97,6 +119,7 @@ class Signals(BaseModel):
|
|
|
97
119
|
language: Optional[List[Language]] = []
|
|
98
120
|
latency: Optional[List[Latency]] = []
|
|
99
121
|
context: Optional[List[ContextRule]] = []
|
|
122
|
+
complexity: Optional[List[ComplexityRule]] = []
|
|
100
123
|
|
|
101
124
|
|
|
102
125
|
class Condition(BaseModel):
|
|
@@ -186,8 +186,8 @@ signals:
|
|
|
186
186
|
max_tpot: 0.15 # 150ms per token
|
|
187
187
|
description: "For standard applications with moderate latency tolerance"
|
|
188
188
|
|
|
189
|
-
#
|
|
190
|
-
|
|
189
|
+
# context - Context length signals (Token Count)
|
|
190
|
+
context:
|
|
191
191
|
- name: "low_token_count"
|
|
192
192
|
min_tokens: "0"
|
|
193
193
|
max_tokens: "1K"
|
|
@@ -197,6 +197,60 @@ signals:
|
|
|
197
197
|
max_tokens: "128K"
|
|
198
198
|
description: "Long requests requiring large context window"
|
|
199
199
|
|
|
200
|
+
# complexity - Complexity signals (Embedding-based difficulty detection)
|
|
201
|
+
# IMPORTANT: It is strongly recommended to configure a composer for each complexity rule
|
|
202
|
+
# to filter based on other signals (e.g., domain). This prevents misclassification where
|
|
203
|
+
# a math question might match code_complexity or vice versa.
|
|
204
|
+
complexity:
|
|
205
|
+
- name: "code_complexity"
|
|
206
|
+
composer:
|
|
207
|
+
operator: "AND"
|
|
208
|
+
conditions:
|
|
209
|
+
- type: "domain"
|
|
210
|
+
name: "computer science"
|
|
211
|
+
threshold: 0.1
|
|
212
|
+
hard:
|
|
213
|
+
candidates:
|
|
214
|
+
- "design distributed system"
|
|
215
|
+
- "implement consensus algorithm"
|
|
216
|
+
- "optimize for scale"
|
|
217
|
+
- "architect microservices"
|
|
218
|
+
- "fix race condition"
|
|
219
|
+
- "implement garbage collector"
|
|
220
|
+
easy:
|
|
221
|
+
candidates:
|
|
222
|
+
- "print hello world"
|
|
223
|
+
- "loop through array"
|
|
224
|
+
- "read file"
|
|
225
|
+
- "sort list"
|
|
226
|
+
- "string concatenation"
|
|
227
|
+
- "simple function"
|
|
228
|
+
description: "Detects code complexity level"
|
|
229
|
+
- name: "math_complexity"
|
|
230
|
+
composer:
|
|
231
|
+
operator: "AND"
|
|
232
|
+
conditions:
|
|
233
|
+
- type: "domain"
|
|
234
|
+
name: "math"
|
|
235
|
+
threshold: 0.1
|
|
236
|
+
hard:
|
|
237
|
+
candidates:
|
|
238
|
+
- "prove mathematically"
|
|
239
|
+
- "derive the equation"
|
|
240
|
+
- "formal proof"
|
|
241
|
+
- "solve differential equation"
|
|
242
|
+
- "prove by induction"
|
|
243
|
+
- "analyze convergence"
|
|
244
|
+
easy:
|
|
245
|
+
candidates:
|
|
246
|
+
- "what is 2+2"
|
|
247
|
+
- "simple arithmetic"
|
|
248
|
+
- "basic calculation"
|
|
249
|
+
- "count numbers"
|
|
250
|
+
- "add two numbers"
|
|
251
|
+
- "multiply values"
|
|
252
|
+
description: "Detects mathematical complexity level"
|
|
253
|
+
|
|
200
254
|
# Decisions - Routing logic
|
|
201
255
|
decisions:
|
|
202
256
|
# Highest priority: Preference-based routing via external LLM
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "vllm-sr"
|
|
7
|
-
version = "0.1.0.beta.2.dev20260128023742"
|
|
7
|
+
version = "0.1.0.beta.2.dev20260128175701"
|
|
8
8
|
description = "vLLM Semantic Router - Intelligent routing for Mixture-of-Models"
|
|
9
9
|
authors = [{name = "vLLM-SR Team"}]
|
|
10
10
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/__init__.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/config.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/generate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/init.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/serve.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/show_config.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/commands/validate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/config_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/cli/templates/tools_db.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/tests/test_plugin_parsing.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260128023742 → vllm_sr-0.1.0b2.dev20260128175701}/vllm_sr.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|