vllm-sr 0.1.0b2.dev20260127131328.tar.gz → 0.1.0b2.dev20260128094358.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {vllm_sr-0.1.0b2.dev20260127131328/vllm_sr.egg-info → vllm_sr-0.1.0b2.dev20260128094358}/PKG-INFO +1 -1
  2. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/merger.py +39 -0
  3. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/models.py +23 -0
  4. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/config.template.yaml +56 -2
  5. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/pyproject.toml +1 -1
  6. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358/vllm_sr.egg-info}/PKG-INFO +1 -1
  7. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/MANIFEST.in +0 -0
  8. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/README.md +0 -0
  9. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/__init__.py +0 -0
  10. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/__init__.py +0 -0
  11. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/config.py +0 -0
  12. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/generate.py +0 -0
  13. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/init.py +0 -0
  14. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/serve.py +0 -0
  15. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/show_config.py +0 -0
  16. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/show_defaults.py +0 -0
  17. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/commands/validate.py +0 -0
  18. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/config_generator.py +0 -0
  19. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/consts.py +0 -0
  20. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/core.py +0 -0
  21. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/defaults.py +0 -0
  22. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/docker_cli.py +0 -0
  23. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/logo.py +0 -0
  24. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/main.py +0 -0
  25. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/parser.py +0 -0
  26. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/envoy.template.yaml +0 -0
  27. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/generate_dashboard.py +0 -0
  28. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/grafana-dashboard.serve.yaml +0 -0
  29. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/grafana-datasource-jaeger.serve.yaml +0 -0
  30. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/grafana-datasource.serve.yaml +0 -0
  31. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/grafana.serve.ini +0 -0
  32. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/llm-router-dashboard.serve.json +0 -0
  33. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/prometheus.serve.yaml +0 -0
  34. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/router-defaults.yaml +0 -0
  35. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/templates/tools_db.json +0 -0
  36. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/utils.py +0 -0
  37. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/cli/validator.py +0 -0
  38. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/requirements.txt +0 -0
  39. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/setup.cfg +0 -0
  40. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/tests/test_plugin_parsing.py +0 -0
  41. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/tests/test_plugin_yaml_generation.py +0 -0
  42. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/vllm_sr.egg-info/SOURCES.txt +0 -0
  43. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/vllm_sr.egg-info/dependency_links.txt +0 -0
  44. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/vllm_sr.egg-info/entry_points.txt +0 -0
  45. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/vllm_sr.egg-info/requires.txt +0 -0
  46. {vllm_sr-0.1.0b2.dev20260127131328 → vllm_sr-0.1.0b2.dev20260128094358}/vllm_sr.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm-sr
3
- Version: 0.1.0b2.dev20260127131328
3
+ Version: 0.1.0b2.dev20260128094358
4
4
  Summary: vLLM Semantic Router - Intelligent routing for Mixture-of-Models
5
5
  Author: vLLM-SR Team
6
6
  License: Apache-2.0
@@ -185,6 +185,37 @@ def translate_context_signals(context_rules: list) -> list:
185
185
  return rules
186
186
 
187
187
 
188
+ def translate_complexity_signals(complexity_rules: list) -> list:
189
+ """
190
+ Translate complexity signals to router format.
191
+
192
+ Args:
193
+ complexity_rules: List of ComplexityRule objects
194
+
195
+ Returns:
196
+ list: Router complexity rules
197
+ """
198
+ rules = []
199
+ for signal in complexity_rules:
200
+ rule = {
201
+ "name": signal.name,
202
+ "threshold": signal.threshold,
203
+ "hard": {"candidates": signal.hard.candidates},
204
+ "easy": {"candidates": signal.easy.candidates},
205
+ }
206
+ if signal.description:
207
+ rule["description"] = signal.description
208
+ if signal.composer:
209
+ rule["composer"] = {
210
+ "operator": signal.composer.operator,
211
+ "conditions": [
212
+ {"type": c.type, "name": c.name} for c in signal.composer.conditions
213
+ ],
214
+ }
215
+ rules.append(rule)
216
+ return rules
217
+
218
+
188
219
  def translate_external_models(external_models: list) -> list:
189
220
  """
190
221
  Translate external models to router format.
@@ -441,6 +472,14 @@ def merge_configs(user_config: UserConfig, defaults: Dict[str, Any]) -> Dict[str
441
472
  )
442
473
  log.info(f" Added {len(user_config.signals.context)} context signals")
443
474
 
475
+ if user_config.signals.complexity and len(user_config.signals.complexity) > 0:
476
+ merged["complexity_rules"] = translate_complexity_signals(
477
+ user_config.signals.complexity
478
+ )
479
+ log.info(
480
+ f" Added {len(user_config.signals.complexity)} complexity signals"
481
+ )
482
+
444
483
  # Translate domains to categories
445
484
  if user_config.signals.domains:
446
485
  merged["categories"] = translate_domains_to_categories(
@@ -85,6 +85,28 @@ class ContextRule(BaseModel):
85
85
  description: Optional[str] = None
86
86
 
87
87
 
88
+ class ComplexityCandidates(BaseModel):
89
+ """Complexity candidates configuration."""
90
+
91
+ candidates: List[str]
92
+
93
+
94
+ class ComplexityRule(BaseModel):
95
+ """Complexity-based signal configuration using embedding similarity.
96
+
97
+ The composer field allows filtering based on other signals (e.g., only apply
98
+ code_complexity when domain is "computer_science"). This is evaluated after
99
+ all signals are computed in parallel, enabling signal dependencies.
100
+ """
101
+
102
+ name: str
103
+ threshold: float = 0.1
104
+ hard: ComplexityCandidates
105
+ easy: ComplexityCandidates
106
+ description: Optional[str] = None
107
+ composer: Optional["Rules"] = None # Forward reference, defined below
108
+
109
+
88
110
  class Signals(BaseModel):
89
111
  """All signal configurations."""
90
112
 
@@ -97,6 +119,7 @@ class Signals(BaseModel):
97
119
  language: Optional[List[Language]] = []
98
120
  latency: Optional[List[Latency]] = []
99
121
  context: Optional[List[ContextRule]] = []
122
+ complexity: Optional[List[ComplexityRule]] = []
100
123
 
101
124
 
102
125
  class Condition(BaseModel):
@@ -186,8 +186,8 @@ signals:
186
186
  max_tpot: 0.15 # 150ms per token
187
187
  description: "For standard applications with moderate latency tolerance"
188
188
 
189
- # context_rules - Context length signals (Token Count)
190
- context_rules:
189
+ # context - Context length signals (Token Count)
190
+ context:
191
191
  - name: "low_token_count"
192
192
  min_tokens: "0"
193
193
  max_tokens: "1K"
@@ -197,6 +197,60 @@ signals:
197
197
  max_tokens: "128K"
198
198
  description: "Long requests requiring large context window"
199
199
 
200
+ # complexity - Complexity signals (Embedding-based difficulty detection)
201
+ # IMPORTANT: It is strongly recommended to configure a composer for each complexity rule
202
+ # to filter based on other signals (e.g., domain). This prevents misclassification where
203
+ # a math question might match code_complexity or vice versa.
204
+ complexity:
205
+ - name: "code_complexity"
206
+ composer:
207
+ operator: "AND"
208
+ conditions:
209
+ - type: "domain"
210
+ name: "computer science"
211
+ threshold: 0.1
212
+ hard:
213
+ candidates:
214
+ - "design distributed system"
215
+ - "implement consensus algorithm"
216
+ - "optimize for scale"
217
+ - "architect microservices"
218
+ - "fix race condition"
219
+ - "implement garbage collector"
220
+ easy:
221
+ candidates:
222
+ - "print hello world"
223
+ - "loop through array"
224
+ - "read file"
225
+ - "sort list"
226
+ - "string concatenation"
227
+ - "simple function"
228
+ description: "Detects code complexity level"
229
+ - name: "math_complexity"
230
+ composer:
231
+ operator: "AND"
232
+ conditions:
233
+ - type: "domain"
234
+ name: "math"
235
+ threshold: 0.1
236
+ hard:
237
+ candidates:
238
+ - "prove mathematically"
239
+ - "derive the equation"
240
+ - "formal proof"
241
+ - "solve differential equation"
242
+ - "prove by induction"
243
+ - "analyze convergence"
244
+ easy:
245
+ candidates:
246
+ - "what is 2+2"
247
+ - "simple arithmetic"
248
+ - "basic calculation"
249
+ - "count numbers"
250
+ - "add two numbers"
251
+ - "multiply values"
252
+ description: "Detects mathematical complexity level"
253
+
200
254
  # Decisions - Routing logic
201
255
  decisions:
202
256
  # Highest priority: Preference-based routing via external LLM
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "vllm-sr"
7
- version = "0.1.0.beta.2.dev20260127131328"
7
+ version = "0.1.0.beta.2.dev20260128094358"
8
8
  description = "vLLM Semantic Router - Intelligent routing for Mixture-of-Models"
9
9
  authors = [{name = "vLLM-SR Team"}]
10
10
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm-sr
3
- Version: 0.1.0b2.dev20260127131328
3
+ Version: 0.1.0b2.dev20260128094358
4
4
  Summary: vLLM Semantic Router - Intelligent routing for Mixture-of-Models
5
5
  Author: vLLM-SR Team
6
6
  License: Apache-2.0