vllm-sr 0.1.0b2.dev20260202075204__tar.gz → 0.1.0b2.dev20260202181733__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_sr-0.1.0b2.dev20260202075204/vllm_sr.egg-info → vllm_sr-0.1.0b2.dev20260202181733}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/router-defaults.yaml +13 -24
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/pyproject.toml +1 -1
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733/vllm_sr.egg-info}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/MANIFEST.in +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/README.md +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/generate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/init.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/serve.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/show_config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/show_defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/validate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/config_generator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/consts.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/core.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/docker_cli.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/logo.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/main.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/merger.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/models.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/parser.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/config.template.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/envoy.template.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/generate_dashboard.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/grafana-dashboard.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/grafana-datasource-jaeger.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/grafana-datasource.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/grafana.serve.ini +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/llm-router-dashboard.serve.json +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/prometheus.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/tools_db.json +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/utils.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/validator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/requirements.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/setup.cfg +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/tests/test_plugin_parsing.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/tests/test_plugin_yaml_generation.py +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/SOURCES.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/dependency_links.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/entry_points.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/requires.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/top_level.txt +0 -0
|
@@ -1,17 +1,3 @@
|
|
|
1
|
-
# You can override by specifying your own mappings below:
|
|
2
|
-
# Uncomment and customize if you need different model mappings:
|
|
3
|
-
# mom_registry:
|
|
4
|
-
# "models/mom-domain-classifier": "LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model"
|
|
5
|
-
# "models/mom-pii-classifier": "LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model"
|
|
6
|
-
# "models/mom-jailbreak-classifier": "LLM-Semantic-Router/lora_jailbreak_classifier_bert-base-uncased_model"
|
|
7
|
-
# "models/mom-halugate-detector": "KRLabsOrg/lettucedect-base-modernbert-en-v1"
|
|
8
|
-
# "models/mom-halugate-sentinel": "LLM-Semantic-Router/halugate-sentinel"
|
|
9
|
-
# "models/mom-halugate-explainer": "tasksource/ModernBERT-base-nli"
|
|
10
|
-
# "models/mom-feedback-detector": "llm-semantic-router/feedback-detector"
|
|
11
|
-
# "models/mom-embedding-pro": "Qwen/Qwen3-Embedding-0.6B"
|
|
12
|
-
# "models/mom-embedding-flash": "google/embeddinggemma-300m"
|
|
13
|
-
# "models/mom-embedding-ultra": "llm-semantic-router/mmbert-embed-32k-2d-matryoshka"
|
|
14
|
-
|
|
15
1
|
# Response API Configuration
|
|
16
2
|
# Enables OpenAI Response API support with conversation chaining
|
|
17
3
|
response_api:
|
|
@@ -59,25 +45,26 @@ tools:
|
|
|
59
45
|
|
|
60
46
|
prompt_guard:
|
|
61
47
|
enabled: true # Global default - can be overridden per category with jailbreak_enabled
|
|
62
|
-
|
|
63
|
-
model_id: "models/
|
|
64
|
-
jailbreak_mapping_path: "models/
|
|
48
|
+
use_mmbert_32k: true
|
|
49
|
+
model_id: "models/mmbert32k-jailbreak-detector-merged"
|
|
50
|
+
jailbreak_mapping_path: "models/mmbert32k-jailbreak-detector-merged/jailbreak_type_mapping.json"
|
|
65
51
|
threshold: 0.7
|
|
66
52
|
use_cpu: true
|
|
67
53
|
|
|
68
54
|
# Classifier configuration
|
|
69
55
|
classifier:
|
|
70
56
|
category_model:
|
|
71
|
-
model_id: "models/
|
|
57
|
+
model_id: "models/mmbert32k-intent-classifier-merged"
|
|
58
|
+
use_mmbert_32k: true
|
|
72
59
|
threshold: 0.6
|
|
73
60
|
use_cpu: true
|
|
74
|
-
category_mapping_path: "models/
|
|
61
|
+
category_mapping_path: "models/mmbert32k-intent-classifier-merged/category_mapping.json"
|
|
75
62
|
pii_model:
|
|
76
|
-
model_id: "models/
|
|
77
|
-
|
|
63
|
+
model_id: "models/mmbert32k-pii-detector-merged"
|
|
64
|
+
use_mmbert_32k: true
|
|
78
65
|
threshold: 0.9
|
|
79
66
|
use_cpu: true
|
|
80
|
-
pii_mapping_path: "models/
|
|
67
|
+
pii_mapping_path: "models/mmbert32k-pii-detector-merged/pii_type_mapping.json"
|
|
81
68
|
|
|
82
69
|
# Hallucination mitigation configuration
|
|
83
70
|
# Disabled by default - enable in decisions via hallucination plugin
|
|
@@ -85,9 +72,10 @@ hallucination_mitigation:
|
|
|
85
72
|
enabled: false
|
|
86
73
|
# Fact-check classifier: determines if a prompt needs fact verification
|
|
87
74
|
fact_check_model:
|
|
88
|
-
model_id: "models/
|
|
75
|
+
model_id: "models/mmbert32k-factcheck-classifier-merged"
|
|
89
76
|
threshold: 0.6
|
|
90
77
|
use_cpu: true
|
|
78
|
+
use_mmbert_32k: true
|
|
91
79
|
# Hallucination detector: verifies if LLM response is grounded in context
|
|
92
80
|
hallucination_model:
|
|
93
81
|
model_id: "models/mom-halugate-detector"
|
|
@@ -109,9 +97,10 @@ hallucination_mitigation:
|
|
|
109
97
|
# Classifies user feedback into 4 types: satisfied, need_clarification, wrong_answer, want_different
|
|
110
98
|
feedback_detector:
|
|
111
99
|
enabled: true
|
|
112
|
-
model_id: "models/
|
|
100
|
+
model_id: "models/mmbert32k-feedback-detector-merged"
|
|
113
101
|
threshold: 0.7
|
|
114
102
|
use_cpu: true
|
|
103
|
+
use_mmbert_32k: true
|
|
115
104
|
|
|
116
105
|
# External models configuration
|
|
117
106
|
# Used for advanced routing signals like preference-based routing via external LLM
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "vllm-sr"
|
|
7
|
-
version = "0.1.0.beta.2.dev20260202075204"
|
|
7
|
+
version = "0.1.0.beta.2.dev20260202181733"
|
|
8
8
|
description = "vLLM Semantic Router - Intelligent routing for Mixture-of-Models"
|
|
9
9
|
authors = [{name = "vLLM-SR Team"}]
|
|
10
10
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/__init__.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/config.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/generate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/init.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/serve.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/show_config.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/commands/validate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/config_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/cli/templates/tools_db.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/tests/test_plugin_parsing.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260202075204 → vllm_sr-0.1.0b2.dev20260202181733}/vllm_sr.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|