vllm-sr 0.1.0b2.dev20260202073049__py3-none-any.whl → 0.1.0b2.dev20260202094318__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,3 @@
1
- # You can override by specifying your own mappings below:
2
- # Uncomment and customize if you need different model mappings:
3
- # mom_registry:
4
- # "models/mom-domain-classifier": "LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model"
5
- # "models/mom-pii-classifier": "LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model"
6
- # "models/mom-jailbreak-classifier": "LLM-Semantic-Router/lora_jailbreak_classifier_bert-base-uncased_model"
7
- # "models/mom-halugate-detector": "KRLabsOrg/lettucedect-base-modernbert-en-v1"
8
- # "models/mom-halugate-sentinel": "LLM-Semantic-Router/halugate-sentinel"
9
- # "models/mom-halugate-explainer": "tasksource/ModernBERT-base-nli"
10
- # "models/mom-feedback-detector": "llm-semantic-router/feedback-detector"
11
- # "models/mom-embedding-pro": "Qwen/Qwen3-Embedding-0.6B"
12
- # "models/mom-embedding-flash": "google/embeddinggemma-300m"
13
- # "models/mom-embedding-ultra": "llm-semantic-router/mmbert-embed-32k-2d-matryoshka"
14
-
15
1
  # Response API Configuration
16
2
  # Enables OpenAI Response API support with conversation chaining
17
3
  response_api:
@@ -59,25 +45,26 @@ tools:
59
45
 
60
46
  prompt_guard:
61
47
  enabled: true # Global default - can be overridden per category with jailbreak_enabled
62
- use_modernbert: false
63
- model_id: "models/mom-jailbreak-classifier"
64
- jailbreak_mapping_path: "models/mom-jailbreak-classifier/jailbreak_type_mapping.json"
48
+ use_mmbert_32k: true
49
+ model_id: "models/mmbert32k-jailbreak-detector-merged"
50
+ jailbreak_mapping_path: "models/mmbert32k-jailbreak-detector-merged/jailbreak_type_mapping.json"
65
51
  threshold: 0.7
66
52
  use_cpu: true
67
53
 
68
54
  # Classifier configuration
69
55
  classifier:
70
56
  category_model:
71
- model_id: "models/mom-domain-classifier"
57
+ model_id: "models/mmbert32k-intent-classifier-merged"
58
+ use_mmbert_32k: true
72
59
  threshold: 0.6
73
60
  use_cpu: true
74
- category_mapping_path: "models/mom-domain-classifier/category_mapping.json"
61
+ category_mapping_path: "models/mmbert32k-intent-classifier-merged/category_mapping.json"
75
62
  pii_model:
76
- model_id: "models/mom-pii-classifier"
77
- use_modernbert: false
63
+ model_id: "models/mmbert32k-pii-detector-merged"
64
+ use_mmbert_32k: true
78
65
  threshold: 0.9
79
66
  use_cpu: true
80
- pii_mapping_path: "models/mom-pii-classifier/pii_type_mapping.json"
67
+ pii_mapping_path: "models/mmbert32k-pii-detector-merged/pii_type_mapping.json"
81
68
 
82
69
  # Hallucination mitigation configuration
83
70
  # Disabled by default - enable in decisions via hallucination plugin
@@ -85,9 +72,10 @@ hallucination_mitigation:
85
72
  enabled: false
86
73
  # Fact-check classifier: determines if a prompt needs fact verification
87
74
  fact_check_model:
88
- model_id: "models/mom-halugate-sentinel"
75
+ model_id: "models/mmbert32k-factcheck-classifier-merged"
89
76
  threshold: 0.6
90
77
  use_cpu: true
78
+ use_mmbert_32k: true
91
79
  # Hallucination detector: verifies if LLM response is grounded in context
92
80
  hallucination_model:
93
81
  model_id: "models/mom-halugate-detector"
@@ -109,9 +97,10 @@ hallucination_mitigation:
109
97
  # Classifies user feedback into 4 types: satisfied, need_clarification, wrong_answer, want_different
110
98
  feedback_detector:
111
99
  enabled: true
112
- model_id: "models/mom-feedback-detector"
100
+ model_id: "models/mmbert32k-feedback-detector-merged"
113
101
  threshold: 0.7
114
102
  use_cpu: true
103
+ use_mmbert_32k: true
115
104
 
116
105
  # External models configuration
117
106
  # Used for advanced routing signals like preference-based routing via external LLM
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm-sr
3
- Version: 0.1.0b2.dev20260202073049
3
+ Version: 0.1.0b2.dev20260202094318
4
4
  Summary: vLLM Semantic Router - Intelligent routing for Mixture-of-Models
5
5
  Author: vLLM-SR Team
6
6
  License: Apache-2.0
@@ -54,7 +54,8 @@ pip install -e .
54
54
  vllm-sr init
55
55
 
56
56
  # Start the router (includes dashboard)
57
- vllm-sr serve
57
+ # Provide your HF_TOKEN to run the evaluation tests; this is required for downloading the necessary datasets
58
+ HF_TOKEN=hf_xxx vllm-sr serve
58
59
 
59
60
  # Open dashboard in browser
60
61
  vllm-sr dashboard
@@ -28,10 +28,10 @@ cli/templates/grafana-datasource.serve.yaml,sha256=Cxjz1zVWoUdSzbSsS_iJhMHRrmRi6
28
28
  cli/templates/grafana.serve.ini,sha256=x9bCkzxqm5gC4fKToY2lhNPdWhwAaJGVe5ABMW6Dv-c,1674
29
29
  cli/templates/llm-router-dashboard.serve.json,sha256=pwnTjUh7z3_3LnIwtaLXjDWH4aHd2Mc57z0oekgt-Bk,60903
30
30
  cli/templates/prometheus.serve.yaml,sha256=MGYq8dlRq_i2m5sogQ--kwTvJpkf44QQoCNoI7oyVT8,270
31
- cli/templates/router-defaults.yaml,sha256=crPnhOGAQYMgnIjHJEU8aNtlplau8wjrvGLrjqPsnwY,8647
31
+ cli/templates/router-defaults.yaml,sha256=ar-tv3fI8nCind6Ci58gYO_ZbxNtl9BawFfeDpiYXws,7851
32
32
  cli/templates/tools_db.json,sha256=CPqPBkd5nc966m1YEozz06frrmv3Pd5rrkxKkO3rTiA,4537
33
- vllm_sr-0.1.0b2.dev20260202073049.dist-info/METADATA,sha256=yWySmdgOvteP453pmYwdRlqWQWyQVnmo-OTpYUaeVkY,7173
34
- vllm_sr-0.1.0b2.dev20260202073049.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
35
- vllm_sr-0.1.0b2.dev20260202073049.dist-info/entry_points.txt,sha256=WhlBPbLHUpWUsMuUQX9cnvsYMf0ih5i57vvJ1jJNi0k,42
36
- vllm_sr-0.1.0b2.dev20260202073049.dist-info/top_level.txt,sha256=2ImG917oaVHlm0nP9oJE-Qrgs-fq_fGWgba2H1f8fpE,4
37
- vllm_sr-0.1.0b2.dev20260202073049.dist-info/RECORD,,
33
+ vllm_sr-0.1.0b2.dev20260202094318.dist-info/METADATA,sha256=iaG3L_p5hWOvRaCgZNIcjrzbao7Kw9dUYS-sz-J5j_M,7298
34
+ vllm_sr-0.1.0b2.dev20260202094318.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
35
+ vllm_sr-0.1.0b2.dev20260202094318.dist-info/entry_points.txt,sha256=WhlBPbLHUpWUsMuUQX9cnvsYMf0ih5i57vvJ1jJNi0k,42
36
+ vllm_sr-0.1.0b2.dev20260202094318.dist-info/top_level.txt,sha256=2ImG917oaVHlm0nP9oJE-Qrgs-fq_fGWgba2H1f8fpE,4
37
+ vllm_sr-0.1.0b2.dev20260202094318.dist-info/RECORD,,