vllm-sr 0.1.0b2.dev20260204090051__tar.gz → 0.1.0b2.dev20260204165623__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_sr-0.1.0b2.dev20260204090051/vllm_sr.egg-info → vllm_sr-0.1.0b2.dev20260204165623}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/envoy.template.yaml +10 -10
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/router-defaults.yaml +1 -1
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/pyproject.toml +1 -1
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623/vllm_sr.egg-info}/PKG-INFO +1 -1
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/MANIFEST.in +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/README.md +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/__init__.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/generate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/init.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/serve.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/show_config.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/show_defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/validate.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/config_generator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/consts.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/core.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/defaults.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/docker_cli.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/logo.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/main.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/merger.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/models.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/parser.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/config.template.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/generate_dashboard.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/grafana-dashboard.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/grafana-datasource-jaeger.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/grafana-datasource.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/grafana.serve.ini +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/llm-router-dashboard.serve.json +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/prometheus.serve.yaml +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/tools_db.json +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/utils.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/validator.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/requirements.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/setup.cfg +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/tests/test_plugin_parsing.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/tests/test_plugin_yaml_generation.py +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/SOURCES.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/dependency_links.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/entry_points.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/requires.txt +0 -0
- {vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/top_level.txt +0 -0
|
@@ -36,8 +36,8 @@ static_resources:
|
|
|
36
36
|
exact: "{{ model.name }}"
|
|
37
37
|
route:
|
|
38
38
|
cluster: {{ model.cluster_name }}_cluster
|
|
39
|
-
timeout: {{ listener.timeout | default('
|
|
40
|
-
idleTimeout:
|
|
39
|
+
timeout: {{ listener.timeout | default('1200s') }}
|
|
40
|
+
idleTimeout: 1200s
|
|
41
41
|
# Rewrite Host header to match upstream server
|
|
42
42
|
host_rewrite_literal: "{{ model.endpoints[0].address }}"
|
|
43
43
|
{% if model.path_prefix %}
|
|
@@ -59,8 +59,8 @@ static_resources:
|
|
|
59
59
|
exact: "{{ model.name }}"
|
|
60
60
|
route:
|
|
61
61
|
cluster: anthropic_api_cluster
|
|
62
|
-
timeout: {{ listener.timeout | default('
|
|
63
|
-
idleTimeout:
|
|
62
|
+
timeout: {{ listener.timeout | default('1200s') }}
|
|
63
|
+
idleTimeout: 1200s
|
|
64
64
|
host_rewrite_literal: "api.anthropic.com"
|
|
65
65
|
{% endfor %}
|
|
66
66
|
# Default route (no x-selected-model header)
|
|
@@ -73,7 +73,7 @@ static_resources:
|
|
|
73
73
|
{% else %}
|
|
74
74
|
cluster: vllm_static_cluster
|
|
75
75
|
{% endif %}
|
|
76
|
-
timeout: {{ listener.timeout | default('
|
|
76
|
+
timeout: {{ listener.timeout | default('1200s') }}
|
|
77
77
|
{% if models %}
|
|
78
78
|
# Rewrite Host header to match upstream server
|
|
79
79
|
host_rewrite_literal: "{{ models[0].endpoints[0].address }}"
|
|
@@ -94,13 +94,13 @@ static_resources:
|
|
|
94
94
|
grpc_service:
|
|
95
95
|
envoy_grpc:
|
|
96
96
|
cluster_name: extproc_service
|
|
97
|
-
timeout:
|
|
97
|
+
timeout: 1200s
|
|
98
98
|
processing_mode:
|
|
99
99
|
request_header_mode: "SEND"
|
|
100
100
|
response_header_mode: "SEND"
|
|
101
101
|
request_body_mode: "BUFFERED"
|
|
102
102
|
response_body_mode: "BUFFERED"
|
|
103
|
-
message_timeout: {{ listener.timeout | default('
|
|
103
|
+
message_timeout: {{ listener.timeout | default('1200s') }}
|
|
104
104
|
- name: envoy.filters.http.router
|
|
105
105
|
typed_config:
|
|
106
106
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
@@ -115,7 +115,7 @@ static_resources:
|
|
|
115
115
|
clusters:
|
|
116
116
|
# ExtProc service (semantic router)
|
|
117
117
|
- name: extproc_service
|
|
118
|
-
connect_timeout:
|
|
118
|
+
connect_timeout: 1200s
|
|
119
119
|
type: STATIC
|
|
120
120
|
lb_policy: ROUND_ROBIN
|
|
121
121
|
http2_protocol_options: {}
|
|
@@ -150,7 +150,7 @@ static_resources:
|
|
|
150
150
|
{% for model in models %}
|
|
151
151
|
# Cluster for model: {{ model.name }}
|
|
152
152
|
- name: {{ model.cluster_name }}_cluster
|
|
153
|
-
connect_timeout:
|
|
153
|
+
connect_timeout: 1200s
|
|
154
154
|
type: {{ model.cluster_type }}
|
|
155
155
|
{% if model.cluster_type == 'LOGICAL_DNS' %}
|
|
156
156
|
dns_lookup_family: V4_ONLY
|
|
@@ -189,7 +189,7 @@ static_resources:
|
|
|
189
189
|
- name: anthropic_api_cluster
|
|
190
190
|
type: LOGICAL_DNS
|
|
191
191
|
dns_lookup_family: V4_ONLY
|
|
192
|
-
connect_timeout:
|
|
192
|
+
connect_timeout: 1200s
|
|
193
193
|
lb_policy: ROUND_ROBIN
|
|
194
194
|
load_assignment:
|
|
195
195
|
cluster_name: anthropic_api_cluster
|
|
@@ -173,7 +173,7 @@ looper:
|
|
|
173
173
|
# Endpoint points to Envoy (same container), which handles load balancing and auth
|
|
174
174
|
# Port should match listener port (default: 8888)
|
|
175
175
|
endpoint: "http://localhost:8899/v1/chat/completions"
|
|
176
|
-
timeout_seconds:
|
|
176
|
+
timeout_seconds: 1200 # Timeout in seconds for each model call
|
|
177
177
|
headers: {} # Optional headers (e.g., {"Authorization": "Bearer xxx"})
|
|
178
178
|
|
|
179
179
|
clear_route_cache: true
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "vllm-sr"
|
|
7
|
-
version = "0.1.0.beta.2.
|
|
7
|
+
version = "0.1.0.beta.2.dev20260204165623"
|
|
8
8
|
description = "vLLM Semantic Router - Intelligent routing for Mixture-of-Models"
|
|
9
9
|
authors = [{name = "vLLM-SR Team"}]
|
|
10
10
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/__init__.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/config.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/generate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/init.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/serve.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/show_config.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/commands/validate.py
RENAMED
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/config_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/cli/templates/tools_db.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/tests/test_plugin_parsing.py
RENAMED
|
File without changes
|
|
File without changes
|
{vllm_sr-0.1.0b2.dev20260204090051 → vllm_sr-0.1.0b2.dev20260204165623}/vllm_sr.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|