vllm-router 0.1.9__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_router-0.1.9 → vllm_router-0.1.10}/Cargo.toml +19 -3
- {vllm_router-0.1.9/py_src/vllm_router.egg-info → vllm_router-0.1.10}/PKG-INFO +19 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/README.md +18 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10/py_src/vllm_router.egg-info}/PKG-INFO +19 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/pyproject.toml +1 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/types.rs +9 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/response_memory_store.rs +1 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/lib.rs +6 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/main.rs +18 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/cache_aware.rs +226 -203
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/mod.rs +25 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/spec.rs +597 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/validation.rs +2 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/header_utils.rs +44 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/dp_utils.rs +6 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/router.rs +4 -1
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/vllm_pd_router.rs +125 -49
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/server.rs +212 -10
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/service_discovery.rs +2 -0
- vllm_router-0.1.10/src/tree.rs +2312 -0
- vllm_router-0.1.9/src/tree.rs +0 -1478
- {vllm_router-0.1.9 → vllm_router-0.1.10}/MANIFEST.in +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/build.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/__init__.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/launch_router.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/mini_lb.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/router.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/router_args.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/version.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/SOURCES.txt +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/dependency_links.txt +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/entry_points.txt +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/not-zip-safe +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/requires.txt +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/top_level.txt +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/setup.cfg +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/setup.py +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/validation.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/circuit_breaker.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/error.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/retry.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/token_bucket.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/worker.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/worker_registry.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/response_noop_store.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/responses.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/grpc/client.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/grpc/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/handler.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/logger.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/logging.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/metrics.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/middleware.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/consistent_hash.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/factory.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/power_of_two.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/random.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/registry.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/round_robin.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/proto/vllm_scheduler.proto +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/worker_spec.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/factory.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/pd_router.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/router.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/logprobs_merge.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/openai_router.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/pd_router.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/pd_types.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/vllm_service_discovery.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/router_manager.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/interface.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/pool_route.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/prefill_decode_route.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/round_robin_route.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/routing_tree_builder.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/single_server_route.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/chat_template.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/factory.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/hub.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/huggingface.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/mock.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/mod.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/sequence.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/stop.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/stream.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/tests.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/tiktoken.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/traits.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/types.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/utils/json.rs +0 -0
- {vllm_router-0.1.9 → vllm_router-0.1.10}/src/utils/mod.rs +0 -0
|
@@ -22,7 +22,15 @@ path = "src/main.rs"
|
|
|
22
22
|
clap = { version = "4", features = ["derive"] }
|
|
23
23
|
axum = { version = "0.8.4", features = ["macros", "ws", "tracing"] }
|
|
24
24
|
tower = { version = "0.5", features = ["full"] }
|
|
25
|
-
tower-http = { version = "0.6", features = [
|
|
25
|
+
tower-http = { version = "0.6", features = [
|
|
26
|
+
"trace",
|
|
27
|
+
"compression-gzip",
|
|
28
|
+
"cors",
|
|
29
|
+
"timeout",
|
|
30
|
+
"limit",
|
|
31
|
+
"request-id",
|
|
32
|
+
"util",
|
|
33
|
+
] }
|
|
26
34
|
serde = { version = "1.0", features = ["derive"] }
|
|
27
35
|
serde_json = "1.0"
|
|
28
36
|
bytes = "1.8.0"
|
|
@@ -36,7 +44,11 @@ http = "1.1.0"
|
|
|
36
44
|
tokio = { version = "1.42.0", features = ["full"] }
|
|
37
45
|
async-trait = "0.1"
|
|
38
46
|
tracing = "0.1"
|
|
39
|
-
tracing-subscriber = { version = "0.3", features = [
|
|
47
|
+
tracing-subscriber = { version = "0.3", features = [
|
|
48
|
+
"env-filter",
|
|
49
|
+
"json",
|
|
50
|
+
"chrono",
|
|
51
|
+
] }
|
|
40
52
|
tracing-log = "0.2"
|
|
41
53
|
tracing-appender = "0.2.3"
|
|
42
54
|
chrono = "0.4"
|
|
@@ -52,10 +64,14 @@ regex = "1.10"
|
|
|
52
64
|
url = "2.5.4"
|
|
53
65
|
tokio-stream = { version = "0.1", features = ["sync"] }
|
|
54
66
|
anyhow = "1.0"
|
|
67
|
+
dotenvy = "0.15"
|
|
55
68
|
tokenizers = { version = "0.22.2" }
|
|
56
69
|
tiktoken-rs = { version = "0.7.0" }
|
|
57
70
|
minijinja = { version = "2.0" }
|
|
58
|
-
rustls = { version = "0.23", default-features = false, features = [
|
|
71
|
+
rustls = { version = "0.23", default-features = false, features = [
|
|
72
|
+
"ring",
|
|
73
|
+
"std",
|
|
74
|
+
] }
|
|
59
75
|
hf-hub = { version = "0.4.3", features = ["tokio"] }
|
|
60
76
|
|
|
61
77
|
# gRPC and Protobuf dependencies
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vllm-router
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support
|
|
5
5
|
Author-email: Byron Hsu <byronhsu1230@gmail.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -64,7 +64,12 @@ cargo build --release
|
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
#### Python Package
|
|
67
|
+
Install from PyPI
|
|
67
68
|
```bash
|
|
69
|
+
pip install vllm-router ```
|
|
70
|
+
|
|
71
|
+
To build from source:
|
|
72
|
+
```bash
|
|
68
73
|
pip install setuptools-rust wheel build
|
|
69
74
|
python -m build
|
|
70
75
|
pip install dist/*.whl
|
|
@@ -129,6 +134,19 @@ cargo run --release -- \
|
|
|
129
134
|
|
|
130
135
|
## Configuration
|
|
131
136
|
|
|
137
|
+
### Authentication
|
|
138
|
+
|
|
139
|
+
Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
|
|
140
|
+
When set, all HTTP endpoints require `Authorization: Bearer <token>` and tokens are validated with HTTP 200 responses.
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# .env
|
|
144
|
+
API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
|
|
145
|
+
|
|
146
|
+
# CLI override
|
|
147
|
+
vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
|
|
148
|
+
```
|
|
149
|
+
|
|
132
150
|
### Metrics
|
|
133
151
|
|
|
134
152
|
Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
|
|
@@ -42,7 +42,12 @@ cargo build --release
|
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
#### Python Package
|
|
45
|
+
Install from PyPI
|
|
45
46
|
```bash
|
|
47
|
+
pip install vllm-router ```
|
|
48
|
+
|
|
49
|
+
To build from source:
|
|
50
|
+
```bash
|
|
46
51
|
pip install setuptools-rust wheel build
|
|
47
52
|
python -m build
|
|
48
53
|
pip install dist/*.whl
|
|
@@ -107,6 +112,19 @@ cargo run --release -- \
|
|
|
107
112
|
|
|
108
113
|
## Configuration
|
|
109
114
|
|
|
115
|
+
### Authentication
|
|
116
|
+
|
|
117
|
+
Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
|
|
118
|
+
When set, all HTTP endpoints require `Authorization: Bearer <token>` and tokens are validated with HTTP 200 responses.
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# .env
|
|
122
|
+
API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
|
|
123
|
+
|
|
124
|
+
# CLI override
|
|
125
|
+
vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
|
|
126
|
+
```
|
|
127
|
+
|
|
110
128
|
### Metrics
|
|
111
129
|
|
|
112
130
|
Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vllm-router
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support
|
|
5
5
|
Author-email: Byron Hsu <byronhsu1230@gmail.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -64,7 +64,12 @@ cargo build --release
|
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
#### Python Package
|
|
67
|
+
Install from PyPI
|
|
67
68
|
```bash
|
|
69
|
+
pip install vllm-router ```
|
|
70
|
+
|
|
71
|
+
To build from source:
|
|
72
|
+
```bash
|
|
68
73
|
pip install setuptools-rust wheel build
|
|
69
74
|
python -m build
|
|
70
75
|
pip install dist/*.whl
|
|
@@ -129,6 +134,19 @@ cargo run --release -- \
|
|
|
129
134
|
|
|
130
135
|
## Configuration
|
|
131
136
|
|
|
137
|
+
### Authentication
|
|
138
|
+
|
|
139
|
+
Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
|
|
140
|
+
When set, all HTTP endpoints require `Authorization: Bearer <token>` and tokens are validated with HTTP 200 responses.
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# .env
|
|
144
|
+
API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
|
|
145
|
+
|
|
146
|
+
# CLI override
|
|
147
|
+
vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
|
|
148
|
+
```
|
|
149
|
+
|
|
132
150
|
### Metrics
|
|
133
151
|
|
|
134
152
|
Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "vllm-router"
|
|
7
|
-
version = "0.1.9"
|
|
7
|
+
version = "0.1.10"
|
|
8
8
|
description = "High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support"
|
|
9
9
|
authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
use super::ConfigResult;
|
|
2
|
+
use crate::config::validation::ConfigValidator;
|
|
2
3
|
use serde::{Deserialize, Serialize};
|
|
3
4
|
use std::collections::HashMap;
|
|
4
5
|
|
|
@@ -29,6 +30,9 @@ pub struct RouterConfig {
|
|
|
29
30
|
pub intra_node_data_parallel_size: usize,
|
|
30
31
|
/// The api key used for the authorization with the worker
|
|
31
32
|
pub api_key: Option<String>,
|
|
33
|
+
/// API key validation URLs (if set, incoming requests must validate against them)
|
|
34
|
+
#[serde(default)]
|
|
35
|
+
pub api_key_validation_urls: Vec<String>,
|
|
32
36
|
/// Service discovery configuration (optional)
|
|
33
37
|
pub discovery: Option<DiscoveryConfig>,
|
|
34
38
|
/// Metrics configuration (optional)
|
|
@@ -417,6 +421,7 @@ impl Default for RouterConfig {
|
|
|
417
421
|
worker_startup_check_interval_secs: 30,
|
|
418
422
|
intra_node_data_parallel_size: 1,
|
|
419
423
|
api_key: None,
|
|
424
|
+
api_key_validation_urls: vec![],
|
|
420
425
|
discovery: None,
|
|
421
426
|
metrics: None,
|
|
422
427
|
log_dir: None,
|
|
@@ -455,7 +460,7 @@ impl RouterConfig {
|
|
|
455
460
|
|
|
456
461
|
/// Validate the configuration
|
|
457
462
|
pub fn validate(&self) -> ConfigResult<()> {
|
|
458
|
-
|
|
463
|
+
ConfigValidator::validate(self)
|
|
459
464
|
}
|
|
460
465
|
|
|
461
466
|
/// Get the routing mode type as a string
|
|
@@ -980,6 +985,7 @@ mod tests {
|
|
|
980
985
|
worker_startup_check_interval_secs: 5,
|
|
981
986
|
intra_node_data_parallel_size: 1,
|
|
982
987
|
api_key: None,
|
|
988
|
+
api_key_validation_urls: vec![],
|
|
983
989
|
discovery: Some(DiscoveryConfig {
|
|
984
990
|
enabled: true,
|
|
985
991
|
namespace: Some("vllm".to_string()),
|
|
@@ -1046,6 +1052,7 @@ mod tests {
|
|
|
1046
1052
|
worker_startup_check_interval_secs: 15,
|
|
1047
1053
|
intra_node_data_parallel_size: 1,
|
|
1048
1054
|
api_key: None,
|
|
1055
|
+
api_key_validation_urls: vec![],
|
|
1049
1056
|
discovery: Some(DiscoveryConfig {
|
|
1050
1057
|
enabled: true,
|
|
1051
1058
|
namespace: None,
|
|
@@ -1103,6 +1110,7 @@ mod tests {
|
|
|
1103
1110
|
worker_startup_check_interval_secs: 20,
|
|
1104
1111
|
intra_node_data_parallel_size: 1,
|
|
1105
1112
|
api_key: None,
|
|
1113
|
+
api_key_validation_urls: vec![],
|
|
1106
1114
|
discovery: Some(DiscoveryConfig {
|
|
1107
1115
|
enabled: true,
|
|
1108
1116
|
namespace: Some("production".to_string()),
|
|
@@ -158,7 +158,7 @@ impl ResponseStorage for MemoryResponseStorage {
|
|
|
158
158
|
.collect();
|
|
159
159
|
|
|
160
160
|
// Sort by creation time (newest first)
|
|
161
|
-
responses_with_time.
|
|
161
|
+
responses_with_time.sort_by_key(|b| std::cmp::Reverse(b.0));
|
|
162
162
|
|
|
163
163
|
// Apply limit and collect the actual responses
|
|
164
164
|
let limit = limit.unwrap_or(responses_with_time.len());
|
|
@@ -44,7 +44,9 @@ struct Router {
|
|
|
44
44
|
max_payload_size: usize,
|
|
45
45
|
intra_node_data_parallel_size: usize,
|
|
46
46
|
api_key: Option<String>,
|
|
47
|
+
api_key_validation_urls: Vec<String>,
|
|
47
48
|
log_dir: Option<String>,
|
|
49
|
+
|
|
48
50
|
log_level: Option<String>,
|
|
49
51
|
service_discovery: bool,
|
|
50
52
|
selector: HashMap<String, String>,
|
|
@@ -201,6 +203,7 @@ impl Router {
|
|
|
201
203
|
worker_startup_check_interval_secs: self.worker_startup_check_interval,
|
|
202
204
|
intra_node_data_parallel_size: self.intra_node_data_parallel_size,
|
|
203
205
|
api_key: self.api_key.clone(),
|
|
206
|
+
api_key_validation_urls: self.api_key_validation_urls.clone(),
|
|
204
207
|
discovery,
|
|
205
208
|
metrics,
|
|
206
209
|
log_dir: self.log_dir.clone(),
|
|
@@ -261,6 +264,7 @@ impl Router {
|
|
|
261
264
|
max_payload_size = 512 * 1024 * 1024, // 512MB default for large batches
|
|
262
265
|
intra_node_data_parallel_size = 1,
|
|
263
266
|
api_key = None,
|
|
267
|
+
api_key_validation_urls = vec![],
|
|
264
268
|
log_dir = None,
|
|
265
269
|
log_level = None,
|
|
266
270
|
service_discovery = false,
|
|
@@ -326,6 +330,7 @@ impl Router {
|
|
|
326
330
|
max_payload_size: usize,
|
|
327
331
|
intra_node_data_parallel_size: usize,
|
|
328
332
|
api_key: Option<String>,
|
|
333
|
+
api_key_validation_urls: Vec<String>,
|
|
329
334
|
log_dir: Option<String>,
|
|
330
335
|
log_level: Option<String>,
|
|
331
336
|
service_discovery: bool,
|
|
@@ -402,6 +407,7 @@ impl Router {
|
|
|
402
407
|
max_payload_size,
|
|
403
408
|
intra_node_data_parallel_size,
|
|
404
409
|
api_key,
|
|
410
|
+
api_key_validation_urls,
|
|
405
411
|
log_dir,
|
|
406
412
|
log_level,
|
|
407
413
|
service_discovery,
|
|
@@ -191,6 +191,10 @@ struct CliArgs {
|
|
|
191
191
|
#[arg(long)]
|
|
192
192
|
api_key: Option<String>,
|
|
193
193
|
|
|
194
|
+
/// API key validation URLs (defaults to env file)
|
|
195
|
+
#[arg(long, num_args = 0..)]
|
|
196
|
+
api_key_validation_urls: Vec<String>,
|
|
197
|
+
|
|
194
198
|
/// Backend to route requests to (vllm, trtllm, openai, anthropic)
|
|
195
199
|
#[arg(long, value_enum, default_value_t = Backend::Vllm, alias = "runtime")]
|
|
196
200
|
backend: Backend,
|
|
@@ -540,6 +544,18 @@ impl CliArgs {
|
|
|
540
544
|
_ => Self::determine_connection_mode(&all_urls),
|
|
541
545
|
};
|
|
542
546
|
|
|
547
|
+
let api_key_validation_urls = if !self.api_key_validation_urls.is_empty() {
|
|
548
|
+
self.api_key_validation_urls.clone()
|
|
549
|
+
} else if let Ok(raw_urls) = std::env::var("API_KEY_VALIDATION_URLS") {
|
|
550
|
+
raw_urls
|
|
551
|
+
.split(',')
|
|
552
|
+
.map(|url| url.trim().to_string())
|
|
553
|
+
.filter(|url| !url.is_empty())
|
|
554
|
+
.collect()
|
|
555
|
+
} else {
|
|
556
|
+
Vec::new()
|
|
557
|
+
};
|
|
558
|
+
|
|
543
559
|
// Build RouterConfig
|
|
544
560
|
Ok(RouterConfig {
|
|
545
561
|
mode,
|
|
@@ -553,6 +569,7 @@ impl CliArgs {
|
|
|
553
569
|
worker_startup_check_interval_secs: self.worker_startup_check_interval,
|
|
554
570
|
intra_node_data_parallel_size: self.intra_node_data_parallel_size,
|
|
555
571
|
api_key: self.api_key.clone(),
|
|
572
|
+
api_key_validation_urls,
|
|
556
573
|
discovery,
|
|
557
574
|
metrics,
|
|
558
575
|
log_dir: self.log_dir.clone(),
|
|
@@ -646,6 +663,7 @@ impl CliArgs {
|
|
|
646
663
|
}
|
|
647
664
|
|
|
648
665
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
666
|
+
dotenvy::dotenv().ok();
|
|
649
667
|
println!("DEBUG: Main function started");
|
|
650
668
|
|
|
651
669
|
// Parse prefill arguments manually before clap parsing
|