vllm-router 0.1.9__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. {vllm_router-0.1.9 → vllm_router-0.1.10}/Cargo.toml +19 -3
  2. {vllm_router-0.1.9/py_src/vllm_router.egg-info → vllm_router-0.1.10}/PKG-INFO +19 -1
  3. {vllm_router-0.1.9 → vllm_router-0.1.10}/README.md +18 -0
  4. {vllm_router-0.1.9 → vllm_router-0.1.10/py_src/vllm_router.egg-info}/PKG-INFO +19 -1
  5. {vllm_router-0.1.9 → vllm_router-0.1.10}/pyproject.toml +1 -1
  6. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/types.rs +9 -1
  7. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/response_memory_store.rs +1 -1
  8. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/lib.rs +6 -0
  9. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/main.rs +18 -0
  10. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/cache_aware.rs +226 -203
  11. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/mod.rs +25 -0
  12. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/spec.rs +597 -1
  13. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/validation.rs +2 -0
  14. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/header_utils.rs +44 -0
  15. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/dp_utils.rs +6 -0
  16. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/router.rs +4 -1
  17. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/vllm_pd_router.rs +125 -49
  18. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/server.rs +212 -10
  19. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/service_discovery.rs +2 -0
  20. vllm_router-0.1.10/src/tree.rs +2312 -0
  21. vllm_router-0.1.9/src/tree.rs +0 -1478
  22. {vllm_router-0.1.9 → vllm_router-0.1.10}/MANIFEST.in +0 -0
  23. {vllm_router-0.1.9 → vllm_router-0.1.10}/build.rs +0 -0
  24. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/__init__.py +0 -0
  25. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/launch_router.py +0 -0
  26. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/mini_lb.py +0 -0
  27. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/router.py +0 -0
  28. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/router_args.py +0 -0
  29. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router/version.py +0 -0
  30. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/SOURCES.txt +0 -0
  31. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/dependency_links.txt +0 -0
  32. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/entry_points.txt +0 -0
  33. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/not-zip-safe +0 -0
  34. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/requires.txt +0 -0
  35. {vllm_router-0.1.9 → vllm_router-0.1.10}/py_src/vllm_router.egg-info/top_level.txt +0 -0
  36. {vllm_router-0.1.9 → vllm_router-0.1.10}/setup.cfg +0 -0
  37. {vllm_router-0.1.9 → vllm_router-0.1.10}/setup.py +0 -0
  38. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/mod.rs +0 -0
  39. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/config/validation.rs +0 -0
  40. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/circuit_breaker.rs +0 -0
  41. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/error.rs +0 -0
  42. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/mod.rs +0 -0
  43. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/retry.rs +0 -0
  44. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/token_bucket.rs +0 -0
  45. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/worker.rs +0 -0
  46. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/core/worker_registry.rs +0 -0
  47. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/mod.rs +0 -0
  48. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/response_noop_store.rs +0 -0
  49. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/data_connector/responses.rs +0 -0
  50. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/grpc/client.rs +0 -0
  51. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/grpc/mod.rs +0 -0
  52. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/handler.rs +0 -0
  53. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/logger.rs +0 -0
  54. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/logging.rs +0 -0
  55. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/metrics.rs +0 -0
  56. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/middleware.rs +0 -0
  57. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/consistent_hash.rs +0 -0
  58. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/factory.rs +0 -0
  59. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/power_of_two.rs +0 -0
  60. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/random.rs +0 -0
  61. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/registry.rs +0 -0
  62. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/policies/round_robin.rs +0 -0
  63. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/proto/vllm_scheduler.proto +0 -0
  64. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/mod.rs +0 -0
  65. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/protocols/worker_spec.rs +0 -0
  66. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/factory.rs +0 -0
  67. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/mod.rs +0 -0
  68. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/pd_router.rs +0 -0
  69. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/grpc/router.rs +0 -0
  70. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/logprobs_merge.rs +0 -0
  71. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/mod.rs +0 -0
  72. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/openai_router.rs +0 -0
  73. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/pd_router.rs +0 -0
  74. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/pd_types.rs +0 -0
  75. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/http/vllm_service_discovery.rs +0 -0
  76. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/mod.rs +0 -0
  77. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routers/router_manager.rs +0 -0
  78. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/interface.rs +0 -0
  79. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/mod.rs +0 -0
  80. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/pool_route.rs +0 -0
  81. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/prefill_decode_route.rs +0 -0
  82. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/round_robin_route.rs +0 -0
  83. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/routing_tree_builder.rs +0 -0
  84. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/routes/single_server_route.rs +0 -0
  85. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/chat_template.rs +0 -0
  86. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/factory.rs +0 -0
  87. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/hub.rs +0 -0
  88. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/huggingface.rs +0 -0
  89. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/mock.rs +0 -0
  90. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/mod.rs +0 -0
  91. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/sequence.rs +0 -0
  92. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/stop.rs +0 -0
  93. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/stream.rs +0 -0
  94. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/tests.rs +0 -0
  95. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/tiktoken.rs +0 -0
  96. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/tokenizer/traits.rs +0 -0
  97. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/types.rs +0 -0
  98. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/utils/json.rs +0 -0
  99. {vllm_router-0.1.9 → vllm_router-0.1.10}/src/utils/mod.rs +0 -0
@@ -22,7 +22,15 @@ path = "src/main.rs"
22
22
  clap = { version = "4", features = ["derive"] }
23
23
  axum = { version = "0.8.4", features = ["macros", "ws", "tracing"] }
24
24
  tower = { version = "0.5", features = ["full"] }
25
- tower-http = { version = "0.6", features = ["trace", "compression-gzip", "cors", "timeout", "limit", "request-id", "util"] }
25
+ tower-http = { version = "0.6", features = [
26
+ "trace",
27
+ "compression-gzip",
28
+ "cors",
29
+ "timeout",
30
+ "limit",
31
+ "request-id",
32
+ "util",
33
+ ] }
26
34
  serde = { version = "1.0", features = ["derive"] }
27
35
  serde_json = "1.0"
28
36
  bytes = "1.8.0"
@@ -36,7 +44,11 @@ http = "1.1.0"
36
44
  tokio = { version = "1.42.0", features = ["full"] }
37
45
  async-trait = "0.1"
38
46
  tracing = "0.1"
39
- tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "chrono"] }
47
+ tracing-subscriber = { version = "0.3", features = [
48
+ "env-filter",
49
+ "json",
50
+ "chrono",
51
+ ] }
40
52
  tracing-log = "0.2"
41
53
  tracing-appender = "0.2.3"
42
54
  chrono = "0.4"
@@ -52,10 +64,14 @@ regex = "1.10"
52
64
  url = "2.5.4"
53
65
  tokio-stream = { version = "0.1", features = ["sync"] }
54
66
  anyhow = "1.0"
67
+ dotenvy = "0.15"
55
68
  tokenizers = { version = "0.22.2" }
56
69
  tiktoken-rs = { version = "0.7.0" }
57
70
  minijinja = { version = "2.0" }
58
- rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
71
+ rustls = { version = "0.23", default-features = false, features = [
72
+ "ring",
73
+ "std",
74
+ ] }
59
75
  hf-hub = { version = "0.4.3", features = ["tokio"] }
60
76
 
61
77
  # gRPC and Protobuf dependencies
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm-router
3
- Version: 0.1.9
3
+ Version: 0.1.10
4
4
  Summary: High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support
5
5
  Author-email: Byron Hsu <byronhsu1230@gmail.com>
6
6
  License: Apache-2.0
@@ -64,7 +64,12 @@ cargo build --release
64
64
  ```
65
65
 
66
66
  #### Python Package
67
+ Install from PyPI
67
68
  ```bash
69
+ pip install vllm-router
+ ```
70
+
71
+ To build from source:
72
+ ```bash
68
73
  pip install setuptools-rust wheel build
69
74
  python -m build
70
75
  pip install dist/*.whl
@@ -129,6 +134,19 @@ cargo run --release -- \
129
134
 
130
135
  ## Configuration
131
136
 
137
+ ### Authentication
138
+
139
+ Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
140
+ When set, all HTTP endpoints require an `Authorization: Bearer <token>` header; each token is checked against the validation URLs, and an HTTP 200 response marks it as valid.
141
+
142
+ ```bash
143
+ # .env
144
+ API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
145
+
146
+ # CLI override
147
+ vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
148
+ ```
149
+
132
150
  ### Metrics
133
151
 
134
152
  Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
@@ -42,7 +42,12 @@ cargo build --release
42
42
  ```
43
43
 
44
44
  #### Python Package
45
+ Install from PyPI
45
46
  ```bash
47
+ pip install vllm-router
+ ```
48
+
49
+ To build from source:
50
+ ```bash
46
51
  pip install setuptools-rust wheel build
47
52
  python -m build
48
53
  pip install dist/*.whl
@@ -107,6 +112,19 @@ cargo run --release -- \
107
112
 
108
113
  ## Configuration
109
114
 
115
+ ### Authentication
116
+
117
+ Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
118
+ When set, all HTTP endpoints require an `Authorization: Bearer <token>` header; each token is checked against the validation URLs, and an HTTP 200 response marks it as valid.
119
+
120
+ ```bash
121
+ # .env
122
+ API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
123
+
124
+ # CLI override
125
+ vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
126
+ ```
127
+
110
128
  ### Metrics
111
129
 
112
130
  Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm-router
3
- Version: 0.1.9
3
+ Version: 0.1.10
4
4
  Summary: High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support
5
5
  Author-email: Byron Hsu <byronhsu1230@gmail.com>
6
6
  License: Apache-2.0
@@ -64,7 +64,12 @@ cargo build --release
64
64
  ```
65
65
 
66
66
  #### Python Package
67
+ Install from PyPI
67
68
  ```bash
69
+ pip install vllm-router
+ ```
70
+
71
+ To build from source:
72
+ ```bash
68
73
  pip install setuptools-rust wheel build
69
74
  python -m build
70
75
  pip install dist/*.whl
@@ -129,6 +134,19 @@ cargo run --release -- \
129
134
 
130
135
  ## Configuration
131
136
 
137
+ ### Authentication
138
+
139
+ Enable bearer-token validation by listing validation URLs (comma-separated) in `.env` via `API_KEY_VALIDATION_URLS` or passing `--api-key-validation-urls`.
140
+ When set, all HTTP endpoints require an `Authorization: Bearer <token>` header; each token is checked against the validation URLs, and an HTTP 200 response marks it as valid.
141
+
142
+ ```bash
143
+ # .env
144
+ API_KEY_VALIDATION_URLS=https://codebase.helmholtz.cloud/api/v4/user
145
+
146
+ # CLI override
147
+ vllm-router --api-key-validation-urls https://codebase.helmholtz.cloud/api/v4/user
148
+ ```
149
+
132
150
  ### Metrics
133
151
 
134
152
  Prometheus metrics endpoint available at `127.0.0.1:29000` by default.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "vllm-router"
7
- version = "0.1.9"
7
+ version = "0.1.10"
8
8
  description = "High-performance Rust-based load balancer for VLLM with multiple routing algorithms and prefill-decode disaggregation support"
9
9
  authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
10
10
  requires-python = ">=3.8"
@@ -1,4 +1,5 @@
1
1
  use super::ConfigResult;
2
+ use crate::config::validation::ConfigValidator;
2
3
  use serde::{Deserialize, Serialize};
3
4
  use std::collections::HashMap;
4
5
 
@@ -29,6 +30,9 @@ pub struct RouterConfig {
29
30
  pub intra_node_data_parallel_size: usize,
30
31
  /// The api key used for the authorization with the worker
31
32
  pub api_key: Option<String>,
33
+ /// API key validation URLs (if set, incoming requests must validate against them)
34
+ #[serde(default)]
35
+ pub api_key_validation_urls: Vec<String>,
32
36
  /// Service discovery configuration (optional)
33
37
  pub discovery: Option<DiscoveryConfig>,
34
38
  /// Metrics configuration (optional)
@@ -417,6 +421,7 @@ impl Default for RouterConfig {
417
421
  worker_startup_check_interval_secs: 30,
418
422
  intra_node_data_parallel_size: 1,
419
423
  api_key: None,
424
+ api_key_validation_urls: vec![],
420
425
  discovery: None,
421
426
  metrics: None,
422
427
  log_dir: None,
@@ -455,7 +460,7 @@ impl RouterConfig {
455
460
 
456
461
  /// Validate the configuration
457
462
  pub fn validate(&self) -> ConfigResult<()> {
458
- crate::config::validation::ConfigValidator::validate(self)
463
+ ConfigValidator::validate(self)
459
464
  }
460
465
 
461
466
  /// Get the routing mode type as a string
@@ -980,6 +985,7 @@ mod tests {
980
985
  worker_startup_check_interval_secs: 5,
981
986
  intra_node_data_parallel_size: 1,
982
987
  api_key: None,
988
+ api_key_validation_urls: vec![],
983
989
  discovery: Some(DiscoveryConfig {
984
990
  enabled: true,
985
991
  namespace: Some("vllm".to_string()),
@@ -1046,6 +1052,7 @@ mod tests {
1046
1052
  worker_startup_check_interval_secs: 15,
1047
1053
  intra_node_data_parallel_size: 1,
1048
1054
  api_key: None,
1055
+ api_key_validation_urls: vec![],
1049
1056
  discovery: Some(DiscoveryConfig {
1050
1057
  enabled: true,
1051
1058
  namespace: None,
@@ -1103,6 +1110,7 @@ mod tests {
1103
1110
  worker_startup_check_interval_secs: 20,
1104
1111
  intra_node_data_parallel_size: 1,
1105
1112
  api_key: None,
1113
+ api_key_validation_urls: vec![],
1106
1114
  discovery: Some(DiscoveryConfig {
1107
1115
  enabled: true,
1108
1116
  namespace: Some("production".to_string()),
@@ -158,7 +158,7 @@ impl ResponseStorage for MemoryResponseStorage {
158
158
  .collect();
159
159
 
160
160
  // Sort by creation time (newest first)
161
- responses_with_time.sort_by(|a, b| b.0.cmp(&a.0));
161
+ responses_with_time.sort_by_key(|b| std::cmp::Reverse(b.0));
162
162
 
163
163
  // Apply limit and collect the actual responses
164
164
  let limit = limit.unwrap_or(responses_with_time.len());
@@ -44,7 +44,9 @@ struct Router {
44
44
  max_payload_size: usize,
45
45
  intra_node_data_parallel_size: usize,
46
46
  api_key: Option<String>,
47
+ api_key_validation_urls: Vec<String>,
47
48
  log_dir: Option<String>,
49
+
48
50
  log_level: Option<String>,
49
51
  service_discovery: bool,
50
52
  selector: HashMap<String, String>,
@@ -201,6 +203,7 @@ impl Router {
201
203
  worker_startup_check_interval_secs: self.worker_startup_check_interval,
202
204
  intra_node_data_parallel_size: self.intra_node_data_parallel_size,
203
205
  api_key: self.api_key.clone(),
206
+ api_key_validation_urls: self.api_key_validation_urls.clone(),
204
207
  discovery,
205
208
  metrics,
206
209
  log_dir: self.log_dir.clone(),
@@ -261,6 +264,7 @@ impl Router {
261
264
  max_payload_size = 512 * 1024 * 1024, // 512MB default for large batches
262
265
  intra_node_data_parallel_size = 1,
263
266
  api_key = None,
267
+ api_key_validation_urls = vec![],
264
268
  log_dir = None,
265
269
  log_level = None,
266
270
  service_discovery = false,
@@ -326,6 +330,7 @@ impl Router {
326
330
  max_payload_size: usize,
327
331
  intra_node_data_parallel_size: usize,
328
332
  api_key: Option<String>,
333
+ api_key_validation_urls: Vec<String>,
329
334
  log_dir: Option<String>,
330
335
  log_level: Option<String>,
331
336
  service_discovery: bool,
@@ -402,6 +407,7 @@ impl Router {
402
407
  max_payload_size,
403
408
  intra_node_data_parallel_size,
404
409
  api_key,
410
+ api_key_validation_urls,
405
411
  log_dir,
406
412
  log_level,
407
413
  service_discovery,
@@ -191,6 +191,10 @@ struct CliArgs {
191
191
  #[arg(long)]
192
192
  api_key: Option<String>,
193
193
 
194
+ /// API key validation URLs (defaults to env file)
195
+ #[arg(long, num_args = 0..)]
196
+ api_key_validation_urls: Vec<String>,
197
+
194
198
  /// Backend to route requests to (vllm, trtllm, openai, anthropic)
195
199
  #[arg(long, value_enum, default_value_t = Backend::Vllm, alias = "runtime")]
196
200
  backend: Backend,
@@ -540,6 +544,18 @@ impl CliArgs {
540
544
  _ => Self::determine_connection_mode(&all_urls),
541
545
  };
542
546
 
547
+ let api_key_validation_urls = if !self.api_key_validation_urls.is_empty() {
548
+ self.api_key_validation_urls.clone()
549
+ } else if let Ok(raw_urls) = std::env::var("API_KEY_VALIDATION_URLS") {
550
+ raw_urls
551
+ .split(',')
552
+ .map(|url| url.trim().to_string())
553
+ .filter(|url| !url.is_empty())
554
+ .collect()
555
+ } else {
556
+ Vec::new()
557
+ };
558
+
543
559
  // Build RouterConfig
544
560
  Ok(RouterConfig {
545
561
  mode,
@@ -553,6 +569,7 @@ impl CliArgs {
553
569
  worker_startup_check_interval_secs: self.worker_startup_check_interval,
554
570
  intra_node_data_parallel_size: self.intra_node_data_parallel_size,
555
571
  api_key: self.api_key.clone(),
572
+ api_key_validation_urls,
556
573
  discovery,
557
574
  metrics,
558
575
  log_dir: self.log_dir.clone(),
@@ -646,6 +663,7 @@ impl CliArgs {
646
663
  }
647
664
 
648
665
  fn main() -> Result<(), Box<dyn std::error::Error>> {
666
+ dotenvy::dotenv().ok();
649
667
  println!("DEBUG: Main function started");
650
668
 
651
669
  // Parse prefill arguments manually before clap parsing