tb-router-embed 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ # Comma-separated vLLM backend URLs (host:port or full URL)
2
+ # Examples: localhost:8001 (same machine) or 192.168.86.173:8001,192.168.86.176:8001
3
+ EMBEDDING_BACKENDS=192.168.86.173:8001,192.168.86.176:8001
4
+
5
+ # Routing strategy: failover (default) or round_robin
6
+ ROUTER_STRATEGY=failover
7
+
8
+ # Port for the router (do not use 8001; reserved for vLLM backends)
9
+ ROUTER_PORT=8011
10
+
11
+ # Max concurrent requests to backends; excess requests wait in queue
12
+ ROUTER_MAX_CONCURRENT=20
@@ -0,0 +1,27 @@
1
+ name: Build
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install
21
+ run: pip install -e ".[dev]"
22
+
23
+ - name: Test
24
+ run: pytest tests/ -v
25
+
26
+ - name: Build package
27
+ run: pip install build && python -m build
@@ -0,0 +1,30 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.11"
18
+
19
+ - name: Install build dependencies
20
+ run: |
21
+ python -m pip install --upgrade pip
22
+ pip install build twine
23
+
24
+ - name: Build distribution
25
+ run: python -m build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
29
+ with:
30
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,23 @@
1
+ # Build
2
+ build/
3
+ dist/
4
+ *.egg-info/
5
+ *.egg
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ .pytest_cache/
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # Env (keep .env.example)
17
+ .env
18
+ .env.local
19
+
20
+ # IDE
21
+ .idea/
22
+ .vscode/
23
+ *.swp
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Layers
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: tb-router-embed
3
+ Version: 1.0.1
4
+ Summary: HTTP proxy router for vLLM embedding API with round-robin and failover
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Keywords: embedding,load-balancer,rag,router,vllm
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Python: >=3.11
13
+ Requires-Dist: fastapi>=0.100.0
14
+ Requires-Dist: httpx>=0.20.0
15
+ Requires-Dist: python-dotenv>=1.0.0
16
+ Requires-Dist: uvicorn[standard]>=0.20.0
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
19
+ Requires-Dist: pytest>=7.0; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # router-embed
23
+
24
+ HTTP proxy router for vLLM embedding API. Routes `/v1/embeddings` and `/v1/models` requests to multiple vLLM backends with configurable round-robin or failover strategies.
25
+
26
+ ## Setup
27
+
28
+ ```bash
29
+ python3.11 -m venv venv
30
+ source venv/bin/activate
31
+ pip install -r requirements.txt
32
+ pip install -e .
33
+ ```
34
+
35
+ ## Run
36
+
37
+ ```bash
38
+ cp .env.example .env # edit EMBEDDING_BACKENDS for your vLLM servers
39
+ source venv/bin/activate
40
+ python -m router_embed.main
41
+ ```
42
+
43
+ Alternatively, run the `router-embed` console script (after activating the venv). If `router-embed` isn't found, run `./venv/bin/python -m router_embed.main`, which works without activation.
44
+
45
+ Configure clients (ingest/retrieve layers): set `EMBEDDING_URL=http://<router-host>:8011` instead of pointing directly at a vLLM server.
46
+
47
+ ## Deploy (simple)
48
+
49
+ ```bash
50
+ # From the project directory
51
+ source venv/bin/activate
52
+ cp .env.example .env # edit EMBEDDING_BACKENDS
53
+ nohup python -m router_embed.main &
54
+ ```
55
+
56
+ Or run in the foreground (logs go to the terminal):
57
+ ```bash
58
+ source venv/bin/activate && python -m router_embed.main
59
+ ```
60
+
61
+ ## Configuration
62
+
63
+ | Variable | Default | Description |
64
+ |----------|---------|-------------|
65
+ | `EMBEDDING_BACKENDS` | `192.168.86.173:8001,192.168.86.176:8001` | Comma-separated backend URLs |
66
+ | `ROUTER_STRATEGY` | `failover` | `failover` or `round_robin` |
67
+ | `ROUTER_PORT` | `8011` | Port for the router (do not use 8001) |
68
+ | `ROUTER_MAX_CONCURRENT` | `20` | Max concurrent requests; excess wait in queue |
69
+
70
+ ## API
71
+
72
+ | Endpoint | Method | Description |
73
+ |----------|--------|-------------|
74
+ | `/v1/embeddings` | POST | Forward to backend (OpenAI-compatible) |
75
+ | `/v1/models` | GET | Forward to backend |
76
+ | `/health` | GET | Health check |
77
+
78
+ Examples:
79
+ ```bash
80
+ # Embeddings
81
+ curl -X POST http://localhost:8011/v1/embeddings \
82
+ -H "Content-Type: application/json" \
83
+ -d '{"model": "BAAI/bge-m3", "input": "hello world"}'
84
+
85
+ # Models
86
+ curl http://localhost:8011/v1/models
87
+
88
+ # Health
89
+ curl http://localhost:8011/health
90
+ ```
91
+
92
+ ## Troubleshooting
93
+
94
+ **"address already in use" (port 8011)** — Another process is using the port. Stop it: `lsof -ti:8011 | xargs kill` (macOS/Linux). Or set `ROUTER_PORT=8012` in `.env` to use a different port.
95
+
96
+ **"All backends unavailable: All connection attempts failed"** — The router could not reach any vLLM backend. Ensure:
97
+ 1. vLLM is running on your backend hosts (default: 192.168.86.173:8001, 192.168.86.176:8001)
98
+ 2. Your machine can reach those IPs (same network, no firewall blocking)
99
+ 3. Or set `EMBEDDING_BACKENDS` in `.env` to your vLLM URLs, e.g. `localhost:8001` if vLLM runs on the same machine
100
+
101
+ ## Development
102
+
103
+ ```bash
104
+ pip install -e ".[dev]"
105
+ pytest tests/ -v
106
+ # or: make test
107
+ ```
108
+
109
+ ## Publish to PyPI
110
+
111
+ One-shot via GitHub Actions (see [pypi-hello-world](https://github.com/taixingbi/pypi-hello-world)):
112
+
113
+ 1. Add `PYPI_API_TOKEN` as a repo secret (Settings → Secrets → Actions). Get token at [pypi.org/manage/account/token/](https://pypi.org/manage/account/token/).
114
+ 2. Push to `main` or run **Actions → Publish to PyPI → Run workflow** manually.
115
+ 3. The workflow builds and uploads to PyPI.
@@ -0,0 +1,94 @@
1
+ # router-embed
2
+
3
+ HTTP proxy router for vLLM embedding API. Routes `/v1/embeddings` and `/v1/models` requests to multiple vLLM backends with configurable round-robin or failover strategies.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ python3.11 -m venv venv
9
+ source venv/bin/activate
10
+ pip install -r requirements.txt
11
+ pip install -e .
12
+ ```
13
+
14
+ ## Run
15
+
16
+ ```bash
17
+ cp .env.example .env # edit EMBEDDING_BACKENDS for your vLLM servers
18
+ source venv/bin/activate
19
+ python -m router_embed.main
20
+ ```
21
+
22
+ Alternatively, run the `router-embed` console script (after activating the venv). If `router-embed` isn't found, run `./venv/bin/python -m router_embed.main`, which works without activation.
23
+
24
+ Configure clients (ingest/retrieve layers): set `EMBEDDING_URL=http://<router-host>:8011` instead of pointing directly at a vLLM server.
25
+
26
+ ## Deploy (simple)
27
+
28
+ ```bash
29
+ # From the project directory
30
+ source venv/bin/activate
31
+ cp .env.example .env # edit EMBEDDING_BACKENDS
32
+ nohup python -m router_embed.main &
33
+ ```
34
+
35
+ Or run in the foreground (logs go to the terminal):
36
+ ```bash
37
+ source venv/bin/activate && python -m router_embed.main
38
+ ```
39
+
40
+ ## Configuration
41
+
42
+ | Variable | Default | Description |
43
+ |----------|---------|-------------|
44
+ | `EMBEDDING_BACKENDS` | `192.168.86.173:8001,192.168.86.176:8001` | Comma-separated backend URLs |
45
+ | `ROUTER_STRATEGY` | `failover` | `failover` or `round_robin` |
46
+ | `ROUTER_PORT` | `8011` | Port for the router (do not use 8001) |
47
+ | `ROUTER_MAX_CONCURRENT` | `20` | Max concurrent requests; excess wait in queue |
48
+
49
+ ## API
50
+
51
+ | Endpoint | Method | Description |
52
+ |----------|--------|-------------|
53
+ | `/v1/embeddings` | POST | Forward to backend (OpenAI-compatible) |
54
+ | `/v1/models` | GET | Forward to backend |
55
+ | `/health` | GET | Health check |
56
+
57
+ Examples:
58
+ ```bash
59
+ # Embeddings
60
+ curl -X POST http://localhost:8011/v1/embeddings \
61
+ -H "Content-Type: application/json" \
62
+ -d '{"model": "BAAI/bge-m3", "input": "hello world"}'
63
+
64
+ # Models
65
+ curl http://localhost:8011/v1/models
66
+
67
+ # Health
68
+ curl http://localhost:8011/health
69
+ ```
70
+
71
+ ## Troubleshooting
72
+
73
+ **"address already in use" (port 8011)** — Another process is using the port. Stop it: `lsof -ti:8011 | xargs kill` (macOS/Linux). Or set `ROUTER_PORT=8012` in `.env` to use a different port.
74
+
75
+ **"All backends unavailable: All connection attempts failed"** — The router could not reach any vLLM backend. Ensure:
76
+ 1. vLLM is running on your backend hosts (default: 192.168.86.173:8001, 192.168.86.176:8001)
77
+ 2. Your machine can reach those IPs (same network, no firewall blocking)
78
+ 3. Or set `EMBEDDING_BACKENDS` in `.env` to your vLLM URLs, e.g. `localhost:8001` if vLLM runs on the same machine
79
+
80
+ ## Development
81
+
82
+ ```bash
83
+ pip install -e ".[dev]"
84
+ pytest tests/ -v
85
+ # or: make test
86
+ ```
87
+
88
+ ## Publish to PyPI
89
+
90
+ One-shot via GitHub Actions (see [pypi-hello-world](https://github.com/taixingbi/pypi-hello-world)):
91
+
92
+ 1. Add `PYPI_API_TOKEN` as a repo secret (Settings → Secrets → Actions). Get token at [pypi.org/manage/account/token/](https://pypi.org/manage/account/token/).
93
+ 2. Push to `main` or run **Actions → Publish to PyPI → Run workflow** manually.
94
+ 3. The workflow builds and uploads to PyPI.
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.0"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "tb-router-embed"
7
+ version = "1.0.1"
8
+ description = "HTTP proxy router for vLLM embedding API with round-robin and failover"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+ keywords = ["rag", "embedding", "vllm", "router", "load-balancer"]
13
+ classifiers = [
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ ]
19
+ dependencies = [
20
+ "httpx>=0.20.0",
21
+ "uvicorn[standard]>=0.20.0",
22
+ "fastapi>=0.100.0",
23
+ "python-dotenv>=1.0.0",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ "pytest>=7.0",
29
+ "pytest-asyncio>=0.21.0",
30
+ ]
31
+
32
+ [project.scripts]
33
+ router-embed = "router_embed.main:run"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["src/router_embed"]
@@ -0,0 +1,3 @@
1
+ # For PyPI publish: pip install -r requirements-publish.txt
2
+ build>=1.0.0
3
+ twine>=4.0.0
@@ -0,0 +1,5 @@
1
+ # Runtime dependencies (sync with pyproject.toml)
2
+ httpx>=0.20.0
3
+ uvicorn[standard]>=0.20.0
4
+ fastapi>=0.100.0
5
+ python-dotenv>=1.0.0
@@ -0,0 +1,3 @@
1
+ """vLLM embedding router: HTTP proxy with round-robin and failover."""
2
+
3
# Keep in sync with the `version` field in pyproject.toml.
__version__ = "1.0.1"
@@ -0,0 +1,71 @@
1
+ """
2
+ Configuration for vLLM embedding router.
3
+ Loads .env if present; values can be overridden by environment variables or configure().
4
+ """
5
+ import os
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
+ _overrides: dict = {}
12
+
13
+ DEFAULT_BACKENDS = "192.168.86.173:8001,192.168.86.176:8001"
14
+ DEFAULT_STRATEGY = "failover"
15
+ DEFAULT_PORT = 8011
16
+ DEFAULT_MAX_CONCURRENT = 20
17
+
18
+
19
+ def configure(
20
+ backends: str | None = None,
21
+ strategy: str | None = None,
22
+ port: int | None = None,
23
+ max_concurrent: int | None = None,
24
+ **kwargs,
25
+ ) -> None:
26
+ """Set configuration overrides (used before starting the server)."""
27
+ global _overrides
28
+ if backends is not None:
29
+ _overrides["backends"] = backends
30
+ if strategy is not None:
31
+ _overrides["strategy"] = strategy
32
+ if port is not None:
33
+ _overrides["port"] = port
34
+ if max_concurrent is not None:
35
+ _overrides["max_concurrent"] = max_concurrent
36
+ for k, v in kwargs.items():
37
+ if v is not None:
38
+ _overrides[k] = v
39
+
40
+
41
def get_backends() -> list[str]:
    """Return the backend base URLs, resolved as override > env > default.

    The comma-separated setting is split, blank entries are dropped, entries
    without an explicit scheme get ``http://`` prepended, and trailing
    slashes are removed.
    """
    configured = _overrides.get("backends") or os.getenv(
        "EMBEDDING_BACKENDS", DEFAULT_BACKENDS
    )
    entries = (piece.strip() for piece in configured.split(","))
    return [
        (
            entry
            if entry.startswith(("http://", "https://"))
            else f"http://{entry}"
        ).rstrip("/")
        for entry in entries
        if entry
    ]
53
+
54
+
55
def get_strategy() -> str:
    """Return the routing strategy name, resolved as override > env > default.

    The value is returned as configured; it is expected to be either
    ``round_robin`` or ``failover``.
    """
    override = _overrides.get("strategy")
    if override:
        return override
    return os.getenv("ROUTER_STRATEGY", DEFAULT_STRATEGY)
58
+
59
+
60
def get_port() -> int:
    """Return the router's listen port, resolved as override > env > default.

    Do not use 8001; that port is reserved for the vLLM backends.
    """
    override = _overrides.get("port")
    if override:
        return int(override)
    return int(os.getenv("ROUTER_PORT", str(DEFAULT_PORT)))
64
+
65
+
66
def get_max_concurrent() -> int:
    """Return the maximum number of concurrent requests to backends.

    Resolved as override > ``ROUTER_MAX_CONCURRENT`` > default. Requests
    beyond this limit wait in a queue.

    Raises:
        ValueError: If the configured value is not an integer or is below 1.
            A limit of 0 would leave every request waiting forever, so it is
            rejected here instead of being passed on.
    """
    raw = _overrides.get("max_concurrent") or os.getenv(
        "ROUTER_MAX_CONCURRENT", str(DEFAULT_MAX_CONCURRENT)
    )
    limit = int(raw)
    if limit < 1:
        raise ValueError(f"ROUTER_MAX_CONCURRENT must be >= 1, got {limit}")
    return limit
@@ -0,0 +1,91 @@
1
+ """FastAPI app: proxy /v1/embeddings and /v1/models to vLLM backends."""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ if __name__ == "__main__":
6
+ _src = Path(__file__).resolve().parent.parent
7
+ if str(_src) not in sys.path:
8
+ sys.path.insert(0, str(_src))
9
+
10
+ import asyncio
11
+ from contextlib import asynccontextmanager
12
+
13
+ import httpx
14
+ from fastapi import FastAPI, Request
15
+ from fastapi.responses import JSONResponse, Response
16
+
17
+ from router_embed.config import get_backends, get_max_concurrent, get_port
18
+ from router_embed.router import proxy_request
19
+
20
+ _http_client: httpx.AsyncClient | None = None
21
+ _queue: asyncio.Semaphore | None = None
22
+
23
+ _SKIP_HEADERS = frozenset({"host", "content-length"})
24
+
25
+
26
def _err(code: int, msg: str) -> JSONResponse:
    """Build a JSON error response with body ``{"error": msg}`` and status ``code``."""
    payload = {"error": msg}
    return JSONResponse(content=payload, status_code=code)
28
+
29
+
30
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Own the shared HTTP client and concurrency limiter for the app's lifetime.

    On startup the module-level ``_http_client`` and ``_queue`` are created;
    on shutdown the client is closed and both globals are reset to ``None``.
    """
    global _http_client, _queue
    # Resolve the limit first: if the configuration is invalid this raises
    # before any client has been opened, so nothing is left unclosed.
    limiter = asyncio.Semaphore(get_max_concurrent())
    client = httpx.AsyncClient(timeout=60.0)
    _http_client, _queue = client, limiter
    try:
        yield
    finally:
        # Runs even when the context exits with an exception, so the
        # connection pool is always released.
        await client.aclose()
        _http_client = None
        _queue = None
39
+
40
+
41
+ app = FastAPI(title="vLLM Embedding Router", lifespan=lifespan)
42
+
43
+
44
@app.get("/health")
async def health():
    """Health check: report that the router itself is up (backends are not probed)."""
    status = {"status": "ok"}
    return status
48
+
49
+
50
# Backend response headers that must not be copied to the client. httpx has
# already decoded the body, so Content-Encoding and Content-Length describe
# bytes we no longer hold; the others apply to a single connection only.
_SKIP_RESPONSE_HEADERS = frozenset(
    {
        "connection",
        "content-encoding",
        "content-length",
        "keep-alive",
        "transfer-encoding",
    }
)


@app.api_route("/v1/embeddings", methods=["POST"])
@app.api_route("/v1/models", methods=["GET"])
async def proxy(request: Request):
    """Forward /v1/embeddings and /v1/models to the backend(s).

    Returns the backend's status code and body. Error responses produced by
    the router itself are JSON objects of the form ``{"error": ...}``:

    * 503 when the router is not initialized, no backends are configured, or
      every backend timed out or refused the connection.
    * 500 when the routing configuration is invalid (``ValueError`` from
      ``proxy_request``, e.g. an unknown strategy).
    """
    if _http_client is None or _queue is None:
        return _err(503, "Router not initialized")

    backends = get_backends()
    if not backends:
        return _err(503, "No backends configured. Set EMBEDDING_BACKENDS.")

    # Drop Accept-Encoding as well, so httpx advertises only the encodings it
    # can decode itself; the body we relay is then always plain bytes.
    headers = {
        k: v
        for k, v in request.headers.items()
        if k.lower() not in _SKIP_HEADERS and k.lower() != "accept-encoding"
    }

    try:
        # The semaphore caps in-flight backend requests; excess callers wait.
        async with _queue:
            resp, _ = await proxy_request(
                _http_client,
                request.method,
                request.url.path,
                content=await request.body(),
                headers=headers,
            )
    except (httpx.TimeoutException, httpx.ConnectError) as e:
        return _err(503, f"All backends unavailable: {e!s}")
    except ValueError as e:
        # Misconfiguration detected while routing; report it, don't crash.
        return _err(500, str(e))

    response_headers = {
        k: v for k, v in resp.headers.items() if k.lower() not in _SKIP_RESPONSE_HEADERS
    }
    # Content-Length is recomputed by Response from the decoded body.
    return Response(
        content=resp.content,
        status_code=resp.status_code,
        headers=response_headers,
    )
75
+
76
+
77
def run():
    """CLI entry point: serve the app with uvicorn on all interfaces."""
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": get_port(), "reload": False}
    uvicorn.run("router_embed.main:app", **server_options)


if __name__ == "__main__":
    run()
@@ -0,0 +1,81 @@
1
+ """Backend selection and proxy logic for vLLM embedding router."""
2
+ import itertools
3
+
4
+ import httpx
5
+
6
+ from router_embed.config import get_backends, get_strategy
7
+
8
+ REQUEST_TIMEOUT = 60.0
9
+ _ROUND_ROBIN_INDEX: itertools.cycle | None = None
10
+
11
+
12
def _get_round_robin_index() -> itertools.cycle:
    """Return the shared cycle of backend indices, creating it on first use.

    The cycle is sized from the backend list seen on the first call and is
    reused afterwards.

    Raises:
        ValueError: If no backends are configured when the cycle is created.
    """
    global _ROUND_ROBIN_INDEX
    if _ROUND_ROBIN_INDEX is not None:
        return _ROUND_ROBIN_INDEX

    configured = get_backends()
    if not configured:
        raise ValueError("No backends configured. Set EMBEDDING_BACKENDS.")
    _ROUND_ROBIN_INDEX = itertools.cycle(range(len(configured)))
    return _ROUND_ROBIN_INDEX
20
+
21
+
22
def _select_backend() -> str:
    """Select one backend URL according to the configured strategy.

    ``round_robin`` advances the shared index cycle; ``failover`` always
    picks the first configured backend.

    Raises:
        ValueError: If no backends are configured or the strategy is unknown.
    """
    backends = get_backends()
    if not backends:
        raise ValueError("No backends configured. Set EMBEDDING_BACKENDS.")

    strategy = get_strategy()
    if strategy == "round_robin":
        # The cycle is sized once, when first created. Wrap the index so a
        # later, shorter backend list cannot cause an IndexError.
        idx = next(_get_round_robin_index()) % len(backends)
        return backends[idx]
    if strategy == "failover":
        return backends[0]
    raise ValueError(f"Unknown ROUTER_STRATEGY: {strategy}. Use round_robin or failover.")
35
+
36
+
37
+ def _is_retryable(status_code: int) -> bool:
38
+ return status_code == 408 or status_code >= 500
39
+
40
+
41
async def proxy_request(
    client: httpx.AsyncClient,
    method: str,
    path: str,
    content: bytes | None = None,
    headers: dict | None = None,
) -> tuple[httpx.Response, str]:
    """
    Forward a request to the configured backend(s).

    With the ``failover`` strategy each backend is tried in order, moving on
    when one times out, refuses the connection, or answers with a retryable
    status (408 or >= 500). Any other strategy sends the request to the
    single backend chosen by ``_select_backend()`` and does not retry.

    Returns:
        ``(response, backend_url_used)``. If every failover attempt that got
        a reply was retryable, the last such response is returned together
        with the backend that produced it.

    Raises:
        ValueError: No backends are configured, or the strategy is unknown.
        httpx.TimeoutException, httpx.ConnectError: No backend returned any
            response; the last transport error is re-raised.
    """
    backends = get_backends()
    if not backends:
        raise ValueError("No backends configured. Set EMBEDDING_BACKENDS.")

    strategy = get_strategy()
    failover = strategy == "failover"
    candidates = backends if failover else [_select_backend()]

    last_error: Exception | None = None
    last_response: httpx.Response | None = None
    # Tracked separately from the loop variable: the last retryable response
    # may come from an earlier backend than the last one attempted.
    last_response_url = ""

    for base_url in candidates:
        try:
            resp = await client.request(
                method,
                f"{base_url}{path}",
                content=content,
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
        except (httpx.TimeoutException, httpx.ConnectError) as e:
            last_error = e
            continue
        if failover and _is_retryable(resp.status_code):
            last_response, last_response_url = resp, base_url
            continue
        return resp, base_url

    # Prefer a real (if unsuccessful) backend reply over a transport error.
    if last_response is not None:
        return last_response, last_response_url
    if last_error is not None:
        raise last_error
    raise RuntimeError("No backends available")
File without changes
@@ -0,0 +1,44 @@
1
+ """Tests for router_embed."""
2
+ import pytest
3
+
4
+ from router_embed.config import configure, get_backends, get_max_concurrent, get_port, get_strategy
5
+
6
+
7
@pytest.fixture(autouse=True)
def reset_config(monkeypatch):
    """Give every test a clean configuration: no overrides, no router env vars.

    The default-value tests must not depend on the developer's shell or a
    local .env file, so the router's environment variables are removed for
    the duration of each test (monkeypatch restores them afterwards).
    """
    import router_embed.config as cfg

    for name in (
        "EMBEDDING_BACKENDS",
        "ROUTER_STRATEGY",
        "ROUTER_PORT",
        "ROUTER_MAX_CONCURRENT",
    ):
        monkeypatch.delenv(name, raising=False)
    cfg._overrides.clear()
    yield
    # Clear again so overrides set by a test never leak past it.
    cfg._overrides.clear()
12
+
13
+
14
def test_get_backends_default():
    """Defaults resolve to the two built-in backend hosts, in order."""
    resolved = get_backends()
    assert len(resolved) == 2
    expected_hosts = ("192.168.86.173", "192.168.86.176")
    for host, url in zip(expected_hosts, resolved):
        assert host in url
19
+
20
+
21
def test_get_backends_override():
    """A backends override replaces the defaults and is returned as given."""
    expected = ["http://a:8001", "http://b:8002"]
    configure(backends=",".join(expected))
    assert get_backends() == expected
24
+
25
+
26
def test_get_strategy_default():
    """With no override, the strategy is failover."""
    strategy = get_strategy()
    assert strategy == "failover"
28
+
29
+
30
def test_get_port_default():
    """With no override, the router port is 8011."""
    port = get_port()
    assert port == 8011
32
+
33
+
34
def test_get_max_concurrent_default():
    """With no override, the concurrency limit is 20."""
    limit = get_max_concurrent()
    assert limit == 20
36
+
37
+
38
def test_health_endpoint():
    """GET /health answers 200 with the fixed status payload."""
    from fastapi.testclient import TestClient
    from router_embed.main import app

    response = TestClient(app).get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}