stickybalancer 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stickybalancer-1.0.0/.gitignore +6 -0
- stickybalancer-1.0.0/LICENSE +21 -0
- stickybalancer-1.0.0/PKG-INFO +178 -0
- stickybalancer-1.0.0/README.md +164 -0
- stickybalancer-1.0.0/pyproject.toml +20 -0
- stickybalancer-1.0.0/simulation/sim_results/avg_probes.png +0 -0
- stickybalancer-1.0.0/simulation/sim_results/avg_response_time.png +0 -0
- stickybalancer-1.0.0/simulation/sim_results/avg_wait_time.png +0 -0
- stickybalancer-1.0.0/simulation/sim_results/peak_load.png +0 -0
- stickybalancer-1.0.0/simulation/simulation.py +345 -0
- stickybalancer-1.0.0/src/stickybalancer/__init__.py +4 -0
- stickybalancer-1.0.0/src/stickybalancer/balancer.py +71 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sayanaditya Das
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stickybalancer
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A minimalist load balancing library implementing Strict Po2C and the Sticky-Lazy protocol.
|
|
5
|
+
Project-URL: Homepage, https://github.com/sayanaditya/stickybalancer
|
|
6
|
+
Project-URL: Repository, https://github.com/sayanaditya/stickybalancer
|
|
7
|
+
Author-email: Sayanaditya Das <sayanaditya43@gmail.com>
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# Sticky-Lazy Load Balancing
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install stickybalancer
```

```python
|
|
21
|
+
|
|
22
|
+
from stickybalancer import StickyLazyBalancer
|
|
23
|
+
|
|
24
|
+
servers = ["server-1", "server-2", "server-3", "server-4"]
|
|
25
|
+
lb = StickyLazyBalancer(servers, threshold=3)
|
|
26
|
+
|
|
27
|
+
# Route a request
|
|
28
|
+
target = lb.get_next()
|
|
29
|
+
print(f"Sending request to: {target}")
|
|
30
|
+
|
|
31
|
+
# After the request completes, release the server
|
|
32
|
+
lb.release(target)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Comparison table
|
|
36
|
+
A probe-efficient alternative to Power-of-Two-Choices (Po2C) that **caches routing decisions** to cut network overhead, while keeping the same logarithmic guarantee on worst-case queue depth.
|
|
37
|
+
|
|
38
|
+
| Protocol | E[Probes / Request] | Asymptotic Max Queue Depth $M_N$ |
|
|
39
|
+
|---|---|---|
|
|
40
|
+
| Pure Random | 1 | $\dfrac{\ln N}{\ln\ln N}$ |
|
|
41
|
+
| Round Robin | 0 | Flat if tasks are i.i.d., **unbounded variance** if not |
|
|
42
|
+
| Strict Po2C | 2 | $\dfrac{\ln\ln N}{\ln 2} + O(1)$ |
|
|
43
|
+
| **Sticky-Lazy** | $\geq \dfrac{1}{T}$ | $\dfrac{\ln\ln N}{\ln 2} + T$ |
|
|
44
|
+
|
|
45
|
+
$N$ = number of servers, $T$ = sticky threshold (tunable), $M_N$ = max load over all bins after $N$ requests have landed (the standard "balls into bins" quantity).
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 1. The Problem
|
|
50
|
+
|
|
51
|
+
Po2C is the industry default for stateless load balancing: sample 2 servers, probe both, route to the lighter one. It guarantees max load $O(\ln\ln N)$ — exponentially better than random routing's $O(\ln N / \ln\ln N)$ — but it pays for this **every single request**, with 2 network round-trips just to *decide where to send the 3rd*.
|
|
52
|
+
|
|
53
|
+
At high QPS, the probe traffic itself becomes a meaningful fraction of your internal network load.
|
|
54
|
+
|
|
55
|
+
## 2. The Algorithm
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
get_next():
|
|
59
|
+
if cached_server exists and blind_capacity > 0:
|
|
60
|
+
blind_capacity -= 1 # FREE — 0 probes
|
|
61
|
+
return cached_server
|
|
62
|
+
|
|
63
|
+
s1 = sample(servers) # 1 probe
|
|
64
|
+
if load[s1] < T:
|
|
65
|
+
cache(s1); blind_capacity = T - load[s1]
|
|
66
|
+
return s1
|
|
67
|
+
|
|
68
|
+
s2 = sample(servers) # 2nd probe (Po2C fallback)
|
|
69
|
+
winner = argmin(load[s1], load[s2])
|
|
70
|
+
if load[winner] < T:
|
|
71
|
+
cache(winner); blind_capacity = T - load[winner]
|
|
72
|
+
else:
|
|
73
|
+
invalidate cache
|
|
74
|
+
return winner
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Three phases per cache cycle:
|
|
78
|
+
|
|
79
|
+
1. **Sticky (caching):** a probe finds a server under threshold $T$ → cache it, set a credit equal to its remaining headroom $(T - \text{load})$.
|
|
80
|
+
2. **Lazy (blind):** every subsequent request drains the credit with zero probe, until the credit hits 0.
|
|
81
|
+
3. **Safety net:** if a probe ever finds a server already at $\ge T$, the cache is invalidated and the request falls back to a strict Po2C double-probe, so the algorithm never blindly piles load onto a hot server.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
## 3. Proofs
|
|
85
|
+
|
|
86
|
+
### Po2C tail bound
|
|
87
|
+
|
|
88
|
+
Let $x_k$ = fraction of bins with load $\ge k$ after $N$ balls. A bin can only reach load $k$ if, at the moment it was chosen, both sampled bins already had load $\ge k-1$ (otherwise Po2C would have picked the lighter, sub-$(k-1)$ one). Treating choices as independent (folding any dependence into a constant $\lambda$):
|
|
89
|
+
|
|
90
|
+
$$x_k \le \lambda \cdot x_{k-1}^2, \qquad x_1 \le \lambda$$
|
|
91
|
+
|
|
92
|
+
Unrolling the recursion:
|
|
93
|
+
|
|
94
|
+
$$x_k \le \lambda^{1+2+4+\dots+2^{k-1}} = \lambda^{2^k - 1}$$
|
|
95
|
+
|
|
96
|
+
By a union bound over $N$ bins:
|
|
97
|
+
|
|
98
|
+
$$\mathbb{P}(M_N \ge k) \le N\cdot x_k \le N\cdot\lambda^{2^k-1}$$
|
|
99
|
+
|
|
100
|
+
Solving for the crossover point: we want the smallest $k$ at which the RHS drops below 1. Set $N \lambda^{2^k-1}=1$:
|
|
101
|
+
|
|
102
|
+
$$2^k = 1 + \frac{\ln N}{\ln(1/\lambda)} \implies k = \log_2\left(1+\frac{\ln N}{\ln(1/\lambda)}\right) = \frac{\ln\ln N}{\ln 2} + O(1)$$
|
|
103
|
+
|
|
104
|
+
since the $1+$ and the $\ln(1/\lambda)$ constant only contribute an additive $O(1)$ term as $N\to\infty$. This recovers the classical $M_N = \dfrac{\ln\ln N}{\ln 2}+O(1)$ result (Azar–Broder–Karlin–Upfal, 1994).
|
|
105
|
+
|
|
106
|
+
### $\mathbb{E}[P] = 2$ (for Po2C)
|
|
107
|
+
Every Po2C routing decision samples 2 random servers from the server list and probes both, so each call costs exactly 2 probes.
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
### Sticky Lazy tail bound
|
|
112
|
+
|
|
113
|
+
Decompose any bin's load into two parts:
|
|
114
|
+
|
|
115
|
+
$$\text{load} = \underbrace{\text{load contributed by Po2C-style probe decisions}}_{\text{bounded exactly as in Po2C}} + \underbrace{\text{load added blindly during one cache cycle}}_{\le T-1 \text{ by construction}}$$
|
|
116
|
+
|
|
117
|
+
The first term is governed by the *same* recursion as plain Po2C.
|
|
118
|
+
|
|
119
|
+
The second term is the blind cap: by construction the cache **never lets a server accept more than $T-1$ blind requests beyond the level it was caught at**, because the credit is capped at $(T - \text{load})$ at cache time and load only increases monotonically during the blind run. So the blind phase adds at most $T-1$, which we bound loosely by **$T$**, on top of whatever the probed component would have been:
|
|
120
|
+
|
|
121
|
+
$$M_N \le \underbrace{\frac{\ln\ln N}{\ln 2}+O(1)}_{\text{probed part }} + \underbrace{T}_{\text{blind cap}}$$
|
|
122
|
+
|
|
123
|
+
### $\mathbb{E}[P]$ (for Sticky-Lazy)
|
|
124
|
+
|
|
125
|
+
**Two cases per probe.** Let $t = \mathbb{P}(\text{probe lands on a server with load} < T)$ — the cache-hit case. The complementary case $(1-t)$ is the safety-net branch.
|
|
126
|
+
|
|
127
|
+
- **Case 1 ($t$):** $1$ probe; the cycle serves $T-L$ requests for $L\in\{0,\dots,T-1\}$. We use the average-of-extremes estimate $L_{\text{avg}} = \frac{T-1}{2}$: the true distribution of $L$ depends on many factors (server processing speed, the request-size distribution, and others), so we make no assumption about which $L$ is likelier and simply take the midpoint of the known range as the expected value:
|
|
128
|
+
$$\text{cost}_1 = 1 \text{ probe}, \quad \text{served}_1 \approx T - \frac{T-1}{2} = \frac{T+1}{2}$$
|
|
129
|
+
- **Case 2 ($1-t$):** the first probe found $L\ge T$, so a second probe fires (Po2C fallback). Win or lose the cache, exactly $1$ request is served:
|
|
130
|
+
$$\text{cost}_2 = 2 \text{ probes}, \quad \text{served}_2 = 1$$
|
|
131
|
+
|
|
132
|
+
**Combine.** Average probes and requests served per attempt, then take the ratio:
|
|
133
|
+
|
|
134
|
+
$$\mathbb{E}[P] \approx \frac{t\cdot 1 + (1-t)\cdot 2}{t\cdot\dfrac{T+1}{2} + (1-t)\cdot 1} = \frac{2-t}{1 + t\cdot\dfrac{T-1}{2}}$$
|
|
135
|
+
|
|
136
|
+
**Comparison with Po2C.** Po2C is the special case $t=0$ ($\mathbb{E}[P]=2$, always). Sticky-Lazy with any $t>0$ strictly reduces this, since raising $t$ shrinks the numerator and, for $T>1$, grows the denominator. As $t\to 1$ (low contention, most probes land on safe servers), $\mathbb{E}[P] \to \frac{2}{T+1}$, which is $\ll 2$ for large $T$ and itself tends to $0$ as $T\to\infty$. So **Sticky-Lazy never does worse than Po2C**, and the savings grow with both $T$ and the cache-hit rate $t$.
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
### Round Robin's variance is unbounded under heterogeneous tasks
|
|
143
|
+
|
|
144
|
+
Round Robin assigns request $i$ to server $i \bmod N$, blind to load. If task durations $D_i$ are **heavy-tailed** (e.g. $\text{Var}(D)=\infty$, as in many real workloads), a server can be unlucky enough to receive a run of long tasks purely by index coincidence, with no feedback mechanism to redirect future requests away from it. Formally, queue depth becomes a sum of i.i.d. heavy-tailed variables with no rebalancing term, so $\text{Var}(\text{queue depth}) \to \infty$ as task-duration variance grows. Po2C and Sticky-Lazy, by contrast, actively steer load away from heavily loaded servers, which keeps the variance smaller.
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
## 4. Pros vs. Cons
|
|
148
|
+
|
|
149
|
+
### ✅ Pros
|
|
150
|
+
- Cuts probe traffic from a fixed 2 (Po2C) down to roughly $\frac{2}{T+1}$
|
|
151
|
+
- Same safety class as Po2C ($O(\ln\ln N)$), just shifted by a constant $T$.
|
|
152
|
+
- Self-healing: falls back to Po2C automatically the moment caching gets risky.
|
|
153
|
+
- One knob ($T$) to trade off bandwidth vs. balance, tuned per deployment.
|
|
154
|
+
|
|
155
|
+
### ❌ Cons
|
|
156
|
+
- Assumes uniform request cost: a string of *expensive* requests can overload a cached server before the credit runs out.
|
|
157
|
+
- Cache can go stale if load shifts from traffic outside the balancer's view.
|
|
158
|
+
- Slightly worse balance than Po2C within the threshold zone: a lighter server can sit idle while the sticky one keeps getting hit.
|
|
159
|
+
- Overhead is worst for small $N$, where the $+T$ term outweighs the tiny $\ln\ln N$ baseline.
|
|
160
|
+
|
|
161
|
+
Most of these are fixable — cost-weighted credits, a TTL on the cache, occasional re-probing, or just shrinking $T$ for small clusters etc.
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
## 5. Simulation Results
|
|
165
|
+
|
|
166
|
+
To validate the theory against something closer to production traffic, all four protocols were run through a discrete-event simulation: 50 servers, 30k requests, FCFS queues, diurnal traffic with burst windows, log-normal + Pareto-tailed request sizes, and heterogeneous server speeds — averaged over 5 seeded trials per workload (`light`, `medium`, `heavy`). [Full simulation code →](./simulation/simulation.py)
|
|
167
|
+
|
|
168
|
+
| Probes/Request | Peak Load |
|
|
169
|
+
|---|---|
|
|
170
|
+
|  |  |
|
|
171
|
+
|
|
172
|
+
| Wait Time | Response Time |
|
|
173
|
+
|---|---|
|
|
174
|
+
|  |  |
|
|
175
|
+
|
|
176
|
+
**Takeaways:**
|
|
177
|
+
- Sticky-Lazy's probe rate stays well under Po2C's fixed 2 across all workloads — confirming the $\ge 1/T$ floor, with the safety net pushing it above the theoretical best case as expected.
|
|
178
|
+
- Peak load and latency for Sticky-Lazy track Po2C closely, both staying flat as Random/Round-Robin degrade sharply under heavier traffic — the predicted $+T$ gap is small and visible, not hidden.
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Sticky-Lazy Load Balancing
|
|
2
|
+
|
|
3
|
+
## Quick Start
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install stickybalancer
```

```python
|
|
7
|
+
|
|
8
|
+
from stickybalancer import StickyLazyBalancer
|
|
9
|
+
|
|
10
|
+
servers = ["server-1", "server-2", "server-3", "server-4"]
|
|
11
|
+
lb = StickyLazyBalancer(servers, threshold=3)
|
|
12
|
+
|
|
13
|
+
# Route a request
|
|
14
|
+
target = lb.get_next()
|
|
15
|
+
print(f"Sending request to: {target}")
|
|
16
|
+
|
|
17
|
+
# After the request completes, release the server
|
|
18
|
+
lb.release(target)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Comparison table
|
|
22
|
+
A probe-efficient alternative to Power-of-Two-Choices (Po2C) that **caches routing decisions** to cut network overhead, while keeping the same logarithmic guarantee on worst-case queue depth.
|
|
23
|
+
|
|
24
|
+
| Protocol | E[Probes / Request] | Asymptotic Max Queue Depth $M_N$ |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| Pure Random | 1 | $\dfrac{\ln N}{\ln\ln N}$ |
|
|
27
|
+
| Round Robin | 0 | Flat if tasks are i.i.d., **unbounded variance** if not |
|
|
28
|
+
| Strict Po2C | 2 | $\dfrac{\ln\ln N}{\ln 2} + O(1)$ |
|
|
29
|
+
| **Sticky-Lazy** | $\geq \dfrac{1}{T}$ | $\dfrac{\ln\ln N}{\ln 2} + T$ |
|
|
30
|
+
|
|
31
|
+
$N$ = number of servers, $T$ = sticky threshold (tunable), $M_N$ = max load over all bins after $N$ requests have landed (the standard "balls into bins" quantity).
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 1. The Problem
|
|
36
|
+
|
|
37
|
+
Po2C is the industry default for stateless load balancing: sample 2 servers, probe both, route to the lighter one. It guarantees max load $O(\ln\ln N)$ — exponentially better than random routing's $O(\ln N / \ln\ln N)$ — but it pays for this **every single request**, with 2 network round-trips just to *decide where to send the 3rd*.
|
|
38
|
+
|
|
39
|
+
At high QPS, the probe traffic itself becomes a meaningful fraction of your internal network load.
|
|
40
|
+
|
|
41
|
+
## 2. The Algorithm
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
get_next():
|
|
45
|
+
if cached_server exists and blind_capacity > 0:
|
|
46
|
+
blind_capacity -= 1 # FREE — 0 probes
|
|
47
|
+
return cached_server
|
|
48
|
+
|
|
49
|
+
s1 = sample(servers) # 1 probe
|
|
50
|
+
if load[s1] < T:
|
|
51
|
+
cache(s1); blind_capacity = T - load[s1]
|
|
52
|
+
return s1
|
|
53
|
+
|
|
54
|
+
s2 = sample(servers) # 2nd probe (Po2C fallback)
|
|
55
|
+
winner = argmin(load[s1], load[s2])
|
|
56
|
+
if load[winner] < T:
|
|
57
|
+
cache(winner); blind_capacity = T - load[winner]
|
|
58
|
+
else:
|
|
59
|
+
invalidate cache
|
|
60
|
+
return winner
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Three phases per cache cycle:
|
|
64
|
+
|
|
65
|
+
1. **Sticky (caching):** a probe finds a server under threshold $T$ → cache it, set a credit equal to its remaining headroom $(T - \text{load})$.
|
|
66
|
+
2. **Lazy (blind):** every subsequent request drains the credit with zero probe, until the credit hits 0.
|
|
67
|
+
3. **Safety net:** if a probe ever finds a server already at $\ge T$, the cache is invalidated and the request falls back to a strict Po2C double-probe, so the algorithm never blindly piles load onto a hot server.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
## 3. Proofs
|
|
71
|
+
|
|
72
|
+
### Po2C tail bound
|
|
73
|
+
|
|
74
|
+
Let $x_k$ = fraction of bins with load $\ge k$ after $N$ balls. A bin can only reach load $k$ if, at the moment it was chosen, both sampled bins already had load $\ge k-1$ (otherwise Po2C would have picked the lighter, sub-$(k-1)$ one). Treating choices as independent (folding any dependence into a constant $\lambda$):
|
|
75
|
+
|
|
76
|
+
$$x_k \le \lambda \cdot x_{k-1}^2, \qquad x_1 \le \lambda$$
|
|
77
|
+
|
|
78
|
+
Unrolling the recursion:
|
|
79
|
+
|
|
80
|
+
$$x_k \le \lambda^{1+2+4+\dots+2^{k-1}} = \lambda^{2^k - 1}$$
|
|
81
|
+
|
|
82
|
+
By a union bound over $N$ bins:
|
|
83
|
+
|
|
84
|
+
$$\mathbb{P}(M_N \ge k) \le N\cdot x_k \le N\cdot\lambda^{2^k-1}$$
|
|
85
|
+
|
|
86
|
+
Solving for the crossover point: we want the smallest $k$ at which the RHS drops below 1. Set $N \lambda^{2^k-1}=1$:
|
|
87
|
+
|
|
88
|
+
$$2^k = 1 + \frac{\ln N}{\ln(1/\lambda)} \implies k = \log_2\left(1+\frac{\ln N}{\ln(1/\lambda)}\right) = \frac{\ln\ln N}{\ln 2} + O(1)$$
|
|
89
|
+
|
|
90
|
+
since the $1+$ and the $\ln(1/\lambda)$ constant only contribute an additive $O(1)$ term as $N\to\infty$. This recovers the classical $M_N = \dfrac{\ln\ln N}{\ln 2}+O(1)$ result (Azar–Broder–Karlin–Upfal, 1994).
|
|
91
|
+
|
|
92
|
+
### $\mathbb{E}[P] = 2$ (for Po2C)
|
|
93
|
+
Every Po2C routing decision samples 2 random servers from the server list and probes both, so each call costs exactly 2 probes.
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
### Sticky Lazy tail bound
|
|
98
|
+
|
|
99
|
+
Decompose any bin's load into two parts:
|
|
100
|
+
|
|
101
|
+
$$\text{load} = \underbrace{\text{load contributed by Po2C-style probe decisions}}_{\text{bounded exactly as in Po2C}} + \underbrace{\text{load added blindly during one cache cycle}}_{\le T-1 \text{ by construction}}$$
|
|
102
|
+
|
|
103
|
+
The first term is governed by the *same* recursion as plain Po2C.
|
|
104
|
+
|
|
105
|
+
The second term is the blind cap: by construction the cache **never lets a server accept more than $T-1$ blind requests beyond the level it was caught at**, because the credit is capped at $(T - \text{load})$ at cache time and load only increases monotonically during the blind run. So the blind phase adds at most $T-1$, which we bound loosely by **$T$**, on top of whatever the probed component would have been:
|
|
106
|
+
|
|
107
|
+
$$M_N \le \underbrace{\frac{\ln\ln N}{\ln 2}+O(1)}_{\text{probed part }} + \underbrace{T}_{\text{blind cap}}$$
|
|
108
|
+
|
|
109
|
+
### $\mathbb{E}[P]$ (for Sticky-Lazy)
|
|
110
|
+
|
|
111
|
+
**Two cases per probe.** Let $t = \mathbb{P}(\text{probe lands on a server with load} < T)$ — the cache-hit case. The complementary case $(1-t)$ is the safety-net branch.
|
|
112
|
+
|
|
113
|
+
- **Case 1 ($t$):** $1$ probe; the cycle serves $T-L$ requests for $L\in\{0,\dots,T-1\}$. We use the average-of-extremes estimate $L_{\text{avg}} = \frac{T-1}{2}$: the true distribution of $L$ depends on many factors (server processing speed, the request-size distribution, and others), so we make no assumption about which $L$ is likelier and simply take the midpoint of the known range as the expected value:
|
|
114
|
+
$$\text{cost}_1 = 1 \text{ probe}, \quad \text{served}_1 \approx T - \frac{T-1}{2} = \frac{T+1}{2}$$
|
|
115
|
+
- **Case 2 ($1-t$):** the first probe found $L\ge T$, so a second probe fires (Po2C fallback). Win or lose the cache, exactly $1$ request is served:
|
|
116
|
+
$$\text{cost}_2 = 2 \text{ probes}, \quad \text{served}_2 = 1$$
|
|
117
|
+
|
|
118
|
+
**Combine.** Average probes and requests served per attempt, then take the ratio:
|
|
119
|
+
|
|
120
|
+
$$\mathbb{E}[P] \approx \frac{t\cdot 1 + (1-t)\cdot 2}{t\cdot\dfrac{T+1}{2} + (1-t)\cdot 1} = \frac{2-t}{1 + t\cdot\dfrac{T-1}{2}}$$
|
|
121
|
+
|
|
122
|
+
**Comparison with Po2C.** Po2C is the special case $t=0$ ($\mathbb{E}[P]=2$, always). Sticky-Lazy with any $t>0$ strictly reduces this, since raising $t$ shrinks the numerator and, for $T>1$, grows the denominator. As $t\to 1$ (low contention, most probes land on safe servers), $\mathbb{E}[P] \to \frac{2}{T+1}$, which is $\ll 2$ for large $T$ and itself tends to $0$ as $T\to\infty$. So **Sticky-Lazy never does worse than Po2C**, and the savings grow with both $T$ and the cache-hit rate $t$.
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
### Round Robin's variance is unbounded under heterogeneous tasks
|
|
129
|
+
|
|
130
|
+
Round Robin assigns request $i$ to server $i \bmod N$, blind to load. If task durations $D_i$ are **heavy-tailed** (e.g. $\text{Var}(D)=\infty$, as in many real workloads), a server can be unlucky enough to receive a run of long tasks purely by index coincidence, with no feedback mechanism to redirect future requests away from it. Formally, queue depth becomes a sum of i.i.d. heavy-tailed variables with no rebalancing term, so $\text{Var}(\text{queue depth}) \to \infty$ as task-duration variance grows. Po2C and Sticky-Lazy, by contrast, actively steer load away from heavily loaded servers, which keeps the variance smaller.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
## 4. Pros vs. Cons
|
|
134
|
+
|
|
135
|
+
### ✅ Pros
|
|
136
|
+
- Cuts probe traffic from a fixed 2 (Po2C) down to roughly $\frac{2}{T+1}$
|
|
137
|
+
- Same safety class as Po2C ($O(\ln\ln N)$), just shifted by a constant $T$.
|
|
138
|
+
- Self-healing: falls back to Po2C automatically the moment caching gets risky.
|
|
139
|
+
- One knob ($T$) to trade off bandwidth vs. balance, tuned per deployment.
|
|
140
|
+
|
|
141
|
+
### ❌ Cons
|
|
142
|
+
- Assumes uniform request cost: a string of *expensive* requests can overload a cached server before the credit runs out.
|
|
143
|
+
- Cache can go stale if load shifts from traffic outside the balancer's view.
|
|
144
|
+
- Slightly worse balance than Po2C within the threshold zone: a lighter server can sit idle while the sticky one keeps getting hit.
|
|
145
|
+
- Overhead is worst for small $N$, where the $+T$ term outweighs the tiny $\ln\ln N$ baseline.
|
|
146
|
+
|
|
147
|
+
Most of these are fixable — cost-weighted credits, a TTL on the cache, occasional re-probing, or just shrinking $T$ for small clusters etc.
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
## 5. Simulation Results
|
|
151
|
+
|
|
152
|
+
To validate the theory against something closer to production traffic, all four protocols were run through a discrete-event simulation: 50 servers, 30k requests, FCFS queues, diurnal traffic with burst windows, log-normal + Pareto-tailed request sizes, and heterogeneous server speeds — averaged over 5 seeded trials per workload (`light`, `medium`, `heavy`). [Full simulation code →](./simulation/simulation.py)
|
|
153
|
+
|
|
154
|
+
| Probes/Request | Peak Load |
|
|
155
|
+
|---|---|
|
|
156
|
+
|  |  |
|
|
157
|
+
|
|
158
|
+
| Wait Time | Response Time |
|
|
159
|
+
|---|---|
|
|
160
|
+
|  |  |
|
|
161
|
+
|
|
162
|
+
**Takeaways:**
|
|
163
|
+
- Sticky-Lazy's probe rate stays well under Po2C's fixed 2 across all workloads — confirming the $\ge 1/T$ floor, with the safety net pushing it above the theoretical best case as expected.
|
|
164
|
+
- Peak load and latency for Sticky-Lazy track Po2C closely, both staying flat as Random/Round-Robin degrade sharply under heavier traffic — the predicted $+T$ gap is small and visible, not hidden.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "stickybalancer"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "A minimalist load balancing library implementing Strict Po2C and the Sticky-Lazy protocol."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
authors = [{ name="Sayanaditya Das", email="sayanaditya43@gmail.com" }]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.urls]
|
|
19
|
+
Homepage = "https://github.com/sayanaditya/stickybalancer"
|
|
20
|
+
Repository = "https://github.com/sayanaditya/stickybalancer"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import heapq
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
import statistics
|
|
6
|
+
import time
|
|
7
|
+
from typing import Callable, Dict, List, Tuple
|
|
8
|
+
|
|
9
|
+
import matplotlib.pyplot as plt
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ----------------------------
|
|
13
|
+
# Balancers
|
|
14
|
+
# ----------------------------
|
|
15
|
+
|
|
16
|
+
class RandomBalancer:
    """Baseline balancer that routes each request to a uniformly random server.

    Public state: ``servers`` (the pool, in the order given), ``loads``
    (in-flight request count per server) and ``probes`` (incremented once
    per routing decision).
    """

    def __init__(self, servers):
        pool = list(servers)
        self.servers = pool
        self.loads = dict.fromkeys(pool, 0)
        self.probes = 0

    def get_next(self):
        """Pick a random server, count one more in-flight request on it, return it."""
        self.probes += 1
        picked = random.choice(self.servers)
        self.loads[picked] = self.loads[picked] + 1
        return picked

    def release(self, server):
        """Mark one request on ``server`` as finished.

        Unknown servers and servers whose count is already zero are ignored.
        """
        outstanding = self.loads.get(server, 0)
        if outstanding > 0:
            self.loads[server] = outstanding - 1
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class RoundRobinBalancer:
    """Balancer that cycles through the pool in order, never probing load.

    Public state: ``servers``, ``loads`` (in-flight count per server),
    ``idx`` (position of the next server to hand out) and ``probes``
    (stays at zero; this class never increments it).
    """

    def __init__(self, servers):
        pool = list(servers)
        self.servers = pool
        self.loads = dict.fromkeys(pool, 0)
        self.idx = 0
        self.probes = 0

    def get_next(self):
        """Return the server at the current position and advance, wrapping around."""
        target = self.servers[self.idx]
        self.loads[target] = self.loads[target] + 1
        self.idx = (self.idx + 1) % len(self.servers)
        return target

    def release(self, server):
        """Mark one request on ``server`` as finished.

        Unknown servers and servers whose count is already zero are ignored.
        """
        outstanding = self.loads.get(server, 0)
        if outstanding > 0:
            self.loads[server] = outstanding - 1
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Po2CBalancer:
    """Strict power-of-two-choices balancer.

    Every routing decision samples two distinct servers, counts two probes,
    and routes to the one with the smaller in-flight count. On a tie the
    second sampled server is chosen.
    """

    def __init__(self, servers):
        pool = list(servers)
        self.servers = pool
        self.loads = dict.fromkeys(pool, 0)
        self.probes = 0

    def get_next(self):
        """Sample two servers, route to the lighter one, and return it."""
        first, second = random.sample(self.servers, 2)
        self.probes += 2
        # The first candidate wins only when it is strictly lighter.
        if self.loads[second] <= self.loads[first]:
            chosen = second
        else:
            chosen = first
        self.loads[chosen] = self.loads[chosen] + 1
        return chosen

    def release(self, server):
        """Mark one request on ``server`` as finished.

        Unknown servers and servers whose count is already zero are ignored.
        """
        outstanding = self.loads.get(server, 0)
        if outstanding > 0:
            self.loads[server] = outstanding - 1
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class StickyLazyBalancer:
    """Sticky-Lazy balancer: cache a lightly loaded server and reuse it blindly.

    A probe that finds a server below ``threshold`` caches that server and
    grants a credit (``blind_capacity``) equal to its remaining headroom.
    Subsequent requests drain the credit without probing. When the first
    probe finds a server at or above the threshold, a second server is
    probed and the lighter of the two is used (Po2C-style fallback).

    Public state: ``servers``, ``threshold``, ``loads`` (in-flight count per
    server), ``cached_server``, ``blind_capacity`` and ``probes``.
    """

    def __init__(self, servers, threshold=3):
        self.servers = list(servers)
        self.threshold = threshold
        self.loads = {s: 0 for s in self.servers}
        self.cached_server = None
        self.blind_capacity = 0
        self.probes = 0

    def get_next(self):
        """Choose a server for the next request, count it as in flight, return it."""
        # Lazy phase: spend one unit of credit, no probe.
        if self.cached_server is not None and self.blind_capacity > 0:
            self.blind_capacity -= 1
            self.loads[self.cached_server] += 1
            return self.cached_server

        s1 = random.choice(self.servers)
        self.probes += 1

        # Sticky phase: first probe is under the threshold, so cache it.
        if self.loads[s1] < self.threshold:
            self.loads[s1] += 1
            self.cached_server = s1
            # Credit is the headroom left after counting this request.
            self.blind_capacity = self.threshold - self.loads[s1]
            return s1

        # Safety net: probe a second, different server and take the lighter.
        candidates = [s for s in self.servers if s != s1]
        if candidates:
            s2 = random.choice(candidates)
            self.probes += 1
            winner = s1 if self.loads[s1] < self.loads[s2] else s2
        else:
            # Single-server pool: there is nothing else to probe, so route
            # to the only server instead of failing on an empty choice.
            winner = s1
        self.loads[winner] += 1

        if self.loads[winner] < self.threshold:
            self.cached_server = winner
            self.blind_capacity = self.threshold - self.loads[winner]
        else:
            self.cached_server = None
            self.blind_capacity = 0

        return winner

    def release(self, server):
        """Mark one request on ``server`` as finished.

        Unknown servers and servers whose count is already zero are ignored.
        The cached credit is not adjusted by a release.
        """
        if server in self.loads and self.loads[server] > 0:
            self.loads[server] -= 1
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ----------------------------
|
|
114
|
+
# Realistic synthetic workload
|
|
115
|
+
# ----------------------------
|
|
116
|
+
|
|
117
|
+
def diurnal_rate(t: float, base: float, period: float = 24 * 3600.0) -> float:
    """Return ``base`` scaled by a sinusoid in ``t`` with the given ``period``.

    The scale factor swings between 0.45 and 1.55 (amplitude 0.55 around 1).
    """
    phase = 2.0 * math.pi * t / period
    swing = 0.55 * math.sin(phase)
    return base * (1.0 + swing)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def burst_multiplier(t: float, windows: List[Tuple[float, float]]) -> float:
    """Return 2.5 when ``t`` lies inside any ``(start, end)`` window, else 1.0.

    Both window endpoints are inclusive.
    """
    in_burst = any(start <= t <= end for start, end in windows)
    return 2.5 if in_burst else 1.0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def generate_arrivals(
    num_requests: int,
    base_rate: float,
    seed: int,
) -> List[float]:
    """Generate ``num_requests`` increasing arrival timestamps.

    Reseeds the global ``random`` generator with ``seed``. Each gap is an
    exponential draw whose rate is the diurnal rate at the current time
    multiplied by the burst multiplier, floored at 0.05. Bursts cover the
    hours 2-3, 8-9 and 15-16, expressed in seconds.
    """
    random.seed(seed)
    # One-hour burst windows starting at hours 2, 8 and 15.
    windows = [(hour * 3600.0, (hour + 1) * 3600.0) for hour in (2, 8, 15)]

    timeline = []
    clock = 0.0
    for _ in range(num_requests):
        current = diurnal_rate(clock, base_rate) * burst_multiplier(clock, windows)
        # Floor the rate so the exponential draw always gets a positive value.
        clock += random.expovariate(max(current, 0.05))
        timeline.append(clock)
    return timeline
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def request_size() -> float:
    """Draw one request size from a three-part mixture using the global RNG.

    70% of draws are log-normal(mu=-0.2, sigma=0.35), 25% are
    log-normal(mu=0.7, sigma=0.45), and the remaining 5% are
    6.0 times a Pareto(1.7) draw.
    """
    selector = random.random()
    if selector >= 0.95:
        return 6.0 * random.paretovariate(1.7)
    if selector >= 0.70:
        return random.lognormvariate(mu=0.7, sigma=0.45)
    return random.lognormvariate(mu=-0.2, sigma=0.35)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def make_servers(n: int, seed: int) -> Dict[int, float]:
    """Return a speed factor for each server id in ``0..n-1``.

    Reseeds the global ``random`` generator with ``seed`` and draws each
    speed uniformly from [0.6, 1.8].
    """
    random.seed(seed)
    speeds: Dict[int, float] = {}
    for server_id in range(n):
        speeds[server_id] = random.uniform(0.6, 1.8)
    return speeds
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# ----------------------------
|
|
166
|
+
# Simulation
|
|
167
|
+
# ----------------------------
|
|
168
|
+
|
|
169
|
+
def simulate(
    balancer_factory: Callable[[List[int]], object],
    num_servers: int,
    num_requests: int,
    base_rate: float,
    seed: int,
) -> Dict[str, float]:
    """Run one trial of a balancer against a synthetic workload.

    Servers are the integers ``0..num_servers-1``; each is modelled as a
    single first-come-first-served queue with its own speed factor.

    Args:
        balancer_factory: Called with the server id list; must return an
            object exposing ``get_next()``, ``release(server)`` and a
            ``loads`` mapping. A ``probes`` counter is read if present.
        num_servers: Size of the server pool.
        num_requests: Number of requests to route.
        base_rate: Base arrival rate passed to the arrival generator.
        seed: Base seed; speeds use ``seed + 100`` and arrivals ``seed + 200``.

    Returns:
        A dict with ``avg_wait_time``, ``avg_response_time``,
        ``peak_load_mean`` (mean of the maximum per-server load sampled
        every 100th request) and ``avg_probes`` (probes per request).
    """
    random.seed(seed)

    servers = list(range(num_servers))
    server_speed = make_servers(num_servers, seed + 100)
    balancer = balancer_factory(servers)
    arrivals = generate_arrivals(num_requests, base_rate, seed + 200)

    # Draw every service demand before any routing happens. Balancers consume
    # the same global RNG inside get_next(), so drawing sizes inside the loop
    # would hand each algorithm a different request-size sequence for the
    # same seed and make the comparison between algorithms noisy.
    demands = [request_size() for _ in arrivals]

    # FCFS per server: the time at which each server next becomes free.
    available_at = {s: 0.0 for s in servers}
    # Min-heap of (finish_time, server) for requests still in flight.
    events: List[Tuple[float, int]] = []

    response_times = []
    wait_times = []
    sampled_peak = []

    for i, (now, service_demand) in enumerate(zip(arrivals, demands)):
        # Retire everything that finished by the time this request arrives.
        while events and events[0][0] <= now:
            _, server = heapq.heappop(events)
            balancer.release(server)

        chosen = balancer.get_next()

        service_time = service_demand / server_speed[chosen]

        start_time = max(now, available_at[chosen])
        finish_time = start_time + service_time
        available_at[chosen] = finish_time

        wait_times.append(start_time - now)
        response_times.append(finish_time - now)

        heapq.heappush(events, (finish_time, chosen))

        if i % 100 == 0:
            sampled_peak.append(max(balancer.loads.values()))

    return {
        "avg_wait_time": statistics.mean(wait_times),
        "avg_response_time": statistics.mean(response_times),
        "peak_load_mean": statistics.mean(sampled_peak),
        "avg_probes": getattr(balancer, "probes", 0) / num_requests,
    }
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def run_trials(
    balancer_factory: Callable[[List[int]], object],
    num_trials: int,
    num_servers: int,
    num_requests: int,
    base_rate: float,
) -> Dict[str, float]:
    """Average the metrics of ``simulate`` over several seeded trials.

    Trial ``t`` (counting from 0) runs with seed ``1000 + t``; every other
    argument is forwarded to ``simulate`` unchanged.

    Returns:
        A dict mapping each of ``avg_wait_time``, ``avg_response_time``,
        ``peak_load_mean`` and ``avg_probes`` to its mean across trials.
    """
    metric_names = (
        "avg_wait_time",
        "avg_response_time",
        "peak_load_mean",
        "avg_probes",
    )
    samples = {name: [] for name in metric_names}

    for trial in range(num_trials):
        outcome = simulate(
            balancer_factory=balancer_factory,
            num_servers=num_servers,
            num_requests=num_requests,
            base_rate=base_rate,
            seed=1000 + trial,
        )
        # Record this trial's value for every tracked metric.
        for name, bucket in samples.items():
            bucket.append(outcome[name])

    averages = {}
    for name, bucket in samples.items():
        averages[name] = statistics.mean(bucket)
    return averages
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# ----------------------------
|
|
246
|
+
# Plotting
|
|
247
|
+
# ----------------------------
|
|
248
|
+
|
|
249
|
+
def ensure_dir(path: str) -> None:
    """Create directory *path* (and missing parents); do nothing if it exists."""
    os.makedirs(name=path, exist_ok=True)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def grouped_bar_chart(results, metric, title, ylabel, outpath):
    """Draw one metric as a grouped bar chart and save it to *outpath*.

    ``results`` is indexed as ``results[workload][algorithm][metric]``.
    Workloads form the x-axis groups; each algorithm contributes one bar per
    group. The algorithm list is taken from the first workload's entry.
    """
    workload_names = list(results)
    algo_names = list(next(iter(results.values())))
    positions = list(range(len(workload_names)))
    bar_width = 0.18
    # Half the span of the bar indices, used to centre the bars of a group
    # around that group's tick position.
    centre = (len(algo_names) - 1) / 2

    plt.figure(figsize=(11, 6))
    for rank, algo in enumerate(algo_names):
        shift = (rank - centre) * bar_width
        heights = [results[name][algo][metric] for name in workload_names]
        bar_positions = [pos + shift for pos in positions]
        plt.bar(bar_positions, heights, width=bar_width, label=algo)

    plt.xticks(positions, workload_names)
    plt.xlabel("Workload")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(outpath, dpi=220)
    plt.close()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# ----------------------------
|
|
276
|
+
# Main
|
|
277
|
+
# ----------------------------
|
|
278
|
+
|
|
279
|
+
def main():
    """Benchmark every balancer on every workload and save the charts.

    Each (workload, balancer) pair is evaluated with ``run_trials`` using
    5 trials, 50 servers and 30,000 requests. Four PNG charts are written
    into the ``sim_results`` directory.
    """
    outdir = "sim_results"
    ensure_dir(outdir)

    # Each factory receives the list of server ids and returns a balancer.
    balancers = {
        "Random": RandomBalancer,
        "RoundRobin": RoundRobinBalancer,
        "Po2C": Po2CBalancer,
        "StickyLazy": lambda servers: StickyLazyBalancer(servers, threshold=3),
    }

    # Workload name -> base rate handed to the simulation.
    workloads = {"light": 4.0, "medium": 10.0, "heavy": 18.0}

    results = {}
    for workload, rate in workloads.items():
        per_algo = results.setdefault(workload, {})
        for label, make_balancer in balancers.items():
            print(f"Running {label} on {workload}...")
            per_algo[label] = run_trials(
                balancer_factory=make_balancer,
                num_trials=5,
                num_servers=50,
                num_requests=30_000,
                base_rate=rate,
            )

    # (metric key, chart title, y-axis label, output file name)
    charts = (
        ("avg_probes", "Average probes per request", "Probes/request", "avg_probes.png"),
        ("avg_wait_time", "Average wait time", "Seconds", "avg_wait_time.png"),
        ("avg_response_time", "Average response time", "Seconds", "avg_response_time.png"),
        ("peak_load_mean", "Average peak load", "Active requests", "peak_load.png"),
    )
    for metric, title, ylabel, filename in charts:
        grouped_bar_chart(
            results,
            metric,
            title,
            ylabel,
            os.path.join(outdir, filename),
        )

    print(f"Saved PNGs to: {outdir}/")
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
class Po2CBalancer:
    """Power-of-two-choices load balancer.

    Tracks an in-flight request count per server in ``loads``. Each call to
    ``get_next`` increments the chosen server's count; ``release`` decrements
    it again.
    """

    def __init__(self, servers):
        """Store the server pool and start every server at load 0.

        Args:
            servers: Iterable of hashable server identifiers. It is consumed
                exactly once, so one-shot iterables such as generators work.
        """
        self.servers = list(servers)
        # Build from the stored list: iterating `servers` a second time would
        # yield nothing for a generator and leave `loads` empty.
        self.loads = {server: 0 for server in self.servers}

    def get_next(self):
        """Pick two distinct random servers and route to the less loaded one.

        On a tie the second sampled server wins. A pool with a single server
        always routes to that server.

        Returns:
            The chosen server identifier.

        Raises:
            ValueError: If the pool is empty.
        """
        if len(self.servers) == 1:
            # random.sample cannot draw two items from a single-element pool.
            winner = self.servers[0]
        else:
            s1, s2 = random.sample(self.servers, 2)
            winner = s1 if self.loads[s1] < self.loads[s2] else s2
        self.loads[winner] += 1
        return winner

    def release(self, server):
        """Mark one request on *server* as finished.

        Unknown servers and servers already at load 0 are ignored, so the
        load never becomes negative.
        """
        if server in self.loads and self.loads[server] > 0:
            self.loads[server] -= 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class StickyLazyBalancer:
    """Load balancer that reuses a cached server before probing again.

    Tracks an in-flight request count per server in ``loads``. After a probe
    finds a server below ``threshold``, that server is cached together with a
    "blind capacity": the number of further requests that may be sent to it
    without probing.
    """

    def __init__(self, servers, threshold=3):
        """Store the server pool and start with an empty cache.

        Args:
            servers: Iterable of hashable server identifiers. It is consumed
                exactly once, so one-shot iterables such as generators work.
            threshold: Load below which a probed server is accepted and
                cached.
        """
        self.servers = list(servers)
        self.threshold = threshold
        # Build from the stored list: iterating `servers` a second time would
        # yield nothing for a generator and leave `loads` empty.
        self.loads = {server: 0 for server in self.servers}
        self.cached_server = None
        self.blind_capacity = 0

    def get_next(self):
        """Choose the server for the next request and increment its load.

        Work flow:
        1. Cache hit: if a cached server has blind capacity > 0, route to it.
        2. Single probe: otherwise pick a random server; if its load is below
           the threshold, route to it and cache it.
        3. Dual probe (Po2C): otherwise probe a second, different server and
           route to the less loaded of the two (the second wins ties). The
           winner is cached only if its new load is still below the
           threshold; otherwise the cache is cleared.

        Returns:
            The chosen server identifier.

        Raises:
            IndexError: If the pool is empty.
        """
        # Cache hit: spend one unit of blind capacity, no probing needed.
        if self.cached_server is not None and self.blind_capacity > 0:
            self.blind_capacity -= 1
            self.loads[self.cached_server] += 1
            return self.cached_server

        # Cache unusable: first random probe.
        s1 = random.choice(self.servers)

        if self.loads[s1] < self.threshold:
            self.loads[s1] += 1
            self.cached_server = s1
            # Remaining headroom after counting the request just routed.
            self.blind_capacity = self.threshold - self.loads[s1]
            return s1

        # s1 is at or above the threshold: probe a second server.
        s2 = self._probe_other(s1)

        winner = s1 if self.loads[s1] < self.loads[s2] else s2
        self.loads[winner] += 1

        if self.loads[winner] < self.threshold:
            self.cached_server = winner
            self.blind_capacity = self.threshold - self.loads[winner]
        else:
            self.cached_server = None
            self.blind_capacity = 0

        return winner

    def _probe_other(self, first):
        """Return a random server other than *first* (or *first* if alone)."""
        if len(self.servers) < 2:
            return first
        # Two distinct picks: if the first pick is `first`, the second is a
        # uniform draw from the others; otherwise the first pick already is.
        a, b = random.sample(self.servers, 2)
        return b if a == first else a

    def release(self, server):
        """Mark one request on *server* as finished.

        Unknown servers and servers already at load 0 are ignored, so the
        load never becomes negative.
        """
        if server in self.loads and self.loads[server] > 0:
            self.loads[server] -= 1
|
|
70
|
+
|
|
71
|
+
|