speedy-utils 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +1 -5
- llm_utils/chat_format/display.py +17 -4
- llm_utils/chat_format/transform.py +9 -9
- llm_utils/group_messages.py +1 -1
- llm_utils/lm/async_lm/__init__.py +7 -0
- llm_utils/lm/async_lm/_utils.py +201 -0
- llm_utils/lm/async_lm/async_llm_task.py +509 -0
- llm_utils/lm/async_lm/async_lm.py +387 -0
- llm_utils/lm/async_lm/async_lm_base.py +405 -0
- llm_utils/lm/async_lm/lm_specific.py +136 -0
- llm_utils/lm/utils.py +1 -3
- llm_utils/scripts/vllm_load_balancer.py +244 -147
- speedy_utils/__init__.py +3 -1
- speedy_utils/common/notebook_utils.py +4 -4
- speedy_utils/common/report_manager.py +2 -3
- speedy_utils/common/utils_cache.py +233 -3
- speedy_utils/common/utils_io.py +2 -0
- speedy_utils/scripts/mpython.py +1 -3
- {speedy_utils-1.1.5.dist-info → speedy_utils-1.1.7.dist-info}/METADATA +1 -1
- speedy_utils-1.1.7.dist-info/RECORD +39 -0
- llm_utils/lm/async_lm.py +0 -942
- llm_utils/lm/chat_html.py +0 -246
- llm_utils/lm/lm_json.py +0 -68
- llm_utils/lm/sync_lm.py +0 -943
- speedy_utils-1.1.5.dist-info/RECORD +0 -37
- {speedy_utils-1.1.5.dist-info → speedy_utils-1.1.7.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.5.dist-info → speedy_utils-1.1.7.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import sys
|
|
3
|
-
import os
|
|
4
|
-
import time
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
from collections import defaultdict
|
|
7
|
-
|
|
8
2
|
import asyncio
|
|
9
3
|
import contextlib
|
|
4
|
+
import os
|
|
10
5
|
import random
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from datetime import datetime
|
|
11
10
|
|
|
12
11
|
import aiohttp
|
|
13
12
|
from loguru import logger
|
|
14
|
-
from tabulate import tabulate
|
|
15
|
-
|
|
16
13
|
from speedy_utils import setup_logger
|
|
14
|
+
from tabulate import tabulate
|
|
17
15
|
|
|
18
16
|
setup_logger(min_interval=5)
|
|
19
17
|
|
|
18
|
+
|
|
20
19
|
# --- CLI Argument Parsing ---
|
|
21
20
|
def parse_args():
|
|
22
21
|
parser = argparse.ArgumentParser(
|
|
@@ -27,14 +26,16 @@ Examples:
|
|
|
27
26
|
python vllm_load_balancer.py 8001 --ports 8140,8150,8160
|
|
28
27
|
python vllm_load_balancer.py 8080 --ports 8140,8150 --host 192.168.1.100
|
|
29
28
|
python vllm_load_balancer.py 8001 --ports 8140,8150 --status-interval 3
|
|
29
|
+
python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms 100
|
|
30
30
|
|
|
31
31
|
Features:
|
|
32
32
|
• Real-time dashboard with color-coded status
|
|
33
33
|
• Automatic health checks and failover
|
|
34
34
|
• Least-connections load balancing
|
|
35
|
+
• Request throttling to prevent server overload
|
|
35
36
|
• Professional terminal interface
|
|
36
37
|
• Connection statistics and monitoring
|
|
37
|
-
"""
|
|
38
|
+
""",
|
|
38
39
|
)
|
|
39
40
|
parser.add_argument(
|
|
40
41
|
"port",
|
|
@@ -71,8 +72,15 @@ Features:
|
|
|
71
72
|
default=None,
|
|
72
73
|
help="Port for the HTTP stats dashboard (default: proxy port + 1)",
|
|
73
74
|
)
|
|
75
|
+
parser.add_argument(
|
|
76
|
+
"--throttle-ms",
|
|
77
|
+
type=float,
|
|
78
|
+
default=30.0,
|
|
79
|
+
help="Minimum milliseconds between requests to same server (default: 30ms)",
|
|
80
|
+
)
|
|
74
81
|
return parser.parse_args()
|
|
75
82
|
|
|
83
|
+
|
|
76
84
|
# --- Configuration (populated from CLI) ---
|
|
77
85
|
LOAD_BALANCER_HOST = "0.0.0.0"
|
|
78
86
|
LOAD_BALANCER_PORT = 8008 # Will be overwritten by CLI
|
|
@@ -81,11 +89,13 @@ BACKEND_HOST = "localhost" # Will be overwritten by CLI
|
|
|
81
89
|
BACKEND_PORTS = [] # Will be overwritten by CLI
|
|
82
90
|
STATUS_PRINT_INTERVAL = 5
|
|
83
91
|
HEALTH_CHECK_TIMEOUT = 2
|
|
92
|
+
THROTTLE_MS = 30.0 # Will be overwritten by CLI
|
|
84
93
|
BUFFER_SIZE = 4096
|
|
85
94
|
|
|
86
95
|
# --- Global Shared State ---
|
|
87
96
|
available_servers = []
|
|
88
97
|
connection_counts = defaultdict(int)
|
|
98
|
+
last_send_times = defaultdict(float) # Track last dispatch time per server
|
|
89
99
|
state_lock = asyncio.Lock()
|
|
90
100
|
start_time = None
|
|
91
101
|
total_connections_served = 0
|
|
@@ -95,10 +105,10 @@ current_active_connections = 0
|
|
|
95
105
|
# --- Terminal Utilities ---
|
|
96
106
|
def clear_terminal():
|
|
97
107
|
"""Clear terminal screen with cross-platform support."""
|
|
98
|
-
if os.name ==
|
|
99
|
-
os.system(
|
|
108
|
+
if os.name == "nt": # Windows
|
|
109
|
+
os.system("cls")
|
|
100
110
|
else: # Unix/Linux/MacOS
|
|
101
|
-
os.system(
|
|
111
|
+
os.system("clear")
|
|
102
112
|
|
|
103
113
|
|
|
104
114
|
def get_terminal_size():
|
|
@@ -114,12 +124,12 @@ def format_uptime(start_time):
|
|
|
114
124
|
"""Format uptime in a human-readable way."""
|
|
115
125
|
if not start_time:
|
|
116
126
|
return "Unknown"
|
|
117
|
-
|
|
127
|
+
|
|
118
128
|
uptime_seconds = time.time() - start_time
|
|
119
129
|
hours = int(uptime_seconds // 3600)
|
|
120
130
|
minutes = int((uptime_seconds % 3600) // 60)
|
|
121
131
|
seconds = int(uptime_seconds % 60)
|
|
122
|
-
|
|
132
|
+
|
|
123
133
|
if hours > 0:
|
|
124
134
|
return f"{hours}h {minutes}m {seconds}s"
|
|
125
135
|
elif minutes > 0:
|
|
@@ -132,7 +142,7 @@ def print_banner():
|
|
|
132
142
|
"""Print a professional startup banner."""
|
|
133
143
|
columns, _ = get_terminal_size()
|
|
134
144
|
banner_width = min(columns - 4, 80)
|
|
135
|
-
|
|
145
|
+
|
|
136
146
|
print("=" * banner_width)
|
|
137
147
|
print(f"{'🚀 vLLM Load Balancer':^{banner_width}}")
|
|
138
148
|
print(f"{'High-Performance Async TCP/HTTP Load Balancer':^{banner_width}}")
|
|
@@ -143,44 +153,45 @@ def print_banner():
|
|
|
143
153
|
print(f"Backend Ports: {', '.join(map(str, BACKEND_PORTS))}")
|
|
144
154
|
print(f"Health Check Interval: 10s (Timeout: {HEALTH_CHECK_TIMEOUT}s)")
|
|
145
155
|
print(f"Status Update Interval: {STATUS_PRINT_INTERVAL}s")
|
|
156
|
+
print(f"Request Throttling: {THROTTLE_MS}ms minimum between requests")
|
|
146
157
|
print("=" * banner_width)
|
|
147
158
|
print()
|
|
148
159
|
|
|
149
160
|
|
|
150
161
|
# --- ANSI Color Codes ---
|
|
151
162
|
class Colors:
|
|
152
|
-
RESET =
|
|
153
|
-
BOLD =
|
|
154
|
-
DIM =
|
|
155
|
-
|
|
163
|
+
RESET = "\033[0m"
|
|
164
|
+
BOLD = "\033[1m"
|
|
165
|
+
DIM = "\033[2m"
|
|
166
|
+
|
|
156
167
|
# Foreground colors
|
|
157
|
-
BLACK =
|
|
158
|
-
RED =
|
|
159
|
-
GREEN =
|
|
160
|
-
YELLOW =
|
|
161
|
-
BLUE =
|
|
162
|
-
MAGENTA =
|
|
163
|
-
CYAN =
|
|
164
|
-
WHITE =
|
|
165
|
-
|
|
168
|
+
BLACK = "\033[30m"
|
|
169
|
+
RED = "\033[31m"
|
|
170
|
+
GREEN = "\033[32m"
|
|
171
|
+
YELLOW = "\033[33m"
|
|
172
|
+
BLUE = "\033[34m"
|
|
173
|
+
MAGENTA = "\033[35m"
|
|
174
|
+
CYAN = "\033[36m"
|
|
175
|
+
WHITE = "\033[37m"
|
|
176
|
+
|
|
166
177
|
# Bright colors
|
|
167
|
-
BRIGHT_BLACK =
|
|
168
|
-
BRIGHT_RED =
|
|
169
|
-
BRIGHT_GREEN =
|
|
170
|
-
BRIGHT_YELLOW =
|
|
171
|
-
BRIGHT_BLUE =
|
|
172
|
-
BRIGHT_MAGENTA =
|
|
173
|
-
BRIGHT_CYAN =
|
|
174
|
-
BRIGHT_WHITE =
|
|
175
|
-
|
|
178
|
+
BRIGHT_BLACK = "\033[90m"
|
|
179
|
+
BRIGHT_RED = "\033[91m"
|
|
180
|
+
BRIGHT_GREEN = "\033[92m"
|
|
181
|
+
BRIGHT_YELLOW = "\033[93m"
|
|
182
|
+
BRIGHT_BLUE = "\033[94m"
|
|
183
|
+
BRIGHT_MAGENTA = "\033[95m"
|
|
184
|
+
BRIGHT_CYAN = "\033[96m"
|
|
185
|
+
BRIGHT_WHITE = "\033[97m"
|
|
186
|
+
|
|
176
187
|
# Background colors
|
|
177
|
-
BG_RED =
|
|
178
|
-
BG_GREEN =
|
|
179
|
-
BG_YELLOW =
|
|
180
|
-
BG_BLUE =
|
|
188
|
+
BG_RED = "\033[41m"
|
|
189
|
+
BG_GREEN = "\033[42m"
|
|
190
|
+
BG_YELLOW = "\033[43m"
|
|
191
|
+
BG_BLUE = "\033[44m"
|
|
181
192
|
|
|
182
193
|
|
|
183
|
-
# --- Helper Functions ---
|
|
194
|
+
# --- Helper Functions ---
|
|
184
195
|
async def relay_data(reader, writer, direction):
|
|
185
196
|
"""Reads data from reader and writes to writer until EOF or error."""
|
|
186
197
|
try:
|
|
@@ -224,7 +235,6 @@ async def safe_close_writer(writer):
|
|
|
224
235
|
logger.debug(f"Error closing writer in context manager: {e}")
|
|
225
236
|
|
|
226
237
|
|
|
227
|
-
|
|
228
238
|
# --- Health Check for Provided Ports ---
|
|
229
239
|
async def check_server_health(session, host, port):
|
|
230
240
|
"""Performs an HTTP GET request to the /health endpoint."""
|
|
@@ -313,6 +323,11 @@ async def scan_and_update_servers():
|
|
|
313
323
|
logger.debug(
|
|
314
324
|
f"Removed connection count entry for unavailable server {server}"
|
|
315
325
|
)
|
|
326
|
+
if server in last_send_times:
|
|
327
|
+
del last_send_times[server]
|
|
328
|
+
logger.debug(
|
|
329
|
+
f"Removed throttling timestamp for unavailable server {server}"
|
|
330
|
+
)
|
|
316
331
|
|
|
317
332
|
available_servers = sorted(list(current_set))
|
|
318
333
|
for server in available_servers:
|
|
@@ -333,11 +348,10 @@ async def scan_and_update_servers():
|
|
|
333
348
|
await asyncio.sleep(10)
|
|
334
349
|
|
|
335
350
|
|
|
336
|
-
# --- Core Load Balancer Logic
|
|
351
|
+
# --- Core Load Balancer Logic ---
|
|
337
352
|
async def handle_client(client_reader, client_writer):
|
|
338
353
|
"""Handles a single client connection."""
|
|
339
354
|
client_addr = client_writer.get_extra_info("peername")
|
|
340
|
-
logger.info(f"Accepted connection from {client_addr}")
|
|
341
355
|
|
|
342
356
|
backend_server = None
|
|
343
357
|
backend_reader = None
|
|
@@ -376,15 +390,10 @@ async def handle_client(client_reader, client_writer):
|
|
|
376
390
|
connection_counts[selected_server] += 1
|
|
377
391
|
backend_server = selected_server
|
|
378
392
|
server_selected = True
|
|
379
|
-
|
|
393
|
+
|
|
380
394
|
# Update global statistics
|
|
381
|
-
global total_connections_served, current_active_connections
|
|
382
395
|
total_connections_served += 1
|
|
383
396
|
current_active_connections += 1
|
|
384
|
-
|
|
385
|
-
logger.info(
|
|
386
|
-
f"Routing {client_addr} to {backend_server} (Current connections: {connection_counts[backend_server]})"
|
|
387
|
-
)
|
|
388
397
|
else:
|
|
389
398
|
logger.error(
|
|
390
399
|
f"Logic error: No server chosen despite available servers list not being empty for {client_addr}."
|
|
@@ -402,6 +411,42 @@ async def handle_client(client_reader, client_writer):
|
|
|
402
411
|
pass
|
|
403
412
|
server_selected = False
|
|
404
413
|
return
|
|
414
|
+
|
|
415
|
+
# --- Throttling Logic (Concurrency Safe) ---
|
|
416
|
+
# Atomically schedule the next request to avoid thundering herd on a single server.
|
|
417
|
+
sleep_duration_s = 0
|
|
418
|
+
now_ms = time.time() * 1000
|
|
419
|
+
|
|
420
|
+
async with state_lock:
|
|
421
|
+
# Get the time the last request was DISPATCHED to this server.
|
|
422
|
+
last_dispatch_ms = last_send_times.get(backend_server, 0)
|
|
423
|
+
|
|
424
|
+
# Calculate when the next request is allowed to be sent.
|
|
425
|
+
next_allowed_ms = last_dispatch_ms + THROTTLE_MS
|
|
426
|
+
|
|
427
|
+
# If the next allowed time is in the future, we must wait.
|
|
428
|
+
if next_allowed_ms > now_ms:
|
|
429
|
+
sleep_duration_s = (next_allowed_ms - now_ms) / 1000
|
|
430
|
+
# The next request after this one will be scheduled relative to when
|
|
431
|
+
# THIS request is actually sent (i.e., after its delay).
|
|
432
|
+
new_dispatch_time_ms = next_allowed_ms
|
|
433
|
+
else:
|
|
434
|
+
# We can send immediately.
|
|
435
|
+
sleep_duration_s = 0
|
|
436
|
+
new_dispatch_time_ms = now_ms
|
|
437
|
+
|
|
438
|
+
# CRITICAL: Update the dispatch time for the *next* caller immediately inside the lock.
|
|
439
|
+
last_send_times[backend_server] = new_dispatch_time_ms
|
|
440
|
+
|
|
441
|
+
if sleep_duration_s > 0:
|
|
442
|
+
logger.debug(
|
|
443
|
+
f"Throttling request to {backend_server} for {sleep_duration_s:.3f}s to maintain >{THROTTLE_MS}ms interval."
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Sleep outside the lock to avoid blocking other tasks.
|
|
447
|
+
if sleep_duration_s > 0:
|
|
448
|
+
await asyncio.sleep(sleep_duration_s)
|
|
449
|
+
|
|
405
450
|
try:
|
|
406
451
|
logger.debug(
|
|
407
452
|
f"Attempting connection to backend {backend_server} for {client_addr}"
|
|
@@ -473,16 +518,14 @@ async def handle_client(client_reader, client_writer):
|
|
|
473
518
|
except Exception as e:
|
|
474
519
|
logger.error(f"Error handling client {client_addr}: {e}")
|
|
475
520
|
finally:
|
|
476
|
-
logger.info(f"Closing connection for {client_addr}")
|
|
477
521
|
# Decrement connection count only if we successfully selected/incremented
|
|
478
522
|
if backend_server and server_selected:
|
|
479
523
|
async with state_lock:
|
|
480
524
|
if backend_server in connection_counts:
|
|
481
525
|
if connection_counts[backend_server] > 0:
|
|
482
526
|
connection_counts[backend_server] -= 1
|
|
483
|
-
current_active_connections = max(
|
|
484
|
-
|
|
485
|
-
f"Connection closed for {client_addr}. Backend {backend_server} connections: {connection_counts[backend_server]}"
|
|
527
|
+
current_active_connections = max(
|
|
528
|
+
0, current_active_connections - 1
|
|
486
529
|
)
|
|
487
530
|
else:
|
|
488
531
|
logger.warning(
|
|
@@ -491,7 +534,6 @@ async def handle_client(client_reader, client_writer):
|
|
|
491
534
|
connection_counts[backend_server] = 0
|
|
492
535
|
|
|
493
536
|
|
|
494
|
-
|
|
495
537
|
# --- Status Reporting Task ---
|
|
496
538
|
async def print_status_periodically():
|
|
497
539
|
"""Periodically displays a professional real-time status dashboard."""
|
|
@@ -514,35 +556,54 @@ async def display_status_dashboard():
|
|
|
514
556
|
# Get terminal dimensions for responsive layout
|
|
515
557
|
columns, rows = get_terminal_size()
|
|
516
558
|
dash_width = min(columns - 4, 100)
|
|
517
|
-
|
|
559
|
+
|
|
518
560
|
# Header with title and timestamp
|
|
519
561
|
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
520
562
|
print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
|
|
521
|
-
print(
|
|
522
|
-
|
|
563
|
+
print(
|
|
564
|
+
f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}"
|
|
565
|
+
)
|
|
566
|
+
print(
|
|
567
|
+
f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}"
|
|
568
|
+
)
|
|
523
569
|
print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
|
|
524
570
|
print()
|
|
525
|
-
|
|
571
|
+
|
|
526
572
|
# System Information Section
|
|
527
573
|
uptime = format_uptime(start_time)
|
|
528
574
|
print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📊 System Information{Colors.RESET}")
|
|
529
575
|
print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
|
|
530
576
|
print(f"{Colors.YELLOW}🕐 Current Time:{Colors.RESET} {current_time}")
|
|
531
577
|
print(f"{Colors.YELLOW}⏱️ Uptime:{Colors.RESET} {uptime}")
|
|
532
|
-
print(
|
|
578
|
+
print(
|
|
579
|
+
f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}"
|
|
580
|
+
)
|
|
533
581
|
print(f"{Colors.YELLOW}🎯 Backend Host:{Colors.RESET} {BACKEND_HOST}")
|
|
534
|
-
print(
|
|
582
|
+
print(
|
|
583
|
+
f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}"
|
|
584
|
+
)
|
|
585
|
+
print(
|
|
586
|
+
f"{Colors.YELLOW}⚡ Request Throttling:{Colors.RESET} {THROTTLE_MS}ms minimum"
|
|
587
|
+
)
|
|
535
588
|
print()
|
|
536
|
-
|
|
589
|
+
|
|
537
590
|
# Connection Statistics Section
|
|
538
591
|
print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📈 Connection Statistics{Colors.RESET}")
|
|
539
592
|
print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
|
|
540
|
-
print(
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
print(
|
|
593
|
+
print(
|
|
594
|
+
f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}"
|
|
595
|
+
)
|
|
596
|
+
print(
|
|
597
|
+
f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}"
|
|
598
|
+
)
|
|
599
|
+
print(
|
|
600
|
+
f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s"
|
|
601
|
+
)
|
|
602
|
+
print(
|
|
603
|
+
f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s"
|
|
604
|
+
)
|
|
544
605
|
print()
|
|
545
|
-
|
|
606
|
+
|
|
546
607
|
# Backend Servers Status
|
|
547
608
|
print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}Backend Servers Status{Colors.RESET}")
|
|
548
609
|
print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
|
|
@@ -552,7 +613,7 @@ async def display_status_dashboard():
|
|
|
552
613
|
f"{Colors.BOLD}Host{Colors.RESET}",
|
|
553
614
|
f"{Colors.BOLD}Port{Colors.RESET}",
|
|
554
615
|
f"{Colors.BOLD}Active Conn.{Colors.RESET}",
|
|
555
|
-
f"{Colors.BOLD}Status{Colors.RESET}"
|
|
616
|
+
f"{Colors.BOLD}Status{Colors.RESET}",
|
|
556
617
|
]
|
|
557
618
|
|
|
558
619
|
table_data = []
|
|
@@ -580,13 +641,15 @@ async def display_status_dashboard():
|
|
|
580
641
|
else f"{Colors.BG_RED}{Colors.WHITE} OFFLINE {Colors.RESET}"
|
|
581
642
|
)
|
|
582
643
|
|
|
583
|
-
table_data.append(
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
644
|
+
table_data.append(
|
|
645
|
+
[
|
|
646
|
+
f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
|
|
647
|
+
BACKEND_HOST,
|
|
648
|
+
str(port),
|
|
649
|
+
conn_display,
|
|
650
|
+
status_display,
|
|
651
|
+
]
|
|
652
|
+
)
|
|
590
653
|
|
|
591
654
|
try:
|
|
592
655
|
table = tabulate(table_data, headers=headers, tablefmt="fancy_grid")
|
|
@@ -594,13 +657,23 @@ async def display_status_dashboard():
|
|
|
594
657
|
print()
|
|
595
658
|
|
|
596
659
|
# Summary metrics
|
|
597
|
-
online_count = sum(
|
|
598
|
-
|
|
660
|
+
online_count = sum(
|
|
661
|
+
1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available
|
|
662
|
+
)
|
|
663
|
+
avg_connections = (
|
|
664
|
+
total_backend_connections / online_count if online_count else 0
|
|
665
|
+
)
|
|
599
666
|
print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📋 Summary{Colors.RESET}")
|
|
600
667
|
print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 4)}{Colors.RESET}")
|
|
601
|
-
print(
|
|
602
|
-
|
|
603
|
-
|
|
668
|
+
print(
|
|
669
|
+
f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}"
|
|
670
|
+
)
|
|
671
|
+
print(
|
|
672
|
+
f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}"
|
|
673
|
+
)
|
|
674
|
+
print(
|
|
675
|
+
f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}"
|
|
676
|
+
)
|
|
604
677
|
|
|
605
678
|
except Exception as e:
|
|
606
679
|
logger.error(f"Error displaying status table: {e}")
|
|
@@ -609,15 +682,17 @@ async def display_status_dashboard():
|
|
|
609
682
|
# Footer with refresh info
|
|
610
683
|
print()
|
|
611
684
|
print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
|
|
612
|
-
print(
|
|
685
|
+
print(
|
|
686
|
+
f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}"
|
|
687
|
+
)
|
|
613
688
|
print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
|
|
614
689
|
print()
|
|
615
690
|
|
|
616
691
|
|
|
617
|
-
|
|
618
692
|
# --- HTTP Stats Server ---
|
|
619
693
|
from aiohttp import web
|
|
620
694
|
|
|
695
|
+
|
|
621
696
|
async def stats_json(request):
|
|
622
697
|
async with state_lock:
|
|
623
698
|
# Build a list of all configured servers, with status and connections
|
|
@@ -626,12 +701,16 @@ async def stats_json(request):
|
|
|
626
701
|
for port in BACKEND_PORTS:
|
|
627
702
|
server = (BACKEND_HOST, port)
|
|
628
703
|
is_online = server in available_set
|
|
629
|
-
all_servers.append(
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
704
|
+
all_servers.append(
|
|
705
|
+
{
|
|
706
|
+
"host": BACKEND_HOST,
|
|
707
|
+
"port": port,
|
|
708
|
+
"active_connections": connection_counts.get(server, 0)
|
|
709
|
+
if is_online
|
|
710
|
+
else 0,
|
|
711
|
+
"status": "ONLINE" if is_online else "OFFLINE",
|
|
712
|
+
}
|
|
713
|
+
)
|
|
635
714
|
stats = {
|
|
636
715
|
"time": datetime.now().isoformat(),
|
|
637
716
|
"uptime": format_uptime(start_time),
|
|
@@ -643,10 +722,12 @@ async def stats_json(request):
|
|
|
643
722
|
"current_active_connections": current_active_connections,
|
|
644
723
|
"health_check_timeout": HEALTH_CHECK_TIMEOUT,
|
|
645
724
|
"status_update_interval": STATUS_PRINT_INTERVAL,
|
|
725
|
+
"throttle_ms": THROTTLE_MS,
|
|
646
726
|
"servers": all_servers,
|
|
647
727
|
}
|
|
648
728
|
return web.json_response(stats)
|
|
649
729
|
|
|
730
|
+
|
|
650
731
|
async def stats_page(request):
|
|
651
732
|
# High-quality HTML dashboard with auto-refresh and charts
|
|
652
733
|
return web.Response(
|
|
@@ -786,86 +867,101 @@ async def stats_page(request):
|
|
|
786
867
|
</script>
|
|
787
868
|
</body>
|
|
788
869
|
</html>
|
|
789
|
-
"""
|
|
870
|
+
""",
|
|
790
871
|
)
|
|
791
872
|
|
|
873
|
+
|
|
792
874
|
async def start_stats_server(loop):
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
875
|
+
app = web.Application()
|
|
876
|
+
app.router.add_get("/stats", stats_page)
|
|
877
|
+
app.router.add_get("/stats.json", stats_json)
|
|
878
|
+
runner = web.AppRunner(app)
|
|
879
|
+
await runner.setup()
|
|
880
|
+
site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
|
|
881
|
+
await site.start()
|
|
882
|
+
logger.info(
|
|
883
|
+
f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats"
|
|
884
|
+
)
|
|
885
|
+
|
|
801
886
|
|
|
802
887
|
async def main():
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
# Start background tasks
|
|
809
|
-
scan_task = asyncio.create_task(scan_and_update_servers())
|
|
810
|
-
status_task = asyncio.create_task(print_status_periodically())
|
|
811
|
-
|
|
812
|
-
# Start HTTP stats server (on STATS_PORT)
|
|
813
|
-
loop = asyncio.get_running_loop()
|
|
814
|
-
await start_stats_server(loop)
|
|
815
|
-
|
|
816
|
-
# Start TCP server (on LOAD_BALANCER_PORT)
|
|
817
|
-
server = await asyncio.start_server(
|
|
818
|
-
handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
|
|
819
|
-
)
|
|
888
|
+
global start_time
|
|
889
|
+
start_time = time.time()
|
|
890
|
+
clear_terminal()
|
|
891
|
+
print_banner()
|
|
820
892
|
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
893
|
+
# Start background tasks
|
|
894
|
+
scan_task = asyncio.create_task(scan_and_update_servers())
|
|
895
|
+
status_task = asyncio.create_task(print_status_periodically())
|
|
896
|
+
|
|
897
|
+
# Start HTTP stats server (on STATS_PORT)
|
|
898
|
+
loop = asyncio.get_running_loop()
|
|
899
|
+
await start_stats_server(loop)
|
|
900
|
+
|
|
901
|
+
# Start TCP server (on LOAD_BALANCER_PORT)
|
|
902
|
+
server = await asyncio.start_server(
|
|
903
|
+
handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
|
|
904
|
+
)
|
|
832
905
|
|
|
833
|
-
|
|
906
|
+
addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
|
|
907
|
+
logger.info(f"Load balancer serving on {addrs}")
|
|
908
|
+
logger.info(f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}")
|
|
909
|
+
print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
|
|
910
|
+
print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
|
|
911
|
+
print(
|
|
912
|
+
f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}"
|
|
913
|
+
)
|
|
914
|
+
print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
|
|
915
|
+
print()
|
|
916
|
+
await asyncio.sleep(2)
|
|
917
|
+
|
|
918
|
+
async with server:
|
|
919
|
+
try:
|
|
920
|
+
await server.serve_forever()
|
|
921
|
+
except asyncio.CancelledError:
|
|
922
|
+
print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
|
|
923
|
+
logger.info("Load balancer server shutting down.")
|
|
924
|
+
except KeyboardInterrupt:
|
|
925
|
+
print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
|
|
926
|
+
logger.info("Shutdown requested by user.")
|
|
927
|
+
finally:
|
|
928
|
+
print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
|
|
929
|
+
logger.info("Cancelling background tasks...")
|
|
930
|
+
scan_task.cancel()
|
|
931
|
+
status_task.cancel()
|
|
834
932
|
try:
|
|
835
|
-
await
|
|
933
|
+
await asyncio.gather(scan_task, status_task, return_exceptions=True)
|
|
836
934
|
except asyncio.CancelledError:
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
logger.info("Shutdown requested by user.")
|
|
842
|
-
finally:
|
|
843
|
-
print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
|
|
844
|
-
logger.info("Cancelling background tasks...")
|
|
845
|
-
scan_task.cancel()
|
|
846
|
-
status_task.cancel()
|
|
847
|
-
try:
|
|
848
|
-
await asyncio.gather(scan_task, status_task, return_exceptions=True)
|
|
849
|
-
except asyncio.CancelledError:
|
|
850
|
-
pass
|
|
851
|
-
print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
|
|
852
|
-
logger.info("Background tasks finished.")
|
|
935
|
+
pass
|
|
936
|
+
print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
|
|
937
|
+
logger.info("Background tasks finished.")
|
|
938
|
+
|
|
853
939
|
|
|
854
940
|
def run_load_balancer():
|
|
855
|
-
global
|
|
941
|
+
global \
|
|
942
|
+
LOAD_BALANCER_PORT, \
|
|
943
|
+
BACKEND_PORTS, \
|
|
944
|
+
BACKEND_HOST, \
|
|
945
|
+
STATUS_PRINT_INTERVAL, \
|
|
946
|
+
HEALTH_CHECK_TIMEOUT, \
|
|
947
|
+
THROTTLE_MS, \
|
|
948
|
+
STATS_PORT
|
|
856
949
|
args = parse_args()
|
|
857
950
|
LOAD_BALANCER_PORT = args.port
|
|
858
951
|
BACKEND_HOST = args.host
|
|
859
952
|
BACKEND_PORTS = [int(p.strip()) for p in args.ports.split(",") if p.strip()]
|
|
860
953
|
STATUS_PRINT_INTERVAL = args.status_interval
|
|
861
954
|
HEALTH_CHECK_TIMEOUT = args.health_timeout
|
|
955
|
+
THROTTLE_MS = args.throttle_ms
|
|
862
956
|
if args.stats_port is not None:
|
|
863
957
|
STATS_PORT = args.stats_port
|
|
864
958
|
else:
|
|
865
959
|
STATS_PORT = LOAD_BALANCER_PORT + 1
|
|
866
960
|
if not BACKEND_PORTS:
|
|
867
961
|
print(f"{Colors.BG_RED}{Colors.WHITE} ❌ ERROR {Colors.RESET}")
|
|
868
|
-
print(
|
|
962
|
+
print(
|
|
963
|
+
f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}"
|
|
964
|
+
)
|
|
869
965
|
logger.critical("No backend ports specified. Use --ports 8140,8150 ...")
|
|
870
966
|
sys.exit(1)
|
|
871
967
|
try:
|
|
@@ -878,5 +974,6 @@ def run_load_balancer():
|
|
|
878
974
|
print(f"{Colors.RED}Critical error in main execution: {e}{Colors.RESET}")
|
|
879
975
|
logger.critical(f"Critical error in main execution: {e}")
|
|
880
976
|
|
|
977
|
+
|
|
881
978
|
if __name__ == "__main__":
|
|
882
|
-
run_load_balancer()
|
|
979
|
+
run_load_balancer()
|
speedy_utils/__init__.py
CHANGED
|
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
|
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
# Cache utilities
|
|
111
|
-
from .common.utils_cache import identify, identify_uuid, memoize
|
|
111
|
+
from .common.utils_cache import amemoize, identify, identify_uuid, memoize
|
|
112
112
|
|
|
113
113
|
# IO utilities
|
|
114
114
|
from .common.utils_io import (
|
|
@@ -197,6 +197,7 @@ __all__ = [
|
|
|
197
197
|
# Function decorators
|
|
198
198
|
"retry_runtime",
|
|
199
199
|
# Cache utilities
|
|
200
|
+
"amemoize",
|
|
200
201
|
"memoize",
|
|
201
202
|
"identify",
|
|
202
203
|
"identify_uuid",
|
|
@@ -226,4 +227,5 @@ __all__ = [
|
|
|
226
227
|
"multi_thread",
|
|
227
228
|
# Notebook utilities
|
|
228
229
|
"change_dir",
|
|
230
|
+
"amemoize",
|
|
229
231
|
]
|