speedy-utils 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ from speedy_utils import setup_logger
17
17
 
18
18
  setup_logger(min_interval=5)
19
19
 
20
+
20
21
  # --- CLI Argument Parsing ---
21
22
  def parse_args():
22
23
  parser = argparse.ArgumentParser(
@@ -27,14 +28,16 @@ Examples:
27
28
  python vllm_load_balancer.py 8001 --ports 8140,8150,8160
28
29
  python vllm_load_balancer.py 8080 --ports 8140,8150 --host 192.168.1.100
29
30
  python vllm_load_balancer.py 8001 --ports 8140,8150 --status-interval 3
31
+ python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms 10
30
32
 
31
33
  Features:
32
34
  • Real-time dashboard with color-coded status
33
35
  • Automatic health checks and failover
34
36
  • Least-connections load balancing
37
+ • Request throttling to prevent server overload
35
38
  • Professional terminal interface
36
39
  • Connection statistics and monitoring
37
- """
40
+ """,
38
41
  )
39
42
  parser.add_argument(
40
43
  "port",
@@ -71,8 +74,15 @@ Features:
71
74
  default=None,
72
75
  help="Port for the HTTP stats dashboard (default: proxy port + 1)",
73
76
  )
77
+ parser.add_argument(
78
+ "--throttle-ms",
79
+ type=float,
80
+ default=30.0,
81
+ help="Minimum milliseconds between requests to same server (default: 5ms)",
82
+ )
74
83
  return parser.parse_args()
75
84
 
85
+
76
86
  # --- Configuration (populated from CLI) ---
77
87
  LOAD_BALANCER_HOST = "0.0.0.0"
78
88
  LOAD_BALANCER_PORT = 8008 # Will be overwritten by CLI
@@ -81,11 +91,13 @@ BACKEND_HOST = "localhost" # Will be overwritten by CLI
81
91
  BACKEND_PORTS = [] # Will be overwritten by CLI
82
92
  STATUS_PRINT_INTERVAL = 5
83
93
  HEALTH_CHECK_TIMEOUT = 2
94
+ THROTTLE_MS = 5.0 # Will be overwritten by CLI
84
95
  BUFFER_SIZE = 4096
85
96
 
86
97
  # --- Global Shared State ---
87
98
  available_servers = []
88
99
  connection_counts = defaultdict(int)
100
+ last_send_times = defaultdict(float) # Track last send time per server
89
101
  state_lock = asyncio.Lock()
90
102
  start_time = None
91
103
  total_connections_served = 0
@@ -95,10 +107,10 @@ current_active_connections = 0
95
107
  # --- Terminal Utilities ---
96
108
  def clear_terminal():
97
109
  """Clear terminal screen with cross-platform support."""
98
- if os.name == 'nt': # Windows
99
- os.system('cls')
110
+ if os.name == "nt": # Windows
111
+ os.system("cls")
100
112
  else: # Unix/Linux/MacOS
101
- os.system('clear')
113
+ os.system("clear")
102
114
 
103
115
 
104
116
  def get_terminal_size():
@@ -114,12 +126,12 @@ def format_uptime(start_time):
114
126
  """Format uptime in a human-readable way."""
115
127
  if not start_time:
116
128
  return "Unknown"
117
-
129
+
118
130
  uptime_seconds = time.time() - start_time
119
131
  hours = int(uptime_seconds // 3600)
120
132
  minutes = int((uptime_seconds % 3600) // 60)
121
133
  seconds = int(uptime_seconds % 60)
122
-
134
+
123
135
  if hours > 0:
124
136
  return f"{hours}h {minutes}m {seconds}s"
125
137
  elif minutes > 0:
@@ -132,7 +144,7 @@ def print_banner():
132
144
  """Print a professional startup banner."""
133
145
  columns, _ = get_terminal_size()
134
146
  banner_width = min(columns - 4, 80)
135
-
147
+
136
148
  print("=" * banner_width)
137
149
  print(f"{'🚀 vLLM Load Balancer':^{banner_width}}")
138
150
  print(f"{'High-Performance Async TCP/HTTP Load Balancer':^{banner_width}}")
@@ -143,41 +155,42 @@ def print_banner():
143
155
  print(f"Backend Ports: {', '.join(map(str, BACKEND_PORTS))}")
144
156
  print(f"Health Check Interval: 10s (Timeout: {HEALTH_CHECK_TIMEOUT}s)")
145
157
  print(f"Status Update Interval: {STATUS_PRINT_INTERVAL}s")
158
+ print(f"Request Throttling: {THROTTLE_MS}ms minimum between requests")
146
159
  print("=" * banner_width)
147
160
  print()
148
161
 
149
162
 
150
163
  # --- ANSI Color Codes ---
151
164
  class Colors:
152
- RESET = '\033[0m'
153
- BOLD = '\033[1m'
154
- DIM = '\033[2m'
155
-
165
+ RESET = "\033[0m"
166
+ BOLD = "\033[1m"
167
+ DIM = "\033[2m"
168
+
156
169
  # Foreground colors
157
- BLACK = '\033[30m'
158
- RED = '\033[31m'
159
- GREEN = '\033[32m'
160
- YELLOW = '\033[33m'
161
- BLUE = '\033[34m'
162
- MAGENTA = '\033[35m'
163
- CYAN = '\033[36m'
164
- WHITE = '\033[37m'
165
-
170
+ BLACK = "\033[30m"
171
+ RED = "\033[31m"
172
+ GREEN = "\033[32m"
173
+ YELLOW = "\033[33m"
174
+ BLUE = "\033[34m"
175
+ MAGENTA = "\033[35m"
176
+ CYAN = "\033[36m"
177
+ WHITE = "\033[37m"
178
+
166
179
  # Bright colors
167
- BRIGHT_BLACK = '\033[90m'
168
- BRIGHT_RED = '\033[91m'
169
- BRIGHT_GREEN = '\033[92m'
170
- BRIGHT_YELLOW = '\033[93m'
171
- BRIGHT_BLUE = '\033[94m'
172
- BRIGHT_MAGENTA = '\033[95m'
173
- BRIGHT_CYAN = '\033[96m'
174
- BRIGHT_WHITE = '\033[97m'
175
-
180
+ BRIGHT_BLACK = "\033[90m"
181
+ BRIGHT_RED = "\033[91m"
182
+ BRIGHT_GREEN = "\033[92m"
183
+ BRIGHT_YELLOW = "\033[93m"
184
+ BRIGHT_BLUE = "\033[94m"
185
+ BRIGHT_MAGENTA = "\033[95m"
186
+ BRIGHT_CYAN = "\033[96m"
187
+ BRIGHT_WHITE = "\033[97m"
188
+
176
189
  # Background colors
177
- BG_RED = '\033[41m'
178
- BG_GREEN = '\033[42m'
179
- BG_YELLOW = '\033[43m'
180
- BG_BLUE = '\033[44m'
190
+ BG_RED = "\033[41m"
191
+ BG_GREEN = "\033[42m"
192
+ BG_YELLOW = "\033[43m"
193
+ BG_BLUE = "\033[44m"
181
194
 
182
195
 
183
196
  # --- Helper Functions --- (relay_data and safe_close_writer remain the same)
@@ -224,7 +237,6 @@ async def safe_close_writer(writer):
224
237
  logger.debug(f"Error closing writer in context manager: {e}")
225
238
 
226
239
 
227
-
228
240
  # --- Health Check for Provided Ports ---
229
241
  async def check_server_health(session, host, port):
230
242
  """Performs an HTTP GET request to the /health endpoint."""
@@ -313,6 +325,11 @@ async def scan_and_update_servers():
313
325
  logger.debug(
314
326
  f"Removed connection count entry for unavailable server {server}"
315
327
  )
328
+ if server in last_send_times:
329
+ del last_send_times[server]
330
+ logger.debug(
331
+ f"Removed throttling timestamp for unavailable server {server}"
332
+ )
316
333
 
317
334
  available_servers = sorted(list(current_set))
318
335
  for server in available_servers:
@@ -337,7 +354,6 @@ async def scan_and_update_servers():
337
354
  async def handle_client(client_reader, client_writer):
338
355
  """Handles a single client connection."""
339
356
  client_addr = client_writer.get_extra_info("peername")
340
- logger.info(f"Accepted connection from {client_addr}")
341
357
 
342
358
  backend_server = None
343
359
  backend_reader = None
@@ -376,15 +392,11 @@ async def handle_client(client_reader, client_writer):
376
392
  connection_counts[selected_server] += 1
377
393
  backend_server = selected_server
378
394
  server_selected = True
379
-
395
+
380
396
  # Update global statistics
381
397
  global total_connections_served, current_active_connections
382
398
  total_connections_served += 1
383
399
  current_active_connections += 1
384
-
385
- logger.info(
386
- f"Routing {client_addr} to {backend_server} (Current connections: {connection_counts[backend_server]})"
387
- )
388
400
  else:
389
401
  logger.error(
390
402
  f"Logic error: No server chosen despite available servers list not being empty for {client_addr}."
@@ -402,6 +414,29 @@ async def handle_client(client_reader, client_writer):
402
414
  pass
403
415
  server_selected = False
404
416
  return
417
+
418
+ # --- Throttling Logic ---
419
+ # Check if we need to throttle requests to avoid overwhelming the backend
420
+ current_time = time.time() * 1000 # Convert to milliseconds
421
+ sleep_time = 0
422
+ async with state_lock:
423
+ last_send_time = last_send_times.get(backend_server, 0)
424
+ time_since_last_send = current_time - last_send_time
425
+
426
+ if time_since_last_send < THROTTLE_MS:
427
+ sleep_time = (THROTTLE_MS - time_since_last_send) / 1000 # Convert to seconds
428
+ logger.debug(
429
+ f"Throttling request to {backend_server} for {sleep_time:.3f}s (last send: {time_since_last_send:.1f}ms ago)"
430
+ )
431
+
432
+ # Sleep outside the lock to avoid blocking other clients
433
+ if sleep_time > 0:
434
+ await asyncio.sleep(sleep_time)
435
+
436
+ # Update last send time after throttling
437
+ async with state_lock:
438
+ last_send_times[backend_server] = time.time() * 1000
439
+
405
440
  try:
406
441
  logger.debug(
407
442
  f"Attempting connection to backend {backend_server} for {client_addr}"
@@ -473,16 +508,14 @@ async def handle_client(client_reader, client_writer):
473
508
  except Exception as e:
474
509
  logger.error(f"Error handling client {client_addr}: {e}")
475
510
  finally:
476
- logger.info(f"Closing connection for {client_addr}")
477
511
  # Decrement connection count only if we successfully selected/incremented
478
512
  if backend_server and server_selected:
479
513
  async with state_lock:
480
514
  if backend_server in connection_counts:
481
515
  if connection_counts[backend_server] > 0:
482
516
  connection_counts[backend_server] -= 1
483
- current_active_connections = max(0, current_active_connections - 1)
484
- logger.info(
485
- f"Connection closed for {client_addr}. Backend {backend_server} connections: {connection_counts[backend_server]}"
517
+ current_active_connections = max(
518
+ 0, current_active_connections - 1
486
519
  )
487
520
  else:
488
521
  logger.warning(
@@ -491,7 +524,6 @@ async def handle_client(client_reader, client_writer):
491
524
  connection_counts[backend_server] = 0
492
525
 
493
526
 
494
-
495
527
  # --- Status Reporting Task ---
496
528
  async def print_status_periodically():
497
529
  """Periodically displays a professional real-time status dashboard."""
@@ -514,35 +546,52 @@ async def display_status_dashboard():
514
546
  # Get terminal dimensions for responsive layout
515
547
  columns, rows = get_terminal_size()
516
548
  dash_width = min(columns - 4, 100)
517
-
549
+
518
550
  # Header with title and timestamp
519
551
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
520
552
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
521
- print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}")
522
- print(f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}")
553
+ print(
554
+ f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}"
555
+ )
556
+ print(
557
+ f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}"
558
+ )
523
559
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
524
560
  print()
525
-
561
+
526
562
  # System Information Section
527
563
  uptime = format_uptime(start_time)
528
564
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📊 System Information{Colors.RESET}")
529
565
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
530
566
  print(f"{Colors.YELLOW}🕐 Current Time:{Colors.RESET} {current_time}")
531
567
  print(f"{Colors.YELLOW}⏱️ Uptime:{Colors.RESET} {uptime}")
532
- print(f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}")
568
+ print(
569
+ f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}"
570
+ )
533
571
  print(f"{Colors.YELLOW}🎯 Backend Host:{Colors.RESET} {BACKEND_HOST}")
534
- print(f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}")
572
+ print(
573
+ f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}"
574
+ )
575
+ print(f"{Colors.YELLOW}⚡ Request Throttling:{Colors.RESET} {THROTTLE_MS}ms minimum")
535
576
  print()
536
-
577
+
537
578
  # Connection Statistics Section
538
579
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📈 Connection Statistics{Colors.RESET}")
539
580
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
540
- print(f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}")
541
- print(f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}")
542
- print(f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s")
543
- print(f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s")
581
+ print(
582
+ f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}"
583
+ )
584
+ print(
585
+ f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}"
586
+ )
587
+ print(
588
+ f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s"
589
+ )
590
+ print(
591
+ f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s"
592
+ )
544
593
  print()
545
-
594
+
546
595
  # Backend Servers Status
547
596
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}Backend Servers Status{Colors.RESET}")
548
597
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
@@ -552,7 +601,7 @@ async def display_status_dashboard():
552
601
  f"{Colors.BOLD}Host{Colors.RESET}",
553
602
  f"{Colors.BOLD}Port{Colors.RESET}",
554
603
  f"{Colors.BOLD}Active Conn.{Colors.RESET}",
555
- f"{Colors.BOLD}Status{Colors.RESET}"
604
+ f"{Colors.BOLD}Status{Colors.RESET}",
556
605
  ]
557
606
 
558
607
  table_data = []
@@ -580,13 +629,15 @@ async def display_status_dashboard():
580
629
  else f"{Colors.BG_RED}{Colors.WHITE} OFFLINE {Colors.RESET}"
581
630
  )
582
631
 
583
- table_data.append([
584
- f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
585
- BACKEND_HOST,
586
- str(port),
587
- conn_display,
588
- status_display
589
- ])
632
+ table_data.append(
633
+ [
634
+ f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
635
+ BACKEND_HOST,
636
+ str(port),
637
+ conn_display,
638
+ status_display,
639
+ ]
640
+ )
590
641
 
591
642
  try:
592
643
  table = tabulate(table_data, headers=headers, tablefmt="fancy_grid")
@@ -594,13 +645,23 @@ async def display_status_dashboard():
594
645
  print()
595
646
 
596
647
  # Summary metrics
597
- online_count = sum(1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available)
598
- avg_connections = total_backend_connections / online_count if online_count else 0
648
+ online_count = sum(
649
+ 1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available
650
+ )
651
+ avg_connections = (
652
+ total_backend_connections / online_count if online_count else 0
653
+ )
599
654
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📋 Summary{Colors.RESET}")
600
655
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 4)}{Colors.RESET}")
601
- print(f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}")
602
- print(f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}")
603
- print(f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}")
656
+ print(
657
+ f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}"
658
+ )
659
+ print(
660
+ f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}"
661
+ )
662
+ print(
663
+ f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}"
664
+ )
604
665
 
605
666
  except Exception as e:
606
667
  logger.error(f"Error displaying status table: {e}")
@@ -609,15 +670,17 @@ async def display_status_dashboard():
609
670
  # Footer with refresh info
610
671
  print()
611
672
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
612
- print(f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}")
673
+ print(
674
+ f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}"
675
+ )
613
676
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
614
677
  print()
615
678
 
616
679
 
617
-
618
680
  # --- HTTP Stats Server ---
619
681
  from aiohttp import web
620
682
 
683
+
621
684
  async def stats_json(request):
622
685
  async with state_lock:
623
686
  # Build a list of all configured servers, with status and connections
@@ -626,12 +689,16 @@ async def stats_json(request):
626
689
  for port in BACKEND_PORTS:
627
690
  server = (BACKEND_HOST, port)
628
691
  is_online = server in available_set
629
- all_servers.append({
630
- "host": BACKEND_HOST,
631
- "port": port,
632
- "active_connections": connection_counts.get(server, 0) if is_online else 0,
633
- "status": "ONLINE" if is_online else "OFFLINE",
634
- })
692
+ all_servers.append(
693
+ {
694
+ "host": BACKEND_HOST,
695
+ "port": port,
696
+ "active_connections": connection_counts.get(server, 0)
697
+ if is_online
698
+ else 0,
699
+ "status": "ONLINE" if is_online else "OFFLINE",
700
+ }
701
+ )
635
702
  stats = {
636
703
  "time": datetime.now().isoformat(),
637
704
  "uptime": format_uptime(start_time),
@@ -643,10 +710,12 @@ async def stats_json(request):
643
710
  "current_active_connections": current_active_connections,
644
711
  "health_check_timeout": HEALTH_CHECK_TIMEOUT,
645
712
  "status_update_interval": STATUS_PRINT_INTERVAL,
713
+ "throttle_ms": THROTTLE_MS,
646
714
  "servers": all_servers,
647
715
  }
648
716
  return web.json_response(stats)
649
717
 
718
+
650
719
  async def stats_page(request):
651
720
  # High-quality HTML dashboard with auto-refresh and charts
652
721
  return web.Response(
@@ -786,86 +855,101 @@ async def stats_page(request):
786
855
  </script>
787
856
  </body>
788
857
  </html>
789
- """
858
+ """,
790
859
  )
791
860
 
861
+
792
862
  async def start_stats_server(loop):
793
- app = web.Application()
794
- app.router.add_get('/stats', stats_page)
795
- app.router.add_get('/stats.json', stats_json)
796
- runner = web.AppRunner(app)
797
- await runner.setup()
798
- site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
799
- await site.start()
800
- logger.info(f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats")
863
+ app = web.Application()
864
+ app.router.add_get("/stats", stats_page)
865
+ app.router.add_get("/stats.json", stats_json)
866
+ runner = web.AppRunner(app)
867
+ await runner.setup()
868
+ site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
869
+ await site.start()
870
+ logger.info(
871
+ f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats"
872
+ )
873
+
801
874
 
802
875
  async def main():
803
- global start_time
804
- start_time = time.time()
805
- clear_terminal()
806
- print_banner()
807
-
808
- # Start background tasks
809
- scan_task = asyncio.create_task(scan_and_update_servers())
810
- status_task = asyncio.create_task(print_status_periodically())
811
-
812
- # Start HTTP stats server (on STATS_PORT)
813
- loop = asyncio.get_running_loop()
814
- await start_stats_server(loop)
815
-
816
- # Start TCP server (on LOAD_BALANCER_PORT)
817
- server = await asyncio.start_server(
818
- handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
819
- )
876
+ global start_time
877
+ start_time = time.time()
878
+ clear_terminal()
879
+ print_banner()
820
880
 
821
- addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
822
- logger.info(f"Load balancer serving on {addrs}")
823
- logger.info(
824
- f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}"
825
- )
826
- print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
827
- print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
828
- print(f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}")
829
- print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
830
- print()
831
- await asyncio.sleep(2)
881
+ # Start background tasks
882
+ scan_task = asyncio.create_task(scan_and_update_servers())
883
+ status_task = asyncio.create_task(print_status_periodically())
884
+
885
+ # Start HTTP stats server (on STATS_PORT)
886
+ loop = asyncio.get_running_loop()
887
+ await start_stats_server(loop)
888
+
889
+ # Start TCP server (on LOAD_BALANCER_PORT)
890
+ server = await asyncio.start_server(
891
+ handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
892
+ )
893
+
894
+ addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
895
+ logger.info(f"Load balancer serving on {addrs}")
896
+ logger.info(f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}")
897
+ print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
898
+ print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
899
+ print(
900
+ f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}"
901
+ )
902
+ print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
903
+ print()
904
+ await asyncio.sleep(2)
832
905
 
833
- async with server:
906
+ async with server:
907
+ try:
908
+ await server.serve_forever()
909
+ except asyncio.CancelledError:
910
+ print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
911
+ logger.info("Load balancer server shutting down.")
912
+ except KeyboardInterrupt:
913
+ print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
914
+ logger.info("Shutdown requested by user.")
915
+ finally:
916
+ print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
917
+ logger.info("Cancelling background tasks...")
918
+ scan_task.cancel()
919
+ status_task.cancel()
834
920
  try:
835
- await server.serve_forever()
921
+ await asyncio.gather(scan_task, status_task, return_exceptions=True)
836
922
  except asyncio.CancelledError:
837
- print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
838
- logger.info("Load balancer server shutting down.")
839
- except KeyboardInterrupt:
840
- print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
841
- logger.info("Shutdown requested by user.")
842
- finally:
843
- print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
844
- logger.info("Cancelling background tasks...")
845
- scan_task.cancel()
846
- status_task.cancel()
847
- try:
848
- await asyncio.gather(scan_task, status_task, return_exceptions=True)
849
- except asyncio.CancelledError:
850
- pass
851
- print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
852
- logger.info("Background tasks finished.")
923
+ pass
924
+ print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
925
+ logger.info("Background tasks finished.")
926
+
853
927
 
854
928
  def run_load_balancer():
855
- global LOAD_BALANCER_PORT, BACKEND_PORTS, BACKEND_HOST, STATUS_PRINT_INTERVAL, HEALTH_CHECK_TIMEOUT, STATS_PORT
929
+ global \
930
+ LOAD_BALANCER_PORT, \
931
+ BACKEND_PORTS, \
932
+ BACKEND_HOST, \
933
+ STATUS_PRINT_INTERVAL, \
934
+ HEALTH_CHECK_TIMEOUT, \
935
+ THROTTLE_MS, \
936
+ STATS_PORT
856
937
  args = parse_args()
857
938
  LOAD_BALANCER_PORT = args.port
858
939
  BACKEND_HOST = args.host
859
940
  BACKEND_PORTS = [int(p.strip()) for p in args.ports.split(",") if p.strip()]
860
941
  STATUS_PRINT_INTERVAL = args.status_interval
861
942
  HEALTH_CHECK_TIMEOUT = args.health_timeout
943
+ THROTTLE_MS = args.throttle_ms
862
944
  if args.stats_port is not None:
863
945
  STATS_PORT = args.stats_port
864
946
  else:
865
947
  STATS_PORT = LOAD_BALANCER_PORT + 1
866
948
  if not BACKEND_PORTS:
867
949
  print(f"{Colors.BG_RED}{Colors.WHITE} ❌ ERROR {Colors.RESET}")
868
- print(f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}")
950
+ print(
951
+ f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}"
952
+ )
869
953
  logger.critical("No backend ports specified. Use --ports 8140,8150 ...")
870
954
  sys.exit(1)
871
955
  try:
@@ -878,5 +962,6 @@ def run_load_balancer():
878
962
  print(f"{Colors.RED}Critical error in main execution: {e}{Colors.RESET}")
879
963
  logger.critical(f"Critical error in main execution: {e}")
880
964
 
965
+
881
966
  if __name__ == "__main__":
882
967
  run_load_balancer()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.5
3
+ Version: 1.1.6
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -1,17 +1,20 @@
1
1
  llm_utils/__init__.py,sha256=SlaCMArn_uKVw4r0psz0q0IOQ1VFGdgCFOAKxQ81WTI,694
2
2
  llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
3
- llm_utils/chat_format/display.py,sha256=qaEGADGP8iQFzWOuzEO7_HyrqAFdEnUfkHAH28b0ym0,9772
3
+ llm_utils/chat_format/display.py,sha256=M-__JpcJSqjqeP4LiW7-yF8fVL37yUEUdaNC4VEgIo8,10181
4
4
  llm_utils/chat_format/transform.py,sha256=8TZhvUS5DrjUeMNtDIuWY54B_QZ7jjpXEL9c8F5z79w,5400
5
5
  llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
6
6
  llm_utils/group_messages.py,sha256=8CU9nKOja3xeuhdrX5CvYVveSqSKb2zQ0eeNzA88aTQ,3621
7
7
  llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
8
- llm_utils/lm/async_lm.py,sha256=eTyI9x4iZc4ZhYdwNadTYap5HgBJygiV_EBDZ-Og1cQ,34357
8
+ llm_utils/lm/async_lm/__init__.py,sha256=ouN2z1G24OwFglo2asBZ5w18RYcvTZ5r3ylx1aYp2rQ,70
9
+ llm_utils/lm/async_lm/_utils.py,sha256=16yks9grCmFotuqFKxVBfPvrrrquBfhzFUZ22mno3LY,5946
10
+ llm_utils/lm/async_lm/async_llm_task.py,sha256=JIU18Q7sPu_LCUR-geZmqiQ37stM8CqXmMzzkRUYibM,5352
11
+ llm_utils/lm/async_lm/async_lm.py,sha256=LzX1zMKiAENtqPtQ-laVbUyQ1g6q9Xhuyr_IWIjxdVk,28589
9
12
  llm_utils/lm/chat_html.py,sha256=FkGo0Dv_nAHYBMZzXfMu_bGQKaCx302goh3XaT-_ETc,8674
10
13
  llm_utils/lm/lm_json.py,sha256=fMt42phzFV2f6ulrtWcDXsWHi8WcG7gGkCzpIq8VSSM,1975
11
14
  llm_utils/lm/sync_lm.py,sha256=ANw_m5KiWcRwwoeQ5no6dzPFLc6j9o2oEcJtkMKqrn8,34640
12
15
  llm_utils/lm/utils.py,sha256=gUejbVZPYg97g4ftYEptYN52WhH3TAKOFW81sjLvi08,4585
13
16
  llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
14
- llm_utils/scripts/vllm_load_balancer.py,sha256=zz5aTaYwy5tYrv2RIhrizrGP-PnPAohgrl9kQvvJywA,35091
17
+ llm_utils/scripts/vllm_load_balancer.py,sha256=DxZNDGl8tViE0SuGQPneBHqkkagyuEm70bNzWyqa_9Q,36837
15
18
  llm_utils/scripts/vllm_serve.py,sha256=4NaqpVs7LBvxtvTCMPsNCAOfqiWkKRttxWMmWY7SitA,14729
16
19
  speedy_utils/__init__.py,sha256=YCpiReW22zG4KkQXQe6V9BQ8bn7PtiXolOaW_iL8T4M,5734
17
20
  speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
@@ -31,7 +34,7 @@ speedy_utils/multi_worker/thread.py,sha256=u_hTwXh7_FciMa5EukdEA1fDCY_vUC4moDceB
31
34
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
35
  speedy_utils/scripts/mpython.py,sha256=73PHm1jqbCt2APN4xuNjD0VDKwzOj4EZsViEMQiZU2g,3853
33
36
  speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
34
- speedy_utils-1.1.5.dist-info/METADATA,sha256=j4-glLlCDTANN-zB9g7NMG3viJwBH2FhYGWu_HpD4kc,7441
35
- speedy_utils-1.1.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
36
- speedy_utils-1.1.5.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
37
- speedy_utils-1.1.5.dist-info/RECORD,,
37
+ speedy_utils-1.1.6.dist-info/METADATA,sha256=vfJU1DO0R5VkRqYrN1omYIgcHNDvvKJ4F87l77wjP2s,7441
38
+ speedy_utils-1.1.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
39
+ speedy_utils-1.1.6.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
40
+ speedy_utils-1.1.6.dist-info/RECORD,,