speedy-utils 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,21 @@
  import argparse
- import sys
- import os
- import time
- from datetime import datetime
- from collections import defaultdict
-
  import asyncio
  import contextlib
+ import os
  import random
+ import sys
+ import time
+ from collections import defaultdict
+ from datetime import datetime
 
  import aiohttp
  from loguru import logger
- from tabulate import tabulate
-
  from speedy_utils import setup_logger
+ from tabulate import tabulate
 
  setup_logger(min_interval=5)
 
+
  # --- CLI Argument Parsing ---
  def parse_args():
  parser = argparse.ArgumentParser(
@@ -27,14 +26,16 @@ Examples:
  python vllm_load_balancer.py 8001 --ports 8140,8150,8160
  python vllm_load_balancer.py 8080 --ports 8140,8150 --host 192.168.1.100
  python vllm_load_balancer.py 8001 --ports 8140,8150 --status-interval 3
+ python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms 100
 
  Features:
  • Real-time dashboard with color-coded status
  • Automatic health checks and failover
  • Least-connections load balancing
+ • Request throttling to prevent server overload
  • Professional terminal interface
  • Connection statistics and monitoring
- """
+ """,
  )
  parser.add_argument(
  "port",
@@ -71,8 +72,15 @@ Features:
  default=None,
  help="Port for the HTTP stats dashboard (default: proxy port + 1)",
  )
+ parser.add_argument(
+ "--throttle-ms",
+ type=float,
+ default=30.0,
+ help="Minimum milliseconds between requests to same server (default: 30ms)",
+ )
  return parser.parse_args()
 
+
  # --- Configuration (populated from CLI) ---
  LOAD_BALANCER_HOST = "0.0.0.0"
  LOAD_BALANCER_PORT = 8008 # Will be overwritten by CLI
@@ -81,11 +89,13 @@ BACKEND_HOST = "localhost" # Will be overwritten by CLI
  BACKEND_PORTS = [] # Will be overwritten by CLI
  STATUS_PRINT_INTERVAL = 5
  HEALTH_CHECK_TIMEOUT = 2
+ THROTTLE_MS = 30.0 # Will be overwritten by CLI
  BUFFER_SIZE = 4096
 
  # --- Global Shared State ---
  available_servers = []
  connection_counts = defaultdict(int)
+ last_send_times = defaultdict(float) # Track last dispatch time per server
  state_lock = asyncio.Lock()
  start_time = None
  total_connections_served = 0
@@ -95,10 +105,10 @@ current_active_connections = 0
  # --- Terminal Utilities ---
  def clear_terminal():
  """Clear terminal screen with cross-platform support."""
- if os.name == 'nt': # Windows
- os.system('cls')
+ if os.name == "nt": # Windows
+ os.system("cls")
  else: # Unix/Linux/MacOS
- os.system('clear')
+ os.system("clear")
 
 
  def get_terminal_size():
@@ -114,12 +124,12 @@ def format_uptime(start_time):
  """Format uptime in a human-readable way."""
  if not start_time:
  return "Unknown"
-
+
  uptime_seconds = time.time() - start_time
  hours = int(uptime_seconds // 3600)
  minutes = int((uptime_seconds % 3600) // 60)
  seconds = int(uptime_seconds % 60)
-
+
  if hours > 0:
  return f"{hours}h {minutes}m {seconds}s"
  elif minutes > 0:
@@ -132,7 +142,7 @@ def print_banner():
  """Print a professional startup banner."""
  columns, _ = get_terminal_size()
  banner_width = min(columns - 4, 80)
-
+
  print("=" * banner_width)
  print(f"{'🚀 vLLM Load Balancer':^{banner_width}}")
  print(f"{'High-Performance Async TCP/HTTP Load Balancer':^{banner_width}}")
@@ -143,44 +153,45 @@ def print_banner():
  print(f"Backend Ports: {', '.join(map(str, BACKEND_PORTS))}")
  print(f"Health Check Interval: 10s (Timeout: {HEALTH_CHECK_TIMEOUT}s)")
  print(f"Status Update Interval: {STATUS_PRINT_INTERVAL}s")
+ print(f"Request Throttling: {THROTTLE_MS}ms minimum between requests")
  print("=" * banner_width)
  print()
 
 
  # --- ANSI Color Codes ---
  class Colors:
- RESET = '\033[0m'
- BOLD = '\033[1m'
- DIM = '\033[2m'
-
+ RESET = "\033[0m"
+ BOLD = "\033[1m"
+ DIM = "\033[2m"
+
  # Foreground colors
- BLACK = '\033[30m'
- RED = '\033[31m'
- GREEN = '\033[32m'
- YELLOW = '\033[33m'
- BLUE = '\033[34m'
- MAGENTA = '\033[35m'
- CYAN = '\033[36m'
- WHITE = '\033[37m'
-
+ BLACK = "\033[30m"
+ RED = "\033[31m"
+ GREEN = "\033[32m"
+ YELLOW = "\033[33m"
+ BLUE = "\033[34m"
+ MAGENTA = "\033[35m"
+ CYAN = "\033[36m"
+ WHITE = "\033[37m"
+
  # Bright colors
- BRIGHT_BLACK = '\033[90m'
- BRIGHT_RED = '\033[91m'
- BRIGHT_GREEN = '\033[92m'
- BRIGHT_YELLOW = '\033[93m'
- BRIGHT_BLUE = '\033[94m'
- BRIGHT_MAGENTA = '\033[95m'
- BRIGHT_CYAN = '\033[96m'
- BRIGHT_WHITE = '\033[97m'
-
+ BRIGHT_BLACK = "\033[90m"
+ BRIGHT_RED = "\033[91m"
+ BRIGHT_GREEN = "\033[92m"
+ BRIGHT_YELLOW = "\033[93m"
+ BRIGHT_BLUE = "\033[94m"
+ BRIGHT_MAGENTA = "\033[95m"
+ BRIGHT_CYAN = "\033[96m"
+ BRIGHT_WHITE = "\033[97m"
+
  # Background colors
- BG_RED = '\033[41m'
- BG_GREEN = '\033[42m'
- BG_YELLOW = '\033[43m'
- BG_BLUE = '\033[44m'
+ BG_RED = "\033[41m"
+ BG_GREEN = "\033[42m"
+ BG_YELLOW = "\033[43m"
+ BG_BLUE = "\033[44m"
 
 
- # --- Helper Functions --- (relay_data and safe_close_writer remain the same)
+ # --- Helper Functions ---
  async def relay_data(reader, writer, direction):
  """Reads data from reader and writes to writer until EOF or error."""
  try:
@@ -224,7 +235,6 @@ async def safe_close_writer(writer):
  logger.debug(f"Error closing writer in context manager: {e}")
 
 
-
  # --- Health Check for Provided Ports ---
  async def check_server_health(session, host, port):
  """Performs an HTTP GET request to the /health endpoint."""
@@ -313,6 +323,11 @@ async def scan_and_update_servers():
  logger.debug(
  f"Removed connection count entry for unavailable server {server}"
  )
+ if server in last_send_times:
+ del last_send_times[server]
+ logger.debug(
+ f"Removed throttling timestamp for unavailable server {server}"
+ )
 
  available_servers = sorted(list(current_set))
  for server in available_servers:
@@ -333,11 +348,10 @@ async def scan_and_update_servers():
  await asyncio.sleep(10)
 
 
- # --- Core Load Balancer Logic (handle_client remains the same) ---
+ # --- Core Load Balancer Logic ---
  async def handle_client(client_reader, client_writer):
  """Handles a single client connection."""
  client_addr = client_writer.get_extra_info("peername")
- logger.info(f"Accepted connection from {client_addr}")
 
  backend_server = None
  backend_reader = None
@@ -376,15 +390,10 @@ async def handle_client(client_reader, client_writer):
  connection_counts[selected_server] += 1
  backend_server = selected_server
  server_selected = True
-
+
  # Update global statistics
- global total_connections_served, current_active_connections
  total_connections_served += 1
  current_active_connections += 1
-
- logger.info(
- f"Routing {client_addr} to {backend_server} (Current connections: {connection_counts[backend_server]})"
- )
  else:
  logger.error(
  f"Logic error: No server chosen despite available servers list not being empty for {client_addr}."
@@ -402,6 +411,42 @@ async def handle_client(client_reader, client_writer):
  pass
  server_selected = False
  return
+
+ # --- Throttling Logic (Concurrency Safe) ---
+ # Atomically schedule the next request to avoid thundering herd on a single server.
+ sleep_duration_s = 0
+ now_ms = time.time() * 1000
+
+ async with state_lock:
+ # Get the time the last request was DISPATCHED to this server.
+ last_dispatch_ms = last_send_times.get(backend_server, 0)
+
+ # Calculate when the next request is allowed to be sent.
+ next_allowed_ms = last_dispatch_ms + THROTTLE_MS
+
+ # If the next allowed time is in the future, we must wait.
+ if next_allowed_ms > now_ms:
+ sleep_duration_s = (next_allowed_ms - now_ms) / 1000
+ # The next request after this one will be scheduled relative to when
+ # THIS request is actually sent (i.e., after its delay).
+ new_dispatch_time_ms = next_allowed_ms
+ else:
+ # We can send immediately.
+ sleep_duration_s = 0
+ new_dispatch_time_ms = now_ms
+
+ # CRITICAL: Update the dispatch time for the *next* caller immediately inside the lock.
+ last_send_times[backend_server] = new_dispatch_time_ms
+
+ if sleep_duration_s > 0:
+ logger.debug(
+ f"Throttling request to {backend_server} for {sleep_duration_s:.3f}s to maintain >{THROTTLE_MS}ms interval."
+ )
+
+ # Sleep outside the lock to avoid blocking other tasks.
+ if sleep_duration_s > 0:
+ await asyncio.sleep(sleep_duration_s)
+
  try:
  logger.debug(
  f"Attempting connection to backend {backend_server} for {client_addr}"
@@ -473,16 +518,14 @@ async def handle_client(client_reader, client_writer):
  except Exception as e:
  logger.error(f"Error handling client {client_addr}: {e}")
  finally:
- logger.info(f"Closing connection for {client_addr}")
  # Decrement connection count only if we successfully selected/incremented
  if backend_server and server_selected:
  async with state_lock:
  if backend_server in connection_counts:
  if connection_counts[backend_server] > 0:
  connection_counts[backend_server] -= 1
- current_active_connections = max(0, current_active_connections - 1)
- logger.info(
- f"Connection closed for {client_addr}. Backend {backend_server} connections: {connection_counts[backend_server]}"
+ current_active_connections = max(
+ 0, current_active_connections - 1
  )
  else:
  logger.warning(
@@ -491,7 +534,6 @@ async def handle_client(client_reader, client_writer):
  connection_counts[backend_server] = 0
 
 
-
  # --- Status Reporting Task ---
  async def print_status_periodically():
  """Periodically displays a professional real-time status dashboard."""
@@ -514,35 +556,54 @@ async def display_status_dashboard():
  # Get terminal dimensions for responsive layout
  columns, rows = get_terminal_size()
  dash_width = min(columns - 4, 100)
-
+
  # Header with title and timestamp
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
- print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}")
- print(f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}")
+ print(
+ f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}"
+ )
+ print(
+ f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}"
+ )
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
  print()
-
+
  # System Information Section
  uptime = format_uptime(start_time)
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📊 System Information{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
  print(f"{Colors.YELLOW}🕐 Current Time:{Colors.RESET} {current_time}")
  print(f"{Colors.YELLOW}⏱️ Uptime:{Colors.RESET} {uptime}")
- print(f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}")
+ print(
+ f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}"
+ )
  print(f"{Colors.YELLOW}🎯 Backend Host:{Colors.RESET} {BACKEND_HOST}")
- print(f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}")
+ print(
+ f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}"
+ )
+ print(
+ f"{Colors.YELLOW}⚡ Request Throttling:{Colors.RESET} {THROTTLE_MS}ms minimum"
+ )
  print()
-
+
  # Connection Statistics Section
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📈 Connection Statistics{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
- print(f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}")
- print(f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}")
- print(f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s")
- print(f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s")
+ print(
+ f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}"
+ )
+ print(
+ f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}"
+ )
+ print(
+ f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s"
+ )
+ print(
+ f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s"
+ )
  print()
-
+
  # Backend Servers Status
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}Backend Servers Status{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
@@ -552,7 +613,7 @@ async def display_status_dashboard():
  f"{Colors.BOLD}Host{Colors.RESET}",
  f"{Colors.BOLD}Port{Colors.RESET}",
  f"{Colors.BOLD}Active Conn.{Colors.RESET}",
- f"{Colors.BOLD}Status{Colors.RESET}"
+ f"{Colors.BOLD}Status{Colors.RESET}",
  ]
 
  table_data = []
@@ -580,13 +641,15 @@ async def display_status_dashboard():
  else f"{Colors.BG_RED}{Colors.WHITE} OFFLINE {Colors.RESET}"
  )
 
- table_data.append([
- f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
- BACKEND_HOST,
- str(port),
- conn_display,
- status_display
- ])
+ table_data.append(
+ [
+ f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
+ BACKEND_HOST,
+ str(port),
+ conn_display,
+ status_display,
+ ]
+ )
 
  try:
  table = tabulate(table_data, headers=headers, tablefmt="fancy_grid")
@@ -594,13 +657,23 @@ async def display_status_dashboard():
  print()
 
  # Summary metrics
- online_count = sum(1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available)
- avg_connections = total_backend_connections / online_count if online_count else 0
+ online_count = sum(
+ 1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available
+ )
+ avg_connections = (
+ total_backend_connections / online_count if online_count else 0
+ )
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📋 Summary{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 4)}{Colors.RESET}")
- print(f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}")
- print(f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}")
- print(f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}")
+ print(
+ f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}"
+ )
+ print(
+ f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}"
+ )
+ print(
+ f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}"
+ )
 
  except Exception as e:
  logger.error(f"Error displaying status table: {e}")
@@ -609,15 +682,17 @@ async def display_status_dashboard():
  # Footer with refresh info
  print()
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
- print(f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}")
+ print(
+ f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}"
+ )
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
  print()
 
 
-
  # --- HTTP Stats Server ---
  from aiohttp import web
 
+
  async def stats_json(request):
  async with state_lock:
  # Build a list of all configured servers, with status and connections
@@ -626,12 +701,16 @@ async def stats_json(request):
  for port in BACKEND_PORTS:
  server = (BACKEND_HOST, port)
  is_online = server in available_set
- all_servers.append({
- "host": BACKEND_HOST,
- "port": port,
- "active_connections": connection_counts.get(server, 0) if is_online else 0,
- "status": "ONLINE" if is_online else "OFFLINE",
- })
+ all_servers.append(
+ {
+ "host": BACKEND_HOST,
+ "port": port,
+ "active_connections": connection_counts.get(server, 0)
+ if is_online
+ else 0,
+ "status": "ONLINE" if is_online else "OFFLINE",
+ }
+ )
  stats = {
  "time": datetime.now().isoformat(),
  "uptime": format_uptime(start_time),
@@ -643,10 +722,12 @@ async def stats_json(request):
  "current_active_connections": current_active_connections,
  "health_check_timeout": HEALTH_CHECK_TIMEOUT,
  "status_update_interval": STATUS_PRINT_INTERVAL,
+ "throttle_ms": THROTTLE_MS,
  "servers": all_servers,
  }
  return web.json_response(stats)
 
+
  async def stats_page(request):
  # High-quality HTML dashboard with auto-refresh and charts
  return web.Response(
@@ -786,86 +867,101 @@ async def stats_page(request):
  </script>
  </body>
  </html>
- """
+ """,
  )
 
+
  async def start_stats_server(loop):
- app = web.Application()
- app.router.add_get('/stats', stats_page)
- app.router.add_get('/stats.json', stats_json)
- runner = web.AppRunner(app)
- await runner.setup()
- site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
- await site.start()
- logger.info(f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats")
+ app = web.Application()
+ app.router.add_get("/stats", stats_page)
+ app.router.add_get("/stats.json", stats_json)
+ runner = web.AppRunner(app)
+ await runner.setup()
+ site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
+ await site.start()
+ logger.info(
+ f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats"
+ )
+
 
  async def main():
- global start_time
- start_time = time.time()
- clear_terminal()
- print_banner()
-
- # Start background tasks
- scan_task = asyncio.create_task(scan_and_update_servers())
- status_task = asyncio.create_task(print_status_periodically())
-
- # Start HTTP stats server (on STATS_PORT)
- loop = asyncio.get_running_loop()
- await start_stats_server(loop)
-
- # Start TCP server (on LOAD_BALANCER_PORT)
- server = await asyncio.start_server(
- handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
- )
+ global start_time
+ start_time = time.time()
+ clear_terminal()
+ print_banner()
 
- addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
- logger.info(f"Load balancer serving on {addrs}")
- logger.info(
- f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}"
- )
- print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
- print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
- print(f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}")
- print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
- print()
- await asyncio.sleep(2)
+ # Start background tasks
+ scan_task = asyncio.create_task(scan_and_update_servers())
+ status_task = asyncio.create_task(print_status_periodically())
+
+ # Start HTTP stats server (on STATS_PORT)
+ loop = asyncio.get_running_loop()
+ await start_stats_server(loop)
+
+ # Start TCP server (on LOAD_BALANCER_PORT)
+ server = await asyncio.start_server(
+ handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
+ )
 
- async with server:
+ addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
+ logger.info(f"Load balancer serving on {addrs}")
+ logger.info(f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}")
+ print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
+ print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
+ print(
+ f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}"
+ )
+ print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
+ print()
+ await asyncio.sleep(2)
+
+ async with server:
+ try:
+ await server.serve_forever()
+ except asyncio.CancelledError:
+ print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
+ logger.info("Load balancer server shutting down.")
+ except KeyboardInterrupt:
+ print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
+ logger.info("Shutdown requested by user.")
+ finally:
+ print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
+ logger.info("Cancelling background tasks...")
+ scan_task.cancel()
+ status_task.cancel()
  try:
- await server.serve_forever()
+ await asyncio.gather(scan_task, status_task, return_exceptions=True)
  except asyncio.CancelledError:
- print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
- logger.info("Load balancer server shutting down.")
- except KeyboardInterrupt:
- print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
- logger.info("Shutdown requested by user.")
- finally:
- print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
- logger.info("Cancelling background tasks...")
- scan_task.cancel()
- status_task.cancel()
- try:
- await asyncio.gather(scan_task, status_task, return_exceptions=True)
- except asyncio.CancelledError:
- pass
- print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
- logger.info("Background tasks finished.")
+ pass
+ print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
+ logger.info("Background tasks finished.")
+
 
  def run_load_balancer():
- global LOAD_BALANCER_PORT, BACKEND_PORTS, BACKEND_HOST, STATUS_PRINT_INTERVAL, HEALTH_CHECK_TIMEOUT, STATS_PORT
+ global \
+ LOAD_BALANCER_PORT, \
+ BACKEND_PORTS, \
+ BACKEND_HOST, \
+ STATUS_PRINT_INTERVAL, \
+ HEALTH_CHECK_TIMEOUT, \
+ THROTTLE_MS, \
+ STATS_PORT
  args = parse_args()
  LOAD_BALANCER_PORT = args.port
  BACKEND_HOST = args.host
  BACKEND_PORTS = [int(p.strip()) for p in args.ports.split(",") if p.strip()]
  STATUS_PRINT_INTERVAL = args.status_interval
  HEALTH_CHECK_TIMEOUT = args.health_timeout
+ THROTTLE_MS = args.throttle_ms
  if args.stats_port is not None:
  STATS_PORT = args.stats_port
  else:
  STATS_PORT = LOAD_BALANCER_PORT + 1
  if not BACKEND_PORTS:
  print(f"{Colors.BG_RED}{Colors.WHITE} ❌ ERROR {Colors.RESET}")
- print(f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}")
+ print(
+ f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}"
+ )
  logger.critical("No backend ports specified. Use --ports 8140,8150 ...")
  sys.exit(1)
  try:
@@ -878,5 +974,6 @@ def run_load_balancer():
  print(f"{Colors.RED}Critical error in main execution: {e}{Colors.RESET}")
  logger.critical(f"Critical error in main execution: {e}")
 
+
  if __name__ == "__main__":
- run_load_balancer()
+ run_load_balancer()
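The main functional change in this file is the per-server request throttle: a new --throttle-ms option (default 30 ms) and a concurrency-safe delay inside handle_client that reserves each server's next dispatch slot under state_lock. The snippet below is a minimal standalone sketch of that scheduling pattern, reusing the names from the diff (THROTTLE_MS, last_send_times, state_lock); the throttle() helper and demo() driver are illustrative only and not part of the package.

# Standalone sketch of the per-server throttling pattern added in 1.1.7.
# THROTTLE_MS, last_send_times, and state_lock mirror the diff; throttle()
# and demo() are illustrative helpers, not package API.
import asyncio
import time
from collections import defaultdict

THROTTLE_MS = 30.0
last_send_times = defaultdict(float)  # server -> last reserved dispatch time (ms)
state_lock = asyncio.Lock()


async def throttle(server):
    """Delay so dispatches to `server` are at least THROTTLE_MS apart."""
    now_ms = time.time() * 1000
    async with state_lock:
        next_allowed_ms = last_send_times[server] + THROTTLE_MS
        if next_allowed_ms > now_ms:
            # Reserve the next slot before releasing the lock, so concurrent
            # callers queue behind this one instead of all firing at once.
            last_send_times[server] = next_allowed_ms
            sleep_s = (next_allowed_ms - now_ms) / 1000
        else:
            last_send_times[server] = now_ms
            sleep_s = 0
    # Sleep outside the lock so other tasks are not blocked.
    if sleep_s > 0:
        await asyncio.sleep(sleep_s)


async def demo():
    # Three concurrent "requests" to the same server get spaced ~30 ms apart.
    await asyncio.gather(*(throttle(("localhost", 8140)) for _ in range(3)))


asyncio.run(demo())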
speedy_utils/__init__.py CHANGED
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
  )
 
  # Cache utilities
- from .common.utils_cache import identify, identify_uuid, memoize
+ from .common.utils_cache import amemoize, identify, identify_uuid, memoize
 
  # IO utilities
  from .common.utils_io import (
@@ -197,6 +197,7 @@ __all__ = [
  # Function decorators
  "retry_runtime",
  # Cache utilities
+ "amemoize",
  "memoize",
  "identify",
  "identify_uuid",
@@ -226,4 +227,5 @@ __all__ = [
  "multi_thread",
  # Notebook utilities
  "change_dir",
+ "amemoize",
  ]
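The only API-visible change in speedy_utils/__init__.py is the new amemoize export alongside memoize. The sketch below is a hedged usage example only: the diff shows just the export, not the signature, so the assumption here is that amemoize is the async counterpart of memoize and that both work as plain decorators.

# Assumed usage only: the diff shows that `amemoize` is now exported next to
# `memoize`, but not its signature; decorator-style use is an assumption here.
import asyncio

from speedy_utils import amemoize, memoize


@memoize
def square(x):
    return x * x


@amemoize
async def fetch_square(x):
    await asyncio.sleep(0.1)  # stand-in for real async work
    return x * x


async def main():
    print(square(4))              # synchronous cached call
    print(await fetch_square(4))  # async cached call, per the assumed API


asyncio.run(main())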