speedy-utils 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,21 @@
  import argparse
- import sys
- import os
- import time
- from datetime import datetime
- from collections import defaultdict
-
  import asyncio
  import contextlib
+ import os
  import random
+ import sys
+ import time
+ from collections import defaultdict
+ from datetime import datetime
 
  import aiohttp
  from loguru import logger
- from tabulate import tabulate
-
  from speedy_utils import setup_logger
+ from tabulate import tabulate
 
  setup_logger(min_interval=5)
 
+
  # --- CLI Argument Parsing ---
  def parse_args():
  parser = argparse.ArgumentParser(
@@ -27,14 +26,16 @@ Examples:
  python vllm_load_balancer.py 8001 --ports 8140,8150,8160
  python vllm_load_balancer.py 8080 --ports 8140,8150 --host 192.168.1.100
  python vllm_load_balancer.py 8001 --ports 8140,8150 --status-interval 3
+ python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms 100
 
  Features:
  • Real-time dashboard with color-coded status
  • Automatic health checks and failover
  • Least-connections load balancing
+ • Request throttling to prevent server overload
  • Professional terminal interface
  • Connection statistics and monitoring
- """
+ """,
  )
  parser.add_argument(
  "port",
@@ -71,8 +72,15 @@ Features:
  default=None,
  help="Port for the HTTP stats dashboard (default: proxy port + 1)",
  )
+ parser.add_argument(
+ "--throttle-ms",
+ type=float,
+ default=30.0,
+ help="Minimum milliseconds between requests to same server (default: 30ms)",
+ )
  return parser.parse_args()
 
+
  # --- Configuration (populated from CLI) ---
  LOAD_BALANCER_HOST = "0.0.0.0"
  LOAD_BALANCER_PORT = 8008 # Will be overwritten by CLI
@@ -81,11 +89,13 @@ BACKEND_HOST = "localhost" # Will be overwritten by CLI
  BACKEND_PORTS = [] # Will be overwritten by CLI
  STATUS_PRINT_INTERVAL = 5
  HEALTH_CHECK_TIMEOUT = 2
+ THROTTLE_MS = 30.0 # Will be overwritten by CLI
  BUFFER_SIZE = 4096
 
  # --- Global Shared State ---
  available_servers = []
  connection_counts = defaultdict(int)
+ last_send_times = defaultdict(float) # Track last dispatch time per server
  state_lock = asyncio.Lock()
  start_time = None
  total_connections_served = 0
@@ -95,10 +105,10 @@ current_active_connections = 0
  # --- Terminal Utilities ---
  def clear_terminal():
  """Clear terminal screen with cross-platform support."""
- if os.name == 'nt': # Windows
- os.system('cls')
+ if os.name == "nt": # Windows
+ os.system("cls")
  else: # Unix/Linux/MacOS
- os.system('clear')
+ os.system("clear")
 
 
  def get_terminal_size():
@@ -114,12 +124,12 @@ def format_uptime(start_time):
  """Format uptime in a human-readable way."""
  if not start_time:
  return "Unknown"
-
+
  uptime_seconds = time.time() - start_time
  hours = int(uptime_seconds // 3600)
  minutes = int((uptime_seconds % 3600) // 60)
  seconds = int(uptime_seconds % 60)
-
+
  if hours > 0:
  return f"{hours}h {minutes}m {seconds}s"
  elif minutes > 0:
@@ -132,7 +142,7 @@ def print_banner():
  """Print a professional startup banner."""
  columns, _ = get_terminal_size()
  banner_width = min(columns - 4, 80)
-
+
  print("=" * banner_width)
  print(f"{'🚀 vLLM Load Balancer':^{banner_width}}")
  print(f"{'High-Performance Async TCP/HTTP Load Balancer':^{banner_width}}")
@@ -143,44 +153,45 @@ def print_banner():
  print(f"Backend Ports: {', '.join(map(str, BACKEND_PORTS))}")
  print(f"Health Check Interval: 10s (Timeout: {HEALTH_CHECK_TIMEOUT}s)")
  print(f"Status Update Interval: {STATUS_PRINT_INTERVAL}s")
+ print(f"Request Throttling: {THROTTLE_MS}ms minimum between requests")
  print("=" * banner_width)
  print()
 
 
  # --- ANSI Color Codes ---
  class Colors:
- RESET = '\033[0m'
- BOLD = '\033[1m'
- DIM = '\033[2m'
-
+ RESET = "\033[0m"
+ BOLD = "\033[1m"
+ DIM = "\033[2m"
+
  # Foreground colors
- BLACK = '\033[30m'
- RED = '\033[31m'
- GREEN = '\033[32m'
- YELLOW = '\033[33m'
- BLUE = '\033[34m'
- MAGENTA = '\033[35m'
- CYAN = '\033[36m'
- WHITE = '\033[37m'
-
+ BLACK = "\033[30m"
+ RED = "\033[31m"
+ GREEN = "\033[32m"
+ YELLOW = "\033[33m"
+ BLUE = "\033[34m"
+ MAGENTA = "\033[35m"
+ CYAN = "\033[36m"
+ WHITE = "\033[37m"
+
  # Bright colors
- BRIGHT_BLACK = '\033[90m'
- BRIGHT_RED = '\033[91m'
- BRIGHT_GREEN = '\033[92m'
- BRIGHT_YELLOW = '\033[93m'
- BRIGHT_BLUE = '\033[94m'
- BRIGHT_MAGENTA = '\033[95m'
- BRIGHT_CYAN = '\033[96m'
- BRIGHT_WHITE = '\033[97m'
-
+ BRIGHT_BLACK = "\033[90m"
+ BRIGHT_RED = "\033[91m"
+ BRIGHT_GREEN = "\033[92m"
+ BRIGHT_YELLOW = "\033[93m"
+ BRIGHT_BLUE = "\033[94m"
+ BRIGHT_MAGENTA = "\033[95m"
+ BRIGHT_CYAN = "\033[96m"
+ BRIGHT_WHITE = "\033[97m"
+
  # Background colors
- BG_RED = '\033[41m'
- BG_GREEN = '\033[42m'
- BG_YELLOW = '\033[43m'
- BG_BLUE = '\033[44m'
+ BG_RED = "\033[41m"
+ BG_GREEN = "\033[42m"
+ BG_YELLOW = "\033[43m"
+ BG_BLUE = "\033[44m"
 
 
- # --- Helper Functions --- (relay_data and safe_close_writer remain the same)
+ # --- Helper Functions ---
  async def relay_data(reader, writer, direction):
  """Reads data from reader and writes to writer until EOF or error."""
  try:
@@ -224,7 +235,6 @@ async def safe_close_writer(writer):
  logger.debug(f"Error closing writer in context manager: {e}")
 
 
-
  # --- Health Check for Provided Ports ---
  async def check_server_health(session, host, port):
  """Performs an HTTP GET request to the /health endpoint."""
@@ -313,6 +323,11 @@ async def scan_and_update_servers():
  logger.debug(
  f"Removed connection count entry for unavailable server {server}"
  )
+ if server in last_send_times:
+ del last_send_times[server]
+ logger.debug(
+ f"Removed throttling timestamp for unavailable server {server}"
+ )
 
  available_servers = sorted(list(current_set))
  for server in available_servers:
@@ -333,11 +348,10 @@ async def scan_and_update_servers():
  await asyncio.sleep(10)
 
 
- # --- Core Load Balancer Logic (handle_client remains the same) ---
+ # --- Core Load Balancer Logic ---
  async def handle_client(client_reader, client_writer):
  """Handles a single client connection."""
  client_addr = client_writer.get_extra_info("peername")
- logger.info(f"Accepted connection from {client_addr}")
 
  backend_server = None
  backend_reader = None
@@ -376,15 +390,10 @@ async def handle_client(client_reader, client_writer):
  connection_counts[selected_server] += 1
  backend_server = selected_server
  server_selected = True
-
+
  # Update global statistics
- global total_connections_served, current_active_connections
  total_connections_served += 1
  current_active_connections += 1
-
- logger.info(
- f"Routing {client_addr} to {backend_server} (Current connections: {connection_counts[backend_server]})"
- )
  else:
  logger.error(
  f"Logic error: No server chosen despite available servers list not being empty for {client_addr}."
@@ -402,6 +411,42 @@ async def handle_client(client_reader, client_writer):
  pass
  server_selected = False
  return
+
+ # --- Throttling Logic (Concurrency Safe) ---
+ # Atomically schedule the next request to avoid thundering herd on a single server.
+ sleep_duration_s = 0
+ now_ms = time.time() * 1000
+
+ async with state_lock:
+ # Get the time the last request was DISPATCHED to this server.
+ last_dispatch_ms = last_send_times.get(backend_server, 0)
+
+ # Calculate when the next request is allowed to be sent.
+ next_allowed_ms = last_dispatch_ms + THROTTLE_MS
+
+ # If the next allowed time is in the future, we must wait.
+ if next_allowed_ms > now_ms:
+ sleep_duration_s = (next_allowed_ms - now_ms) / 1000
+ # The next request after this one will be scheduled relative to when
+ # THIS request is actually sent (i.e., after its delay).
+ new_dispatch_time_ms = next_allowed_ms
+ else:
+ # We can send immediately.
+ sleep_duration_s = 0
+ new_dispatch_time_ms = now_ms
+
+ # CRITICAL: Update the dispatch time for the *next* caller immediately inside the lock.
+ last_send_times[backend_server] = new_dispatch_time_ms
+
+ if sleep_duration_s > 0:
+ logger.debug(
+ f"Throttling request to {backend_server} for {sleep_duration_s:.3f}s to maintain >{THROTTLE_MS}ms interval."
+ )
+
+ # Sleep outside the lock to avoid blocking other tasks.
+ if sleep_duration_s > 0:
+ await asyncio.sleep(sleep_duration_s)
+
  try:
  logger.debug(
  f"Attempting connection to backend {backend_server} for {client_addr}"
@@ -473,16 +518,14 @@ async def handle_client(client_reader, client_writer):
  except Exception as e:
  logger.error(f"Error handling client {client_addr}: {e}")
  finally:
- logger.info(f"Closing connection for {client_addr}")
  # Decrement connection count only if we successfully selected/incremented
  if backend_server and server_selected:
  async with state_lock:
  if backend_server in connection_counts:
  if connection_counts[backend_server] > 0:
  connection_counts[backend_server] -= 1
- current_active_connections = max(0, current_active_connections - 1)
- logger.info(
- f"Connection closed for {client_addr}. Backend {backend_server} connections: {connection_counts[backend_server]}"
+ current_active_connections = max(
+ 0, current_active_connections - 1
  )
  else:
  logger.warning(
@@ -491,7 +534,6 @@ async def handle_client(client_reader, client_writer):
  connection_counts[backend_server] = 0
 
 
-
  # --- Status Reporting Task ---
  async def print_status_periodically():
  """Periodically displays a professional real-time status dashboard."""
@@ -514,35 +556,54 @@ async def display_status_dashboard():
  # Get terminal dimensions for responsive layout
  columns, rows = get_terminal_size()
  dash_width = min(columns - 4, 100)
-
+
  # Header with title and timestamp
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
- print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}")
- print(f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}")
+ print(
+ f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'🚀 vLLM Load Balancer Dashboard':^{dash_width}}{Colors.RESET}"
+ )
+ print(
+ f"{Colors.BRIGHT_CYAN}{'Real-time Status & Monitoring':^{dash_width}}{Colors.RESET}"
+ )
  print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}{'=' * dash_width}{Colors.RESET}")
  print()
-
+
  # System Information Section
  uptime = format_uptime(start_time)
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📊 System Information{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
  print(f"{Colors.YELLOW}🕐 Current Time:{Colors.RESET} {current_time}")
  print(f"{Colors.YELLOW}⏱️ Uptime:{Colors.RESET} {uptime}")
- print(f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}")
+ print(
+ f"{Colors.YELLOW}🌐 Load Balancer:{Colors.RESET} {LOAD_BALANCER_HOST}:{LOAD_BALANCER_PORT}"
+ )
  print(f"{Colors.YELLOW}🎯 Backend Host:{Colors.RESET} {BACKEND_HOST}")
- print(f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}")
+ print(
+ f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}"
+ )
+ print(
+ f"{Colors.YELLOW}⚡ Request Throttling:{Colors.RESET} {THROTTLE_MS}ms minimum"
+ )
  print()
-
+
  # Connection Statistics Section
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📈 Connection Statistics{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
- print(f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}")
- print(f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}")
- print(f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s")
- print(f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s")
+ print(
+ f"{Colors.GREEN}📊 Total Connections Served:{Colors.RESET} {total_connections_served:,}"
+ )
+ print(
+ f"{Colors.GREEN}🔗 Currently Active:{Colors.RESET} {current_active_connections}"
+ )
+ print(
+ f"{Colors.GREEN}⚡ Health Check Timeout:{Colors.RESET} {HEALTH_CHECK_TIMEOUT}s"
+ )
+ print(
+ f"{Colors.GREEN}🔄 Status Update Interval:{Colors.RESET} {STATUS_PRINT_INTERVAL}s"
+ )
  print()
-
+
  # Backend Servers Status
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}Backend Servers Status{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 2)}{Colors.RESET}")
@@ -552,7 +613,7 @@ async def display_status_dashboard():
  f"{Colors.BOLD}Host{Colors.RESET}",
  f"{Colors.BOLD}Port{Colors.RESET}",
  f"{Colors.BOLD}Active Conn.{Colors.RESET}",
- f"{Colors.BOLD}Status{Colors.RESET}"
+ f"{Colors.BOLD}Status{Colors.RESET}",
  ]
 
  table_data = []
@@ -580,13 +641,15 @@ async def display_status_dashboard():
  else f"{Colors.BG_RED}{Colors.WHITE} OFFLINE {Colors.RESET}"
  )
 
- table_data.append([
- f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
- BACKEND_HOST,
- str(port),
- conn_display,
- status_display
- ])
+ table_data.append(
+ [
+ f"{Colors.CYAN}{BACKEND_HOST}:{port}{Colors.RESET}",
+ BACKEND_HOST,
+ str(port),
+ conn_display,
+ status_display,
+ ]
+ )
 
  try:
  table = tabulate(table_data, headers=headers, tablefmt="fancy_grid")
@@ -594,13 +657,23 @@ async def display_status_dashboard():
  print()
 
  # Summary metrics
- online_count = sum(1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available)
- avg_connections = total_backend_connections / online_count if online_count else 0
+ online_count = sum(
+ 1 for port in BACKEND_PORTS if (BACKEND_HOST, port) in current_available
+ )
+ avg_connections = (
+ total_backend_connections / online_count if online_count else 0
+ )
  print(f"{Colors.BOLD}{Colors.BRIGHT_WHITE}📋 Summary{Colors.RESET}")
  print(f"{Colors.BRIGHT_BLACK}{'─' * (dash_width // 4)}{Colors.RESET}")
- print(f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}")
- print(f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}")
- print(f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}")
+ print(
+ f"{Colors.MAGENTA}🟢 Available Servers:{Colors.RESET} {online_count} / {len(BACKEND_PORTS)}"
+ )
+ print(
+ f"{Colors.MAGENTA}📊 Total Backend Connections:{Colors.RESET} {total_backend_connections}"
+ )
+ print(
+ f"{Colors.MAGENTA}📈 Average Load per Online Server:{Colors.RESET} {avg_connections:.1f}"
+ )
 
  except Exception as e:
  logger.error(f"Error displaying status table: {e}")
@@ -609,15 +682,17 @@ async def display_status_dashboard():
  # Footer with refresh info
  print()
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
- print(f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}")
+ print(
+ f"{Colors.DIM}🔄 Auto-refresh every {STATUS_PRINT_INTERVAL}s | Press Ctrl+C to stop{Colors.RESET}"
+ )
  print(f"{Colors.BRIGHT_BLACK}{'─' * dash_width}{Colors.RESET}")
  print()
 
 
-
  # --- HTTP Stats Server ---
  from aiohttp import web
 
+
  async def stats_json(request):
  async with state_lock:
  # Build a list of all configured servers, with status and connections
@@ -626,12 +701,16 @@ async def stats_json(request):
  for port in BACKEND_PORTS:
  server = (BACKEND_HOST, port)
  is_online = server in available_set
- all_servers.append({
- "host": BACKEND_HOST,
- "port": port,
- "active_connections": connection_counts.get(server, 0) if is_online else 0,
- "status": "ONLINE" if is_online else "OFFLINE",
- })
+ all_servers.append(
+ {
+ "host": BACKEND_HOST,
+ "port": port,
+ "active_connections": connection_counts.get(server, 0)
+ if is_online
+ else 0,
+ "status": "ONLINE" if is_online else "OFFLINE",
+ }
+ )
  stats = {
  "time": datetime.now().isoformat(),
  "uptime": format_uptime(start_time),
@@ -643,10 +722,12 @@ async def stats_json(request):
  "current_active_connections": current_active_connections,
  "health_check_timeout": HEALTH_CHECK_TIMEOUT,
  "status_update_interval": STATUS_PRINT_INTERVAL,
+ "throttle_ms": THROTTLE_MS,
  "servers": all_servers,
  }
  return web.json_response(stats)
 
+
  async def stats_page(request):
  # High-quality HTML dashboard with auto-refresh and charts
  return web.Response(
@@ -786,86 +867,101 @@ async def stats_page(request):
  </script>
  </body>
  </html>
- """
+ """,
  )
 
+
  async def start_stats_server(loop):
- app = web.Application()
- app.router.add_get('/stats', stats_page)
- app.router.add_get('/stats.json', stats_json)
- runner = web.AppRunner(app)
- await runner.setup()
- site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
- await site.start()
- logger.info(f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats")
+ app = web.Application()
+ app.router.add_get("/stats", stats_page)
+ app.router.add_get("/stats.json", stats_json)
+ runner = web.AppRunner(app)
+ await runner.setup()
+ site = web.TCPSite(runner, LOAD_BALANCER_HOST, STATS_PORT)
+ await site.start()
+ logger.info(
+ f"Stats HTTP server running at http://{LOAD_BALANCER_HOST}:{STATS_PORT}/stats"
+ )
+
 
  async def main():
- global start_time
- start_time = time.time()
- clear_terminal()
- print_banner()
-
- # Start background tasks
- scan_task = asyncio.create_task(scan_and_update_servers())
- status_task = asyncio.create_task(print_status_periodically())
-
- # Start HTTP stats server (on STATS_PORT)
- loop = asyncio.get_running_loop()
- await start_stats_server(loop)
-
- # Start TCP server (on LOAD_BALANCER_PORT)
- server = await asyncio.start_server(
- handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
- )
+ global start_time
+ start_time = time.time()
+ clear_terminal()
+ print_banner()
 
- addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
- logger.info(f"Load balancer serving on {addrs}")
- logger.info(
- f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}"
- )
- print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
- print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
- print(f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}")
- print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
- print()
- await asyncio.sleep(2)
+ # Start background tasks
+ scan_task = asyncio.create_task(scan_and_update_servers())
+ status_task = asyncio.create_task(print_status_periodically())
+
+ # Start HTTP stats server (on STATS_PORT)
+ loop = asyncio.get_running_loop()
+ await start_stats_server(loop)
+
+ # Start TCP server (on LOAD_BALANCER_PORT)
+ server = await asyncio.start_server(
+ handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
+ )
 
- async with server:
+ addrs = ", ".join(str(sock.getsockname()) for sock in server.sockets)
+ logger.info(f"Load balancer serving on {addrs}")
+ logger.info(f"Configured backend ports: {BACKEND_PORTS} on host {BACKEND_HOST}")
+ print(f"{Colors.BRIGHT_GREEN}✅ Load balancer started successfully!{Colors.RESET}")
+ print(f"{Colors.BRIGHT_GREEN}🌐 Proxy listening on: {addrs}{Colors.RESET}")
+ print(
+ f"{Colors.BRIGHT_GREEN}📊 Stats dashboard: http://localhost:{STATS_PORT}/stats{Colors.RESET}"
+ )
+ print(f"{Colors.YELLOW}🔍 Scanning backend servers...{Colors.RESET}")
+ print()
+ await asyncio.sleep(2)
+
+ async with server:
+ try:
+ await server.serve_forever()
+ except asyncio.CancelledError:
+ print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
+ logger.info("Load balancer server shutting down.")
+ except KeyboardInterrupt:
+ print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
+ logger.info("Shutdown requested by user.")
+ finally:
+ print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
+ logger.info("Cancelling background tasks...")
+ scan_task.cancel()
+ status_task.cancel()
  try:
- await server.serve_forever()
+ await asyncio.gather(scan_task, status_task, return_exceptions=True)
  except asyncio.CancelledError:
- print(f"\n{Colors.YELLOW}🛑 Shutdown signal received...{Colors.RESET}")
- logger.info("Load balancer server shutting down.")
- except KeyboardInterrupt:
- print(f"\n{Colors.YELLOW}🛑 Shutdown requested by user...{Colors.RESET}")
- logger.info("Shutdown requested by user.")
- finally:
- print(f"{Colors.CYAN}🔄 Cleaning up background tasks...{Colors.RESET}")
- logger.info("Cancelling background tasks...")
- scan_task.cancel()
- status_task.cancel()
- try:
- await asyncio.gather(scan_task, status_task, return_exceptions=True)
- except asyncio.CancelledError:
- pass
- print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
- logger.info("Background tasks finished.")
+ pass
+ print(f"{Colors.BRIGHT_GREEN}✅ Shutdown complete. Goodbye!{Colors.RESET}")
+ logger.info("Background tasks finished.")
+
 
  def run_load_balancer():
- global LOAD_BALANCER_PORT, BACKEND_PORTS, BACKEND_HOST, STATUS_PRINT_INTERVAL, HEALTH_CHECK_TIMEOUT, STATS_PORT
+ global \
+ LOAD_BALANCER_PORT, \
+ BACKEND_PORTS, \
+ BACKEND_HOST, \
+ STATUS_PRINT_INTERVAL, \
+ HEALTH_CHECK_TIMEOUT, \
+ THROTTLE_MS, \
+ STATS_PORT
  args = parse_args()
  LOAD_BALANCER_PORT = args.port
  BACKEND_HOST = args.host
  BACKEND_PORTS = [int(p.strip()) for p in args.ports.split(",") if p.strip()]
  STATUS_PRINT_INTERVAL = args.status_interval
  HEALTH_CHECK_TIMEOUT = args.health_timeout
+ THROTTLE_MS = args.throttle_ms
  if args.stats_port is not None:
  STATS_PORT = args.stats_port
  else:
  STATS_PORT = LOAD_BALANCER_PORT + 1
  if not BACKEND_PORTS:
  print(f"{Colors.BG_RED}{Colors.WHITE} ❌ ERROR {Colors.RESET}")
- print(f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}")
+ print(
+ f"{Colors.RED}No backend ports specified. Use --ports 8140,8150 ...{Colors.RESET}"
+ )
  logger.critical("No backend ports specified. Use --ports 8140,8150 ...")
  sys.exit(1)
  try:
@@ -878,5 +974,6 @@ def run_load_balancer():
  print(f"{Colors.RED}Critical error in main execution: {e}{Colors.RESET}")
  logger.critical(f"Critical error in main execution: {e}")
 
+
  if __name__ == "__main__":
- run_load_balancer()
+ run_load_balancer()
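The main functional change in this file is the per-server request throttle: a new --throttle-ms option (default 30 ms) and a concurrency-safe delay inside handle_client that reserves each server's next dispatch slot under state_lock. The snippet below is a minimal standalone sketch of that scheduling pattern, reusing the names from the diff (THROTTLE_MS, last_send_times, state_lock); the throttle() helper and demo() driver are illustrative only and not part of the package.

# Standalone sketch of the per-server throttling pattern added in 1.1.7.
# THROTTLE_MS, last_send_times, and state_lock mirror the diff; throttle()
# and demo() are illustrative helpers, not package API.
import asyncio
import time
from collections import defaultdict

THROTTLE_MS = 30.0
last_send_times = defaultdict(float)  # server -> last reserved dispatch time (ms)
state_lock = asyncio.Lock()


async def throttle(server):
    """Delay so dispatches to `server` are at least THROTTLE_MS apart."""
    now_ms = time.time() * 1000
    async with state_lock:
        next_allowed_ms = last_send_times[server] + THROTTLE_MS
        if next_allowed_ms > now_ms:
            # Reserve the next slot before releasing the lock, so concurrent
            # callers queue behind this one instead of all firing at once.
            last_send_times[server] = next_allowed_ms
            sleep_s = (next_allowed_ms - now_ms) / 1000
        else:
            last_send_times[server] = now_ms
            sleep_s = 0
    # Sleep outside the lock so other tasks are not blocked.
    if sleep_s > 0:
        await asyncio.sleep(sleep_s)


async def demo():
    # Three concurrent "requests" to the same server get spaced ~30 ms apart.
    await asyncio.gather(*(throttle(("localhost", 8140)) for _ in range(3)))


asyncio.run(demo())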
speedy_utils/__init__.py CHANGED
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
  )
 
  # Cache utilities
- from .common.utils_cache import identify, identify_uuid, memoize
+ from .common.utils_cache import amemoize, identify, identify_uuid, memoize
 
  # IO utilities
  from .common.utils_io import (
@@ -197,6 +197,7 @@ __all__ = [
  # Function decorators
  "retry_runtime",
  # Cache utilities
+ "amemoize",
  "memoize",
  "identify",
  "identify_uuid",
@@ -226,4 +227,5 @@ __all__ = [
  "multi_thread",
  # Notebook utilities
  "change_dir",
+ "amemoize",
  ]
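The only API-visible change in speedy_utils/__init__.py is the new amemoize export alongside memoize. The sketch below is a hedged usage example only: the diff shows just the export, not the signature, so the assumption here is that amemoize is the async counterpart of memoize and that both work as plain decorators.

# Assumed usage only: the diff shows that `amemoize` is now exported next to
# `memoize`, but not its signature; decorator-style use is an assumption here.
import asyncio

from speedy_utils import amemoize, memoize


@memoize
def square(x):
    return x * x


@amemoize
async def fetch_square(x):
    await asyncio.sleep(0.1)  # stand-in for real async work
    return x * x


async def main():
    print(square(4))              # synchronous cached call
    print(await fetch_square(4))  # async cached call, per the assumed API


asyncio.run(main())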