speedy-utils 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +29 -0
- llm_utils/chat_format.py +427 -0
- llm_utils/group_messages.py +120 -0
- llm_utils/lm/__init__.py +8 -0
- llm_utils/lm/base_lm.py +304 -0
- llm_utils/lm/utils.py +130 -0
- llm_utils/scripts/vllm_load_balancer.py +353 -0
- llm_utils/scripts/vllm_serve.py +416 -0
- speedy_utils/__init__.py +85 -0
- speedy_utils/all.py +159 -0
- {speedy → speedy_utils}/common/__init__.py +0 -0
- speedy_utils/common/clock.py +215 -0
- speedy_utils/common/function_decorator.py +66 -0
- speedy_utils/common/logger.py +207 -0
- speedy_utils/common/report_manager.py +112 -0
- speedy_utils/common/utils_cache.py +264 -0
- {speedy → speedy_utils}/common/utils_io.py +66 -19
- {speedy → speedy_utils}/common/utils_misc.py +25 -11
- speedy_utils/common/utils_print.py +216 -0
- speedy_utils/multi_worker/__init__.py +0 -0
- speedy_utils/multi_worker/process.py +198 -0
- speedy_utils/multi_worker/thread.py +327 -0
- speedy_utils/scripts/mpython.py +108 -0
- speedy_utils-1.0.5.dist-info/METADATA +279 -0
- speedy_utils-1.0.5.dist-info/RECORD +27 -0
- {speedy_utils-1.0.3.dist-info → speedy_utils-1.0.5.dist-info}/WHEEL +1 -2
- speedy_utils-1.0.5.dist-info/entry_points.txt +3 -0
- speedy/__init__.py +0 -53
- speedy/common/clock.py +0 -68
- speedy/common/utils_cache.py +0 -170
- speedy/common/utils_print.py +0 -138
- speedy/multi_worker.py +0 -121
- speedy_utils-1.0.3.dist-info/METADATA +0 -22
- speedy_utils-1.0.3.dist-info/RECORD +0 -12
- speedy_utils-1.0.3.dist-info/top_level.txt +0 -1
llm_utils/scripts/vllm_load_balancer.py (new file)
@@ -0,0 +1,353 @@
+import asyncio
+import random
+from collections import defaultdict
+import time
+from tabulate import tabulate
+import logging
+import contextlib
+import aiohttp # <-- Import aiohttp
+
+# --- Configuration ---
+LOAD_BALANCER_HOST = '0.0.0.0'
+LOAD_BALANCER_PORT = 8008
+
+SCAN_TARGET_HOST = 'localhost'
+SCAN_PORT_START = 8150
+SCAN_PORT_END = 8170 # Inclusive
+SCAN_INTERVAL = 30
+# Timeout applies to the HTTP health check request now
+HEALTH_CHECK_TIMEOUT = 2.0 # Increased slightly for HTTP requests
+
+STATUS_PRINT_INTERVAL = 5
+BUFFER_SIZE = 4096
+
+# Setup basic logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# --- Global Shared State ---
+available_servers = []
+connection_counts = defaultdict(int)
+state_lock = asyncio.Lock()
+
+# --- Helper Functions --- (relay_data and safe_close_writer remain the same)
+async def relay_data(reader, writer, direction):
+    """Reads data from reader and writes to writer until EOF or error."""
+    try:
+        while True:
+            data = await reader.read(BUFFER_SIZE)
+            if not data:
+                logging.debug(f"EOF received on {direction} stream.")
+                break
+            writer.write(data)
+            await writer.drain()
+    except ConnectionResetError:
+        logging.warning(f"Connection reset on {direction} stream.")
+    except asyncio.CancelledError:
+        logging.debug(f"Relay task cancelled for {direction}.")
+        raise
+    except Exception as e:
+        logging.warning(f"Error during data relay ({direction}): {e}")
+    finally:
+        if not writer.is_closing():
+            try:
+                writer.close()
+                await writer.wait_closed()
+                logging.debug(f"Closed writer for {direction}")
+            except Exception as close_err:
+                logging.debug(f"Error closing writer for {direction} (might be expected): {close_err}")
+
+@contextlib.asynccontextmanager
+async def safe_close_writer(writer):
+    """Async context manager to safely close an asyncio StreamWriter."""
+    try:
+        yield writer
+    finally:
+        if writer and not writer.is_closing():
+            try:
+                writer.close()
+                await writer.wait_closed()
+            except Exception as e:
+                logging.debug(f"Error closing writer in context manager: {e}")
+
+# --- Server Scanning and Health Check (Modified) ---
+
+async def check_server_health(session, host, port):
+    """Performs an HTTP GET request to the /health endpoint."""
+    url = f"http://{host}:{port}/health"
+    try:
+        # Use the provided aiohttp session to make the GET request
+        async with session.get(url, timeout=HEALTH_CHECK_TIMEOUT) as response:
+            # Check for a successful status code (2xx range)
+            if 200 <= response.status < 300:
+                logging.debug(f"Health check success for {url} (Status: {response.status})")
+                # Ensure the connection is released back to the pool
+                await response.release()
+                return True
+            else:
+                logging.debug(f"Health check failed for {url} (Status: {response.status})")
+                await response.release()
+                return False
+    except asyncio.TimeoutError:
+        logging.debug(f"Health check HTTP request timeout for {url}")
+        return False
+    except aiohttp.ClientConnectorError as e:
+        # Handles connection refused, DNS errors etc. - server likely down
+        logging.debug(f"Health check connection error for {url}: {e}")
+        return False
+    except aiohttp.ClientError as e:
+        # Catch other potential client errors (e.g., invalid URL structure, too many redirects)
+        logging.warning(f"Health check client error for {url}: {e}")
+        return False
+    except Exception as e:
+        # Catch any other unexpected errors during the check
+        logging.error(f"Unexpected health check error for {url}: {e}")
+        return False
+
+async def scan_and_update_servers():
+    """Periodically scans ports using HTTP /health check and updates available servers."""
+    global available_servers
+    logging.debug(f"Starting server scan task (HTTP GET /health on Ports {SCAN_PORT_START}-{SCAN_PORT_END} every {SCAN_INTERVAL}s)")
+    while True:
+        try:
+            current_scan_results = []
+            scan_tasks = []
+            ports_to_scan = range(SCAN_PORT_START, SCAN_PORT_END + 1)
+
+            # Create ONE aiohttp session for all checks within this scan cycle for efficiency
+            async with aiohttp.ClientSession() as session:
+                # Create health check tasks for all ports, passing the shared session
+                for port in ports_to_scan:
+                    task = asyncio.create_task(check_server_health(session, SCAN_TARGET_HOST, port))
+                    scan_tasks.append((task, port))
+
+                # Wait for all health checks to complete
+                # return_exceptions=True prevents gather from stopping if one check fails
+                await asyncio.gather(*(task for task, port in scan_tasks), return_exceptions=True)
+
+                # Collect results from completed tasks
+                for task, port in scan_tasks:
+                    try:
+                        # Check if task finished, wasn't cancelled, and returned True
+                        if task.done() and not task.cancelled() and task.result() is True:
+                            current_scan_results.append((SCAN_TARGET_HOST, port))
+                    except Exception as e:
+                        # Log errors from the health check task itself if gather didn't catch them
+                        logging.error(f"Error retrieving health check result for port {port}: {e}")
+
+            # --- Update Shared State (Locked) ---
+            async with state_lock:
+                previous_servers = set(available_servers)
+                current_set = set(current_scan_results)
+
+                added = current_set - previous_servers
+                removed = previous_servers - current_set
+
+                if added:
+                    logging.info(f"Servers added (passed /health check): {sorted(list(added))}")
+                if removed:
+                    logging.info(f"Servers removed (failed /health check or stopped): {sorted(list(removed))}")
+                    for server in removed:
+                        if server in connection_counts:
+                            del connection_counts[server]
+                            logging.debug(f"Removed connection count entry for unavailable server {server}")
+
+                available_servers = sorted(list(current_set))
+                for server in available_servers:
+                    if server not in connection_counts:
+                        connection_counts[server] = 0
+
+            logging.debug(f"Scan complete. Active servers: {available_servers}")
+
+        except asyncio.CancelledError:
+            logging.info("Server scan task cancelled.")
+            break
+        except Exception as e:
+            logging.error(f"Error in scan_and_update_servers loop: {e}")
+            await asyncio.sleep(SCAN_INTERVAL / 2) # Avoid tight loop on error
+
+        await asyncio.sleep(SCAN_INTERVAL)
+
+
+# --- Core Load Balancer Logic (handle_client remains the same) ---
+async def handle_client(client_reader, client_writer):
+    """Handles a single client connection."""
+    client_addr = client_writer.get_extra_info('peername')
+    logging.info(f"Accepted connection from {client_addr}")
+
+    backend_server = None
+    backend_reader = None
+    backend_writer = None
+    server_selected = False
+
+    try:
+        # --- Select Backend Server (Least Connections from Available) ---
+        selected_server = None
+        async with state_lock: # Lock to safely access available_servers and connection_counts
+            if not available_servers:
+                logging.warning(f"No backend servers available (failed health checks?) for client {client_addr}. Closing connection.")
+                async with safe_close_writer(client_writer): pass
+                return
+
+            min_connections = float('inf')
+            least_used_available_servers = []
+            for server in available_servers: # Iterate only over servers that passed health check
+                count = connection_counts.get(server, 0)
+                if count < min_connections:
+                    min_connections = count
+                    least_used_available_servers = [server]
+                elif count == min_connections:
+                    least_used_available_servers.append(server)
+
+            if least_used_available_servers:
+                selected_server = random.choice(least_used_available_servers)
+                connection_counts[selected_server] += 1
+                backend_server = selected_server
+                server_selected = True
+                logging.info(f"Routing {client_addr} to {backend_server} (Current connections: {connection_counts[backend_server]})")
+            else:
+                logging.error(f"Logic error: No server chosen despite available servers list not being empty for {client_addr}.")
+                async with safe_close_writer(client_writer): pass
+                return
+
+        # --- Connect to Backend Server ---
+        if not backend_server:
+            logging.error(f"No backend server selected for {client_addr} before connection attempt.")
+            async with safe_close_writer(client_writer): pass
+            server_selected = False
+            return
+
+        try:
+            logging.debug(f"Attempting connection to backend {backend_server} for {client_addr}")
+            backend_reader, backend_writer = await asyncio.open_connection(
+                backend_server[0], backend_server[1]
+            )
+            logging.debug(f"Successfully connected to backend {backend_server} for {client_addr}")
+
+        # Handle connection failure AFTER selection (server might go down between health check and selection)
+        except ConnectionRefusedError:
+            logging.error(f"Connection refused by selected backend server {backend_server} for {client_addr}")
+            async with state_lock: # Decrement count under lock
+                if backend_server in connection_counts and connection_counts[backend_server] > 0: connection_counts[backend_server] -= 1
+            server_selected = False # Mark failure
+            async with safe_close_writer(client_writer): pass
+            return
+        except Exception as e:
+            logging.error(f"Failed to connect to selected backend server {backend_server} for {client_addr}: {e}")
+            async with state_lock: # Decrement count under lock
+                if backend_server in connection_counts and connection_counts[backend_server] > 0: connection_counts[backend_server] -= 1
+            server_selected = False # Mark failure
+            async with safe_close_writer(client_writer): pass
+            return
+
+        # --- Relay Data Bidirectionally ---
+        async with safe_close_writer(backend_writer): # Ensure backend writer is closed
+            client_to_backend = asyncio.create_task(
+                relay_data(client_reader, backend_writer, f"{client_addr} -> {backend_server}")
+            )
+            backend_to_client = asyncio.create_task(
+                relay_data(backend_reader, client_writer, f"{backend_server} -> {client_addr}")
+            )
+            done, pending = await asyncio.wait(
+                [client_to_backend, backend_to_client], return_when=asyncio.FIRST_COMPLETED
+            )
+            for task in pending: task.cancel()
+            for task in done:
+                with contextlib.suppress(asyncio.CancelledError):
+                    if task.exception(): logging.warning(f"Relay task finished with error: {task.exception()}")
+
+    except asyncio.CancelledError:
+        logging.info(f"Client handler for {client_addr} cancelled.")
+    except Exception as e:
+        logging.error(f"Error handling client {client_addr}: {e}")
+    finally:
+        logging.info(f"Closing connection for {client_addr}")
+        # Decrement connection count only if we successfully selected/incremented
+        if backend_server and server_selected:
+            async with state_lock:
+                if backend_server in connection_counts:
+                    if connection_counts[backend_server] > 0:
+                        connection_counts[backend_server] -= 1
+                        logging.info(f"Connection closed for {client_addr}. Backend {backend_server} connections: {connection_counts[backend_server]}")
+                    else:
+                        logging.warning(f"Attempted to decrement count below zero for {backend_server} on close")
+                        connection_counts[backend_server] = 0
+
+# --- Status Reporting Task (print_status_periodically remains the same) ---
+async def print_status_periodically():
+    """Periodically prints the connection status based on available servers."""
+    while True:
+        await asyncio.sleep(STATUS_PRINT_INTERVAL)
+        async with state_lock:
+            headers = ["Backend Server", "Host", "Port", "Active Connections", "Status"]
+            table_data = []
+            total_connections = 0
+            current_available = available_servers[:]
+            current_counts = connection_counts.copy()
+
+            if not current_available:
+                # clear terminal and print status
+                print("\033[H\033[J", end="") # Clear terminal
+                print("\n----- Load Balancer Status -----")
+                print("No backend servers currently available (failed /health check).")
+                print("------------------------------\n")
+                continue
+
+            for server in current_available:
+                host, port = server
+                count = current_counts.get(server, 0)
+                table_data.append([f"{host}:{port}", host, port, count, "Available"])
+                total_connections += count
+
+            table_data.sort(key=lambda row: (row[1], row[2]))
+
+            try:
+                table = tabulate(table_data, headers=headers, tablefmt="grid")
+                print("\n----- Load Balancer Status -----")
+                print(f"Scanning Ports: {SCAN_PORT_START}-{SCAN_PORT_END} on {SCAN_TARGET_HOST} (using /health endpoint)")
+                print(f"Scan Interval: {SCAN_INTERVAL}s | Health Check Timeout: {HEALTH_CHECK_TIMEOUT}s")
+                print(table)
+                print(f"Total Active Connections (on available servers): {total_connections}")
+                print("------------------------------\n")
+            except Exception as e:
+                logging.error(f"Error printing status table: {e}")
+
+
+# --- Main Execution (main remains the same) ---
+async def main():
+    scan_task = asyncio.create_task(scan_and_update_servers())
+    status_task = asyncio.create_task(print_status_periodically())
+
+    server = await asyncio.start_server(
+        handle_client, LOAD_BALANCER_HOST, LOAD_BALANCER_PORT
+    )
+
+    addrs = ', '.join(str(sock.getsockname()) for sock in server.sockets)
+    logging.info(f'Load balancer serving on {addrs}')
+    logging.info(f'Dynamically discovering servers via HTTP /health on {SCAN_TARGET_HOST}:{SCAN_PORT_START}-{SCAN_PORT_END}')
+
+    async with server:
+        try:
+            await server.serve_forever()
+        except asyncio.CancelledError:
+            logging.info("Load balancer server shutting down.")
+        finally:
+            logging.info("Cancelling background tasks...")
+            scan_task.cancel()
+            status_task.cancel()
+            try:
+                await asyncio.gather(scan_task, status_task, return_exceptions=True)
+            except asyncio.CancelledError:
+                pass
+            logging.info("Background tasks finished.")
+
+def run_load_balancer():
+    # Make sure to install aiohttp: pip install aiohttp
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        logging.info("Shutdown requested by user.")
+    except Exception as e:
+        logging.critical(f"Critical error in main execution: {e}")
+
+if __name__ == "__main__":
+    run_load_balancer()
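
The scanner above only marks a backend as available when GET http://localhost:<port>/health returns a 2xx status. A minimal sketch of a backend stub that would pass this probe is shown below; it is illustrative only (not part of the wheel) and assumes aiohttp's bundled web server, with port 8150 chosen simply because it is the first port in the scanned range.

# Illustrative stub only -- not part of speedy-utils; assumes aiohttp is installed.
# A backend passes check_server_health() as long as GET /health returns any 2xx.
from aiohttp import web

async def health(request: web.Request) -> web.Response:
    # The balancer ignores the body; only the status code matters.
    return web.json_response({"status": "ok"})

def make_app() -> web.Application:
    app = web.Application()
    app.router.add_get("/health", health)
    return app

if __name__ == "__main__":
    # 8150 is the first port in the balancer's scanned range (SCAN_PORT_START).
    web.run_app(make_app(), host="localhost", port=8150)

With one or more such stubs listening on ports 8150-8170, calling run_load_balancer() (for example via python -m llm_utils.scripts.vllm_load_balancer) should start the proxy on port 8008 and list each stub as Available in the periodic status table.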