speedy-utils 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +1 -5
- llm_utils/chat_format/transform.py +9 -9
- llm_utils/group_messages.py +1 -1
- llm_utils/lm/async_lm/__init__.py +6 -1
- llm_utils/lm/async_lm/_utils.py +7 -4
- llm_utils/lm/async_lm/async_llm_task.py +472 -110
- llm_utils/lm/async_lm/async_lm.py +273 -665
- llm_utils/lm/async_lm/async_lm_base.py +407 -0
- llm_utils/lm/async_lm/lm_specific.py +136 -0
- llm_utils/lm/utils.py +1 -3
- llm_utils/scripts/vllm_load_balancer.py +49 -37
- speedy_utils/__init__.py +3 -1
- speedy_utils/common/notebook_utils.py +4 -4
- speedy_utils/common/report_manager.py +2 -3
- speedy_utils/common/utils_cache.py +233 -3
- speedy_utils/common/utils_io.py +2 -0
- speedy_utils/scripts/mpython.py +1 -3
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.8.dist-info}/METADATA +1 -1
- speedy_utils-1.1.8.dist-info/RECORD +39 -0
- llm_utils/lm/chat_html.py +0 -246
- llm_utils/lm/lm_json.py +0 -68
- llm_utils/lm/sync_lm.py +0 -943
- speedy_utils-1.1.6.dist-info/RECORD +0 -40
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.8.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import sys
|
|
3
|
-
import os
|
|
4
|
-
import time
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
from collections import defaultdict
|
|
7
|
-
|
|
8
2
|
import asyncio
|
|
9
3
|
import contextlib
|
|
4
|
+
import os
|
|
10
5
|
import random
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from datetime import datetime
|
|
11
10
|
|
|
12
11
|
import aiohttp
|
|
13
12
|
from loguru import logger
|
|
14
|
-
from tabulate import tabulate
|
|
15
|
-
|
|
16
13
|
from speedy_utils import setup_logger
|
|
14
|
+
from tabulate import tabulate
|
|
17
15
|
|
|
18
16
|
setup_logger(min_interval=5)
|
|
19
17
|
|
|
@@ -28,7 +26,7 @@ Examples:
|
|
|
28
26
|
python vllm_load_balancer.py 8001 --ports 8140,8150,8160
|
|
29
27
|
python vllm_load_balancer.py 8080 --ports 8140,8150 --host 192.168.1.100
|
|
30
28
|
python vllm_load_balancer.py 8001 --ports 8140,8150 --status-interval 3
|
|
31
|
-
python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms
|
|
29
|
+
python vllm_load_balancer.py 8001 --ports 8140,8150 --throttle-ms 100
|
|
32
30
|
|
|
33
31
|
Features:
|
|
34
32
|
• Real-time dashboard with color-coded status
|
|
@@ -78,7 +76,7 @@ Features:
|
|
|
78
76
|
"--throttle-ms",
|
|
79
77
|
type=float,
|
|
80
78
|
default=30.0,
|
|
81
|
-
help="Minimum milliseconds between requests to same server (default:
|
|
79
|
+
help="Minimum milliseconds between requests to same server (default: 30ms)",
|
|
82
80
|
)
|
|
83
81
|
return parser.parse_args()
|
|
84
82
|
|
|
@@ -91,13 +89,13 @@ BACKEND_HOST = "localhost" # Will be overwritten by CLI
|
|
|
91
89
|
BACKEND_PORTS = [] # Will be overwritten by CLI
|
|
92
90
|
STATUS_PRINT_INTERVAL = 5
|
|
93
91
|
HEALTH_CHECK_TIMEOUT = 2
|
|
94
|
-
THROTTLE_MS =
|
|
92
|
+
THROTTLE_MS = 30.0 # Will be overwritten by CLI
|
|
95
93
|
BUFFER_SIZE = 4096
|
|
96
94
|
|
|
97
95
|
# --- Global Shared State ---
|
|
98
96
|
available_servers = []
|
|
99
97
|
connection_counts = defaultdict(int)
|
|
100
|
-
last_send_times = defaultdict(float) # Track last
|
|
98
|
+
last_send_times = defaultdict(float) # Track last dispatch time per server
|
|
101
99
|
state_lock = asyncio.Lock()
|
|
102
100
|
start_time = None
|
|
103
101
|
total_connections_served = 0
|
|
@@ -193,7 +191,7 @@ class Colors:
|
|
|
193
191
|
BG_BLUE = "\033[44m"
|
|
194
192
|
|
|
195
193
|
|
|
196
|
-
# --- Helper Functions ---
|
|
194
|
+
# --- Helper Functions ---
|
|
197
195
|
async def relay_data(reader, writer, direction):
|
|
198
196
|
"""Reads data from reader and writes to writer until EOF or error."""
|
|
199
197
|
try:
|
|
@@ -350,7 +348,7 @@ async def scan_and_update_servers():
|
|
|
350
348
|
await asyncio.sleep(10)
|
|
351
349
|
|
|
352
350
|
|
|
353
|
-
# --- Core Load Balancer Logic
|
|
351
|
+
# --- Core Load Balancer Logic ---
|
|
354
352
|
async def handle_client(client_reader, client_writer):
|
|
355
353
|
"""Handles a single client connection."""
|
|
356
354
|
client_addr = client_writer.get_extra_info("peername")
|
|
@@ -394,7 +392,6 @@ async def handle_client(client_reader, client_writer):
|
|
|
394
392
|
server_selected = True
|
|
395
393
|
|
|
396
394
|
# Update global statistics
|
|
397
|
-
global total_connections_served, current_active_connections
|
|
398
395
|
total_connections_served += 1
|
|
399
396
|
current_active_connections += 1
|
|
400
397
|
else:
|
|
@@ -414,29 +411,42 @@ async def handle_client(client_reader, client_writer):
|
|
|
414
411
|
pass
|
|
415
412
|
server_selected = False
|
|
416
413
|
return
|
|
417
|
-
|
|
418
|
-
# --- Throttling Logic ---
|
|
419
|
-
#
|
|
420
|
-
|
|
421
|
-
|
|
414
|
+
|
|
415
|
+
# --- Throttling Logic (Concurrency Safe) ---
|
|
416
|
+
# Atomically schedule the next request to avoid thundering herd on a single server.
|
|
417
|
+
sleep_duration_s = 0
|
|
418
|
+
now_ms = time.time() * 1000
|
|
419
|
+
|
|
422
420
|
async with state_lock:
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
421
|
+
# Get the time the last request was DISPATCHED to this server.
|
|
422
|
+
last_dispatch_ms = last_send_times.get(backend_server, 0)
|
|
423
|
+
|
|
424
|
+
# Calculate when the next request is allowed to be sent.
|
|
425
|
+
next_allowed_ms = last_dispatch_ms + THROTTLE_MS
|
|
426
|
+
|
|
427
|
+
# If the next allowed time is in the future, we must wait.
|
|
428
|
+
if next_allowed_ms > now_ms:
|
|
429
|
+
sleep_duration_s = (next_allowed_ms - now_ms) / 1000
|
|
430
|
+
# The next request after this one will be scheduled relative to when
|
|
431
|
+
# THIS request is actually sent (i.e., after its delay).
|
|
432
|
+
new_dispatch_time_ms = next_allowed_ms
|
|
433
|
+
else:
|
|
434
|
+
# We can send immediately.
|
|
435
|
+
sleep_duration_s = 0
|
|
436
|
+
new_dispatch_time_ms = now_ms
|
|
437
|
+
|
|
438
|
+
# CRITICAL: Update the dispatch time for the *next* caller immediately inside the lock.
|
|
439
|
+
last_send_times[backend_server] = new_dispatch_time_ms
|
|
440
|
+
|
|
441
|
+
if sleep_duration_s > 0:
|
|
428
442
|
logger.debug(
|
|
429
|
-
f"Throttling request to {backend_server} for {
|
|
443
|
+
f"Throttling request to {backend_server} for {sleep_duration_s:.3f}s to maintain >{THROTTLE_MS}ms interval."
|
|
430
444
|
)
|
|
431
|
-
|
|
432
|
-
# Sleep outside the lock to avoid blocking other
|
|
433
|
-
if
|
|
434
|
-
await asyncio.sleep(
|
|
435
|
-
|
|
436
|
-
# Update last send time after throttling
|
|
437
|
-
async with state_lock:
|
|
438
|
-
last_send_times[backend_server] = time.time() * 1000
|
|
439
|
-
|
|
445
|
+
|
|
446
|
+
# Sleep outside the lock to avoid blocking other tasks.
|
|
447
|
+
if sleep_duration_s > 0:
|
|
448
|
+
await asyncio.sleep(sleep_duration_s)
|
|
449
|
+
|
|
440
450
|
try:
|
|
441
451
|
logger.debug(
|
|
442
452
|
f"Attempting connection to backend {backend_server} for {client_addr}"
|
|
@@ -572,7 +582,9 @@ async def display_status_dashboard():
|
|
|
572
582
|
print(
|
|
573
583
|
f"{Colors.YELLOW}🔧 Configured Ports:{Colors.RESET} {', '.join(map(str, BACKEND_PORTS))}"
|
|
574
584
|
)
|
|
575
|
-
print(
|
|
585
|
+
print(
|
|
586
|
+
f"{Colors.YELLOW}⚡ Request Throttling:{Colors.RESET} {THROTTLE_MS}ms minimum"
|
|
587
|
+
)
|
|
576
588
|
print()
|
|
577
589
|
|
|
578
590
|
# Connection Statistics Section
|
|
@@ -964,4 +976,4 @@ def run_load_balancer():
|
|
|
964
976
|
|
|
965
977
|
|
|
966
978
|
if __name__ == "__main__":
|
|
967
|
-
run_load_balancer()
|
|
979
|
+
run_load_balancer()
|
speedy_utils/__init__.py
CHANGED
|
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
|
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
# Cache utilities
|
|
111
|
-
from .common.utils_cache import identify, identify_uuid, memoize
|
|
111
|
+
from .common.utils_cache import amemoize, identify, identify_uuid, memoize
|
|
112
112
|
|
|
113
113
|
# IO utilities
|
|
114
114
|
from .common.utils_io import (
|
|
@@ -197,6 +197,7 @@ __all__ = [
|
|
|
197
197
|
# Function decorators
|
|
198
198
|
"retry_runtime",
|
|
199
199
|
# Cache utilities
|
|
200
|
+
"amemoize",
|
|
200
201
|
"memoize",
|
|
201
202
|
"identify",
|
|
202
203
|
"identify_uuid",
|
|
@@ -226,4 +227,5 @@ __all__ = [
|
|
|
226
227
|
"multi_thread",
|
|
227
228
|
# Notebook utilities
|
|
228
229
|
"change_dir",
|
|
230
|
+
"amemoize",
|
|
229
231
|
]
|
|
@@ -8,12 +8,12 @@ from IPython.display import HTML, display
|
|
|
8
8
|
from tabulate import tabulate
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def change_dir(target_directory: str =
|
|
11
|
+
def change_dir(target_directory: str = "POLY") -> None:
|
|
12
12
|
"""Change directory to the first occurrence of x in the current path."""
|
|
13
|
-
cur_dir = pathlib.Path(
|
|
13
|
+
cur_dir = pathlib.Path("./")
|
|
14
14
|
target_dir = str(cur_dir.absolute()).split(target_directory)[0] + target_directory
|
|
15
15
|
os.chdir(target_dir)
|
|
16
|
-
print(f
|
|
16
|
+
print(f"Current dir: {target_dir}")
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def display_pretty_table_html(data: dict) -> None:
|
|
@@ -60,4 +60,4 @@ def print_table(data: Any, use_html: bool = True) -> None:
|
|
|
60
60
|
if use_html:
|
|
61
61
|
display(HTML(table))
|
|
62
62
|
else:
|
|
63
|
-
print(table)
|
|
63
|
+
print(table)
|
|
@@ -3,7 +3,6 @@ from collections import defaultdict
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
class ReportManager:
|
|
8
7
|
def __init__(self):
|
|
9
8
|
self.cache_dir = os.path.expanduser("~/.cache/speedy_utils")
|
|
@@ -41,7 +40,7 @@ class ReportManager:
|
|
|
41
40
|
[
|
|
42
41
|
"\n### Results Overview",
|
|
43
42
|
f"- Total items processed: {len(results)}",
|
|
44
|
-
f"- Success rate: {(len(results) - len(errors))/len(results)*100:.1f}%",
|
|
43
|
+
f"- Success rate: {(len(results) - len(errors)) / len(results) * 100:.1f}%",
|
|
45
44
|
f"- Total errors: {len(errors)}",
|
|
46
45
|
]
|
|
47
46
|
)
|
|
@@ -49,7 +48,7 @@ class ReportManager:
|
|
|
49
48
|
if execution_time:
|
|
50
49
|
md_content.append(f"- Execution time: {execution_time:.2f}s")
|
|
51
50
|
md_content.append(
|
|
52
|
-
f"- Average speed: {len(results)/execution_time:.1f} items/second"
|
|
51
|
+
f"- Average speed: {len(results) / execution_time:.1f} items/second"
|
|
53
52
|
)
|
|
54
53
|
|
|
55
54
|
if error_groups:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import functools
|
|
2
3
|
import inspect
|
|
3
4
|
import json
|
|
@@ -6,7 +7,7 @@ import os.path as osp
|
|
|
6
7
|
import pickle
|
|
7
8
|
import uuid
|
|
8
9
|
from threading import Lock
|
|
9
|
-
from typing import Any, Literal
|
|
10
|
+
from typing import Any, Awaitable, Callable, Literal, TypeVar
|
|
10
11
|
|
|
11
12
|
import cachetools
|
|
12
13
|
import pandas as pd
|
|
@@ -26,6 +27,10 @@ thread_locker = Lock()
|
|
|
26
27
|
disk_lock = Lock()
|
|
27
28
|
mem_lock = Lock()
|
|
28
29
|
|
|
30
|
+
# Add async-specific types
|
|
31
|
+
T = TypeVar('T')
|
|
32
|
+
AsyncFunc = Callable[..., Awaitable[T]]
|
|
33
|
+
|
|
29
34
|
|
|
30
35
|
def compute_func_id(func, args, kwargs, ignore_self, keys):
|
|
31
36
|
func_source = get_source(func)
|
|
@@ -144,6 +149,61 @@ def _disk_memoize(func, keys, cache_dir, ignore_self, verbose):
|
|
|
144
149
|
return wrapper
|
|
145
150
|
|
|
146
151
|
|
|
152
|
+
def _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose):
|
|
153
|
+
@functools.wraps(func)
|
|
154
|
+
async def wrapper(*args, **kwargs):
|
|
155
|
+
try:
|
|
156
|
+
# Compute cache path as before
|
|
157
|
+
func_source, sub_dir, key_id = compute_func_id(
|
|
158
|
+
func, args, kwargs, ignore_self, keys
|
|
159
|
+
)
|
|
160
|
+
if func_source is None:
|
|
161
|
+
return await func(*args, **kwargs)
|
|
162
|
+
if sub_dir == "funcs":
|
|
163
|
+
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
164
|
+
else:
|
|
165
|
+
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
166
|
+
mkdir_or_exist(osp.dirname(cache_path))
|
|
167
|
+
|
|
168
|
+
# First check with disk lock (run in thread to avoid blocking)
|
|
169
|
+
def check_cache():
|
|
170
|
+
with disk_lock:
|
|
171
|
+
if osp.exists(cache_path):
|
|
172
|
+
try:
|
|
173
|
+
return load_json_or_pickle(cache_path)
|
|
174
|
+
except Exception as e:
|
|
175
|
+
if osp.exists(cache_path):
|
|
176
|
+
os.remove(cache_path)
|
|
177
|
+
logger.opt(depth=1).warning(
|
|
178
|
+
f"Error loading cache: {str(e)[:100]}, continue to recompute"
|
|
179
|
+
)
|
|
180
|
+
return None
|
|
181
|
+
|
|
182
|
+
# Run cache check in thread pool to avoid blocking
|
|
183
|
+
loop = asyncio.get_event_loop()
|
|
184
|
+
cached_result = await loop.run_in_executor(None, check_cache)
|
|
185
|
+
if cached_result is not None:
|
|
186
|
+
return cached_result
|
|
187
|
+
|
|
188
|
+
result = await func(*args, **kwargs)
|
|
189
|
+
|
|
190
|
+
# Write result under disk lock (run in thread to avoid blocking)
|
|
191
|
+
def write_cache():
|
|
192
|
+
with disk_lock:
|
|
193
|
+
if not osp.exists(cache_path):
|
|
194
|
+
dump_json_or_pickle(result, cache_path)
|
|
195
|
+
|
|
196
|
+
await loop.run_in_executor(None, write_cache)
|
|
197
|
+
return result
|
|
198
|
+
except Exception as e:
|
|
199
|
+
logger.opt(depth=1).warning(
|
|
200
|
+
f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
|
|
201
|
+
)
|
|
202
|
+
return await func(*args, **kwargs)
|
|
203
|
+
|
|
204
|
+
return wrapper
|
|
205
|
+
|
|
206
|
+
|
|
147
207
|
def _memory_memoize(func, size, keys, ignore_self):
|
|
148
208
|
global LRU_MEM_CACHE
|
|
149
209
|
if LRU_MEM_CACHE.maxsize != size:
|
|
@@ -176,6 +236,38 @@ def _memory_memoize(func, size, keys, ignore_self):
|
|
|
176
236
|
return wrapper
|
|
177
237
|
|
|
178
238
|
|
|
239
|
+
def _async_memory_memoize(func, size, keys, ignore_self):
|
|
240
|
+
global LRU_MEM_CACHE
|
|
241
|
+
if LRU_MEM_CACHE.maxsize != size:
|
|
242
|
+
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)
|
|
243
|
+
|
|
244
|
+
@functools.wraps(func)
|
|
245
|
+
async def wrapper(*args, **kwargs):
|
|
246
|
+
func_source, sub_dir, key_id = compute_func_id(
|
|
247
|
+
func, args, kwargs, ignore_self, keys
|
|
248
|
+
)
|
|
249
|
+
if func_source is None:
|
|
250
|
+
return await func(*args, **kwargs)
|
|
251
|
+
name = identify((func_source, sub_dir, key_id))
|
|
252
|
+
|
|
253
|
+
if not hasattr(func, "_mem_cache"):
|
|
254
|
+
func._mem_cache = LRU_MEM_CACHE
|
|
255
|
+
|
|
256
|
+
with mem_lock:
|
|
257
|
+
if name in func._mem_cache:
|
|
258
|
+
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={name}")
|
|
259
|
+
return func._mem_cache[name]
|
|
260
|
+
|
|
261
|
+
result = await func(*args, **kwargs)
|
|
262
|
+
|
|
263
|
+
with mem_lock:
|
|
264
|
+
if name not in func._mem_cache:
|
|
265
|
+
func._mem_cache[name] = result
|
|
266
|
+
return result
|
|
267
|
+
|
|
268
|
+
return wrapper
|
|
269
|
+
|
|
270
|
+
|
|
179
271
|
def both_memoize(func, keys, cache_dir, ignore_self):
|
|
180
272
|
@functools.wraps(func)
|
|
181
273
|
def wrapper(*args, **kwargs):
|
|
@@ -220,6 +312,63 @@ def both_memoize(func, keys, cache_dir, ignore_self):
|
|
|
220
312
|
return wrapper
|
|
221
313
|
|
|
222
314
|
|
|
315
|
+
def _async_both_memoize(func, keys, cache_dir, ignore_self):
|
|
316
|
+
@functools.wraps(func)
|
|
317
|
+
async def wrapper(*args, **kwargs):
|
|
318
|
+
func_source, sub_dir, key_id = compute_func_id(
|
|
319
|
+
func, args, kwargs, ignore_self, keys
|
|
320
|
+
)
|
|
321
|
+
if func_source is None:
|
|
322
|
+
return await func(*args, **kwargs)
|
|
323
|
+
|
|
324
|
+
mem_key = identify((func_source, sub_dir, key_id))
|
|
325
|
+
if not hasattr(func, "_mem_cache"):
|
|
326
|
+
func._mem_cache = LRU_MEM_CACHE
|
|
327
|
+
|
|
328
|
+
with mem_lock:
|
|
329
|
+
if mem_key in func._mem_cache:
|
|
330
|
+
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={mem_key}")
|
|
331
|
+
return func._mem_cache[mem_key]
|
|
332
|
+
|
|
333
|
+
if sub_dir == "funcs":
|
|
334
|
+
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
335
|
+
else:
|
|
336
|
+
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
337
|
+
mkdir_or_exist(osp.dirname(cache_path))
|
|
338
|
+
|
|
339
|
+
# Check disk cache in thread pool to avoid blocking
|
|
340
|
+
def check_disk_cache():
|
|
341
|
+
with disk_lock:
|
|
342
|
+
if osp.exists(cache_path):
|
|
343
|
+
return load_json_or_pickle(cache_path)
|
|
344
|
+
return None
|
|
345
|
+
|
|
346
|
+
loop = asyncio.get_event_loop()
|
|
347
|
+
disk_result = await loop.run_in_executor(None, check_disk_cache)
|
|
348
|
+
|
|
349
|
+
if disk_result is not None:
|
|
350
|
+
with mem_lock:
|
|
351
|
+
func._mem_cache[mem_key] = disk_result
|
|
352
|
+
return disk_result
|
|
353
|
+
|
|
354
|
+
# logger.debug(f"Cache MISS for {func.__name__}, key={cache_path}")
|
|
355
|
+
result = await func(*args, **kwargs)
|
|
356
|
+
|
|
357
|
+
# Write to disk in thread pool to avoid blocking
|
|
358
|
+
def write_disk_cache():
|
|
359
|
+
with disk_lock:
|
|
360
|
+
if not osp.exists(cache_path):
|
|
361
|
+
dump_json_or_pickle(result, cache_path)
|
|
362
|
+
|
|
363
|
+
await loop.run_in_executor(None, write_disk_cache)
|
|
364
|
+
|
|
365
|
+
with mem_lock:
|
|
366
|
+
func._mem_cache[mem_key] = result
|
|
367
|
+
return result
|
|
368
|
+
|
|
369
|
+
return wrapper
|
|
370
|
+
|
|
371
|
+
|
|
223
372
|
def memoize(
|
|
224
373
|
_func=None,
|
|
225
374
|
*,
|
|
@@ -234,7 +383,17 @@ def memoize(
|
|
|
234
383
|
cache_dir = osp.expanduser(cache_dir)
|
|
235
384
|
|
|
236
385
|
def decorator(func):
|
|
386
|
+
# Check if function is async
|
|
387
|
+
is_async = inspect.iscoroutinefunction(func)
|
|
388
|
+
|
|
237
389
|
if cache_type == "memory":
|
|
390
|
+
if is_async:
|
|
391
|
+
return _async_memory_memoize(
|
|
392
|
+
func,
|
|
393
|
+
size,
|
|
394
|
+
keys,
|
|
395
|
+
ignore_self,
|
|
396
|
+
)
|
|
238
397
|
return _memory_memoize(
|
|
239
398
|
func,
|
|
240
399
|
size,
|
|
@@ -242,6 +401,14 @@ def memoize(
|
|
|
242
401
|
ignore_self,
|
|
243
402
|
)
|
|
244
403
|
elif cache_type == "disk":
|
|
404
|
+
if is_async:
|
|
405
|
+
return _async_disk_memoize(
|
|
406
|
+
func,
|
|
407
|
+
keys,
|
|
408
|
+
cache_dir,
|
|
409
|
+
ignore_self,
|
|
410
|
+
verbose,
|
|
411
|
+
)
|
|
245
412
|
return _disk_memoize(
|
|
246
413
|
func,
|
|
247
414
|
keys,
|
|
@@ -249,6 +416,15 @@ def memoize(
|
|
|
249
416
|
ignore_self,
|
|
250
417
|
verbose,
|
|
251
418
|
)
|
|
419
|
+
|
|
420
|
+
# cache_type == "both"
|
|
421
|
+
if is_async:
|
|
422
|
+
return _async_both_memoize(
|
|
423
|
+
func,
|
|
424
|
+
keys,
|
|
425
|
+
cache_dir,
|
|
426
|
+
ignore_self,
|
|
427
|
+
)
|
|
252
428
|
return both_memoize(
|
|
253
429
|
func,
|
|
254
430
|
keys,
|
|
@@ -256,9 +432,63 @@ def memoize(
|
|
|
256
432
|
verbose,
|
|
257
433
|
)
|
|
258
434
|
|
|
435
|
+
# Handle both @memoize and @memoize() usage patterns
|
|
259
436
|
if _func is None:
|
|
260
437
|
return decorator
|
|
261
|
-
|
|
438
|
+
else:
|
|
439
|
+
return decorator(_func)
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def amemoize(
|
|
443
|
+
_func=None,
|
|
444
|
+
*,
|
|
445
|
+
keys: list[str] | None = None,
|
|
446
|
+
cache_dir: str = SPEED_CACHE_DIR,
|
|
447
|
+
cache_type: Literal["memory", "disk", "both"] = "disk",
|
|
448
|
+
size: int = 10240,
|
|
449
|
+
ignore_self: bool = True,
|
|
450
|
+
verbose: bool = False,
|
|
451
|
+
):
|
|
452
|
+
"""
|
|
453
|
+
Async-specific memoization decorator for coroutine functions.
|
|
454
|
+
|
|
455
|
+
Args:
|
|
456
|
+
_func: The async function to memoize (when used without parentheses)
|
|
457
|
+
keys: Specific argument keys to use for cache key generation
|
|
458
|
+
cache_dir: Directory for disk cache storage
|
|
459
|
+
cache_type: Type of caching - "memory", "disk", or "both"
|
|
460
|
+
size: Size of memory cache (for memory/both types)
|
|
461
|
+
ignore_self: Whether to ignore 'self' parameter in cache key
|
|
462
|
+
verbose: Enable verbose logging
|
|
463
|
+
|
|
464
|
+
Returns:
|
|
465
|
+
Decorated async function with memoization
|
|
466
|
+
|
|
467
|
+
Example:
|
|
468
|
+
@amemoize(cache_type="both")
|
|
469
|
+
async def my_async_func(x: int) -> str:
|
|
470
|
+
return str(x)
|
|
471
|
+
"""
|
|
472
|
+
if "~/" in cache_dir:
|
|
473
|
+
cache_dir = osp.expanduser(cache_dir)
|
|
474
|
+
|
|
475
|
+
def decorator(func):
|
|
476
|
+
# Ensure the function is actually async
|
|
477
|
+
if not inspect.iscoroutinefunction(func):
|
|
478
|
+
raise ValueError(f"amemoize can only be used with async functions. {func.__name__} is not async.")
|
|
479
|
+
|
|
480
|
+
if cache_type == "memory":
|
|
481
|
+
return _async_memory_memoize(func, size, keys, ignore_self)
|
|
482
|
+
elif cache_type == "disk":
|
|
483
|
+
return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose)
|
|
484
|
+
else: # cache_type == "both"
|
|
485
|
+
return _async_both_memoize(func, keys, cache_dir, ignore_self)
|
|
486
|
+
|
|
487
|
+
# Handle both @amemoize and @amemoize() usage patterns
|
|
488
|
+
if _func is None:
|
|
489
|
+
return decorator
|
|
490
|
+
else:
|
|
491
|
+
return decorator(_func)
|
|
262
492
|
|
|
263
493
|
|
|
264
|
-
__all__ = ["memoize", "identify", "identify_uuid"]
|
|
494
|
+
__all__ = ["memoize", "identify", "identify_uuid", "amemoize"]
|
speedy_utils/common/utils_io.py
CHANGED
speedy_utils/scripts/mpython.py
CHANGED
|
@@ -91,9 +91,7 @@ def main():
|
|
|
91
91
|
cpu_end = ((i + 1) * cpu_per_process - 1) % args.total_cpu
|
|
92
92
|
ENV = f"CUDA_VISIBLE_DEVICES={gpu} MP_ID={i} MP_TOTAL={args.total_fold}"
|
|
93
93
|
if taskset_path:
|
|
94
|
-
fold_cmd =
|
|
95
|
-
f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end} {path_python} {cmd_str}"
|
|
96
|
-
)
|
|
94
|
+
fold_cmd = f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end} {path_python} {cmd_str}"
|
|
97
95
|
else:
|
|
98
96
|
fold_cmd = f"{ENV} {path_python} {cmd_str}"
|
|
99
97
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
llm_utils/__init__.py,sha256=AYmJ297r0OjYmV1uNWFwznbqfuLTjCV7L2Ee12jxhpw,602
|
|
2
|
+
llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
|
|
3
|
+
llm_utils/chat_format/display.py,sha256=M-__JpcJSqjqeP4LiW7-yF8fVL37yUEUdaNC4VEgIo8,10181
|
|
4
|
+
llm_utils/chat_format/transform.py,sha256=eU0c3PdAHCNLuGP1UqPwln0B34Lv3bt_uV9v9BrlCN4,5402
|
|
5
|
+
llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
|
|
6
|
+
llm_utils/group_messages.py,sha256=Oe2tlhg-zRodG1-hodYebddrR77j9UdE05LzJw0EvYI,3622
|
|
7
|
+
llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
|
|
8
|
+
llm_utils/lm/async_lm/__init__.py,sha256=PUBbCuf5u6-0GBUu-2PI6YAguzsyXj-LPkU6vccqT6E,121
|
|
9
|
+
llm_utils/lm/async_lm/_utils.py,sha256=P1-pUDf_0pDmo8WTIi43t5ARlyGA1RIJfpAhz-gfA5g,6105
|
|
10
|
+
llm_utils/lm/async_lm/async_llm_task.py,sha256=ts0CoXRgqmKnC16qkPp7cA_PVY0sVAeo_RFrA7upVmg,18892
|
|
11
|
+
llm_utils/lm/async_lm/async_lm.py,sha256=J1KC7qCpG_CyJMWca4q71la7JHoANiLLSNQrQH44-z0,14045
|
|
12
|
+
llm_utils/lm/async_lm/async_lm_base.py,sha256=KNf7BoB69cGPaqwo9DjFKOcsGkxwr6e66D7cd92Gm2c,14919
|
|
13
|
+
llm_utils/lm/async_lm/lm_specific.py,sha256=KmqdCm3SJ5MqN-dRJd6S5tq5-ve1X2eNWf2CMFtc_3s,3926
|
|
14
|
+
llm_utils/lm/utils.py,sha256=a0KJj8vjT2fHKb7GKGNJjJHhKLThwpxIL7vnV9Fr3ZY,4584
|
|
15
|
+
llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
|
|
16
|
+
llm_utils/scripts/vllm_load_balancer.py,sha256=TT5Ypq7gUcl52gRFp--ORFFjzhfGlcaX2rkRv8NxlxU,37259
|
|
17
|
+
llm_utils/scripts/vllm_serve.py,sha256=4NaqpVs7LBvxtvTCMPsNCAOfqiWkKRttxWMmWY7SitA,14729
|
|
18
|
+
speedy_utils/__init__.py,sha256=ZtnitBT13OS3xjmsVoVHjmL5RIWaH12PMcp6UDHQjaE,5776
|
|
19
|
+
speedy_utils/all.py,sha256=t-HKzDmhF1MTFnmq7xRnPs5nFG_aZaLH9Ua0RM6nQ9Y,4855
|
|
20
|
+
speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
|
|
22
|
+
speedy_utils/common/function_decorator.py,sha256=BspJ0YuGL6elS7lWBAgELZ-sCfED_1N2P5fgH-fCRUQ,2132
|
|
23
|
+
speedy_utils/common/logger.py,sha256=JqW9gG4ujfq4RldNeYP2p52BYgCwjkYeGGYyzLn6mfY,6422
|
|
24
|
+
speedy_utils/common/notebook_utils.py,sha256=-97kehJ_Gg3TzDLubsLIYJcykqX1NXhbvBO6nniZSYM,2063
|
|
25
|
+
speedy_utils/common/report_manager.py,sha256=eBiw5KY6bWUhwki3B4lK5o8bFsp7L5x28X9GCI-Sd1w,3899
|
|
26
|
+
speedy_utils/common/utils_cache.py,sha256=G0M_iv3T8QqbBNNiS1LDz6MrRycQjiYLMzmHYpDUCjU,16348
|
|
27
|
+
speedy_utils/common/utils_io.py,sha256=tfptex3pbmhXOftr__V-3DbhuDVSm01j4vg39R5jbwI,4792
|
|
28
|
+
speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
|
|
29
|
+
speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
|
|
30
|
+
speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
speedy_utils/multi_worker/process.py,sha256=BI-sgzzQ0_N8kOfaS_3ZAGZ3d6panYzJ3-BGZthY4dQ,6824
|
|
32
|
+
speedy_utils/multi_worker/thread.py,sha256=u_hTwXh7_FciMa5EukdEA1fDCY_vUC4moDceBXk2b6w,16326
|
|
33
|
+
speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
speedy_utils/scripts/mpython.py,sha256=IvywP7Y0_V6tWfMP-4MjPvN5_KfxWF21xaLJsCIayCk,3821
|
|
35
|
+
speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
|
|
36
|
+
speedy_utils-1.1.8.dist-info/METADATA,sha256=6QI2Y4BBZbNTn_gTZobOZdlqwY3x2EHFabMYPf9gv2Y,7441
|
|
37
|
+
speedy_utils-1.1.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
38
|
+
speedy_utils-1.1.8.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
|
|
39
|
+
speedy_utils-1.1.8.dist-info/RECORD,,
|