speedy-utils 1.1.36__py3-none-any.whl → 1.1.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/scripts/fast_vllm.py +131 -0
- {speedy_utils-1.1.36.dist-info → speedy_utils-1.1.38.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.36.dist-info → speedy_utils-1.1.38.dist-info}/RECORD +5 -4
- {speedy_utils-1.1.36.dist-info → speedy_utils-1.1.38.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.36.dist-info → speedy_utils-1.1.38.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import shutil
|
|
4
|
+
import time
|
|
5
|
+
import argparse
|
|
6
|
+
import subprocess
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
|
|
10
|
+
def get_hf_cache_home():
    """Locate the Hugging Face hub cache directory.

    The lookup order follows the environment variables honoured by
    ``huggingface_hub``:

    1. ``HF_HUB_CACHE`` (or the legacy ``HUGGINGFACE_HUB_CACHE``) names the
       hub cache directory directly.
    2. ``HF_HOME`` names the Hugging Face home; the cache is its ``hub``
       subdirectory.
    3. Otherwise ``$XDG_CACHE_HOME/huggingface/hub`` when ``XDG_CACHE_HOME``
       is set, else ``~/.cache/huggingface/hub``.

    A leading ``~`` in any of these variables is expanded.

    Returns:
        pathlib.Path: Directory expected to contain the ``models--*`` folders.
    """
    # An explicit hub-cache override wins over everything derived from HF_HOME.
    for var in ("HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"):
        explicit = os.environ.get(var)
        if explicit:
            return Path(explicit).expanduser()

    if "HF_HOME" in os.environ:
        return Path(os.environ["HF_HOME"]).expanduser() / "hub"

    xdg_cache = os.environ.get("XDG_CACHE_HOME")
    cache_root = Path(xdg_cache).expanduser() if xdg_cache else Path.home() / ".cache"
    return cache_root / "huggingface" / "hub"
|
|
15
|
+
|
|
16
|
+
def resolve_model_path(model_id, cache_dir):
    """Find the physical snapshot directory for the given model ID.

    Args:
        model_id: Hub model ID such as ``"org/name"``; ``/`` is mapped to
            ``--`` to form the ``models--org--name`` folder name.
        cache_dir: Hub cache directory that contains the ``models--*``
            folders (a ``pathlib.Path`` or a string path).

    Returns:
        pathlib.Path: The snapshot directory named by ``refs/main`` when that
        ref is non-empty and the directory exists; otherwise the most
        recently modified directory under ``snapshots``.

    Raises:
        FileNotFoundError: If the model folder does not exist, or it contains
            no snapshot directory.
    """
    dir_name = "models--" + model_id.replace("/", "--")
    model_root = Path(cache_dir) / dir_name
    if not model_root.exists():
        raise FileNotFoundError(f"Model folder not found at: {model_root}")

    snapshots_dir = model_root / "snapshots"

    # 1. Try to find hash via refs/main
    ref_path = model_root / "refs" / "main"
    if ref_path.is_file():
        commit_hash = ref_path.read_text(encoding="utf-8").strip()
        # An empty ref must be skipped: joining "" onto snapshots_dir yields
        # snapshots_dir itself, which would be returned as if it were a snapshot.
        if commit_hash:
            snapshot_path = snapshots_dir / commit_hash
            if snapshot_path.is_dir():
                return snapshot_path

    # 2. Fallback to the newest snapshot folder
    if snapshots_dir.is_dir():
        subdirs = [x for x in snapshots_dir.iterdir() if x.is_dir()]
        if subdirs:
            return max(subdirs, key=lambda x: x.stat().st_mtime)

    raise FileNotFoundError(f"No valid snapshot found in {model_root}")
|
|
40
|
+
|
|
41
|
+
def copy_worker(src, dst):
    """Copy a single file, following symlinks to capture actual data.

    Args:
        src: Path of the file to copy; a symlink is dereferenced so the
            destination receives the file contents rather than a link.
        dst: Destination file path. Missing parent directories are created.

    Returns:
        int | str: Size in bytes of the copied file on success, or the error
        message as a string if anything went wrong. Errors are returned, not
        raised, so the caller must check the result type.
    """
    try:
        # Path.parent is "." for a bare filename, which mkdir accepts;
        # os.makedirs(os.path.dirname(dst)) would raise on the empty string.
        Path(dst).parent.mkdir(parents=True, exist_ok=True)
        # copy2 follows symlinks by default
        shutil.copy2(src, dst)
        return os.path.getsize(dst)
    except Exception as e:
        return str(e)
|
|
50
|
+
|
|
51
|
+
def cache_to_ram(model_id, shm_base, workers=64):
    """Parallel copy from HF cache to the specified RAM directory.

    Args:
        model_id: Hub model ID; ``/`` is replaced by ``_`` to name the
            target folder under ``shm_base``.
        shm_base: Existing directory to copy into (its free space is queried
            and reported).
        workers: Number of threads used for copying.

    Returns:
        pathlib.Path: The directory under ``shm_base`` holding the copied files.

    Raises:
        FileNotFoundError: Propagated from ``resolve_model_path`` when the
            model is not in the cache.
        RuntimeError: If one or more files could not be copied. A partial
            target directory created by this call is removed first.
    """
    cache_home = get_hf_cache_home()
    src_path = resolve_model_path(model_id, cache_home)

    safe_name = model_id.replace("/", "_")
    dst_path = Path(shm_base) / safe_name

    # Check available space in shm
    shm_stats = shutil.disk_usage(shm_base)
    print(f"📦 Source: {src_path}", file=sys.stderr)
    print(f"🚀 Target RAM: {dst_path} (Available: {shm_stats.free/(1024**3):.1f} GB)", file=sys.stderr)

    files_to_copy = []
    for root, _, files in os.walk(src_path):
        for file in files:
            full_src = Path(root) / file
            rel_path = full_src.relative_to(src_path)
            files_to_copy.append((full_src, dst_path / rel_path))

    # Only remove the target on failure if this call created it, so a
    # directory that already existed before this call is never deleted here.
    created_here = not dst_path.exists()

    total_bytes = 0
    failures = []
    start = time.time()
    try:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            futures = {pool.submit(copy_worker, s, d): s for s, d in files_to_copy}
            try:
                for i, future in enumerate(as_completed(futures)):
                    res = future.result()
                    if isinstance(res, int):
                        total_bytes += res
                    else:
                        # copy_worker reports errors as strings; keep them so a
                        # partial copy is not mistaken for success.
                        failures.append((futures[future], res))
                    if i % 100 == 0 or i == len(files_to_copy) - 1:
                        print(f"   Progress: {i+1}/{len(files_to_copy)} files...", end="\r", file=sys.stderr)
            except BaseException:
                # On abort (e.g. Ctrl-C), drop copies that have not started so
                # leaving the pool does not wait for the whole queue.
                for pending in futures:
                    pending.cancel()
                raise

        if failures:
            first_src, first_err = failures[0]
            raise RuntimeError(
                f"Failed to copy {len(failures)} of {len(files_to_copy)} files to {dst_path} "
                f"(first error: {first_src}: {first_err})"
            )
    except BaseException:
        if created_here:
            shutil.rmtree(dst_path, ignore_errors=True)
        raise

    elapsed = time.time() - start
    print(f"\n✅ Copied {total_bytes/(1024**3):.2f} GB in {elapsed:.2f}s", file=sys.stderr)
    return dst_path
|
|
85
|
+
|
|
86
|
+
def main():
    """Copy a cached model into a RAM disk, then run ``vllm serve`` on it.

    Wrapper options (``--model``, ``--shm-dir``, ``--cache-workers``,
    ``--keep-cache``) are parsed here; every unrecognised argument is passed
    through to ``vllm serve`` unchanged. The RAM copy is removed on exit
    unless ``--keep-cache`` is given.

    All status output goes to stderr. If the copy or vLLM fails, or the user
    interrupts, the process exits with a non-zero status after cleanup.
    """
    parser = argparse.ArgumentParser(description="vLLM RAM-cached loader", add_help=False)
    parser.add_argument("--model", type=str, required=True, help="HuggingFace Model ID")
    parser.add_argument("--shm-dir", type=str, default="/dev/shm", help="RAM disk mount point")
    parser.add_argument("--cache-workers", type=int, default=64, help="Threads for copying")
    parser.add_argument("--keep-cache", action="store_true", help="Do not delete files from RAM on exit")

    # Capture wrapper args vs vLLM args
    args, vllm_args = parser.parse_known_args()

    ram_path = None
    exit_code = 0
    try:
        # 1. Sync weights to RAM disk
        ram_path = cache_to_ram(args.model, args.shm_dir, args.cache_workers)

        # 2. Prepare vLLM Command
        # Point vLLM to the RAM files, but keep the original model ID for the API
        cmd = [
            "vllm", "serve", str(ram_path),
            "--served-model-name", args.model
        ] + vllm_args

        # Status lines go to stderr like every other message in this script.
        print("\n🔥 Launching vLLM...", file=sys.stderr)
        print(f"   Command: {' '.join(cmd)}\n", file=sys.stderr)

        # 3. Run vLLM and wait
        subprocess.run(cmd, check=True)

    except KeyboardInterrupt:
        print("\n👋 Process interrupted by user.", file=sys.stderr)
        exit_code = 130  # shell convention: 128 + SIGINT
    except subprocess.CalledProcessError as e:
        print(f"\n❌ vLLM exited with error: {e}", file=sys.stderr)
        # A negative returncode means the child was killed by that signal.
        exit_code = e.returncode if e.returncode > 0 else 128 + abs(e.returncode)
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        exit_code = 1
    finally:
        # 4. Cleanup RAM Disk
        if ram_path and ram_path.exists() and not args.keep_cache:
            print(f"🧹 Cleaning up RAM cache: {ram_path}", file=sys.stderr)
            try:
                shutil.rmtree(ram_path)
                print("✨ RAM disk cleared.", file=sys.stderr)
            except Exception as e:
                print(f"⚠️ Failed to clean {ram_path}: {e}", file=sys.stderr)

    # Report failure to the caller's shell only after cleanup has run.
    if exit_code:
        sys.exit(exit_code)
|
|
129
|
+
|
|
130
|
+
if __name__ == "__main__":
|
|
131
|
+
main()
|
|
@@ -20,6 +20,7 @@ llm_utils/lm/async_lm/async_lm.py,sha256=W8n_S5PKJln9bzO9T525-tqo5DFwBZNXDucz_v-
|
|
|
20
20
|
llm_utils/lm/async_lm/async_lm_base.py,sha256=ga5nCzows5Ye3yop41zsUxNYxcj_Vpf02DsfJ1eoE9U,8358
|
|
21
21
|
llm_utils/lm/async_lm/lm_specific.py,sha256=PxP54ltrh9NrLJx7BPib52oYo_aCvDOjf7KzMjp1MYg,3929
|
|
22
22
|
llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
|
|
23
|
+
llm_utils/scripts/fast_vllm.py,sha256=00UWajLOfTorSMgmgxUOpssdg55oOHneNUY0lhVuRGQ,5128
|
|
23
24
|
llm_utils/scripts/vllm_load_balancer.py,sha256=eQlH07573EDWIBkwc9ef1WvI59anLr4hQqLfZvQk7xk,37133
|
|
24
25
|
llm_utils/scripts/vllm_serve.py,sha256=tPcRB_MbJ01LcqC83RHQ7W9XDS7b1Ntc0fCRdegsNXU,14761
|
|
25
26
|
llm_utils/vector_cache/__init__.py,sha256=oZXpjgBuutI-Pd_pBNYAQMY7-K2C6xv8Qt6a3p88GBQ,879
|
|
@@ -50,7 +51,7 @@ vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
|
|
|
50
51
|
vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
|
|
51
52
|
vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
|
|
52
53
|
vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
|
|
53
|
-
speedy_utils-1.1.
|
|
54
|
-
speedy_utils-1.1.
|
|
55
|
-
speedy_utils-1.1.
|
|
56
|
-
speedy_utils-1.1.
|
|
54
|
+
speedy_utils-1.1.38.dist-info/METADATA,sha256=8WgY6bVeosqELf3KSmLIrygeQcYe1uQag4BFwvLfSWM,8048
|
|
55
|
+
speedy_utils-1.1.38.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
56
|
+
speedy_utils-1.1.38.dist-info/entry_points.txt,sha256=rwn89AYfBUh9SRJtFbpp-u2JIKiqmZ2sczvqyO6s9cI,289
|
|
57
|
+
speedy_utils-1.1.38.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|