speedy-utils 1.1.36__py3-none-any.whl → 1.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ from pydantic import BaseModel
15
15
 
16
16
  # from llm_utils.lm.async_lm.async_llm_task import OutputModelType
17
17
  from llm_utils.lm.async_lm.async_lm_base import AsyncLMBase
18
- from speedy_utils import jloads
18
+ from json_repair import jloads
19
19
 
20
20
  from ._utils import (
21
21
  LegacyMsgs,
@@ -0,0 +1,131 @@
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import time
5
+ import argparse
6
+ import subprocess
7
+ from pathlib import Path
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
def get_hf_cache_home():
    """Locate the Hugging Face hub cache directory.

    The lookup order follows the one used by ``huggingface_hub``:

    1. ``HF_HUB_CACHE`` (or the legacy ``HUGGINGFACE_HUB_CACHE``), which
       names the hub cache directory directly.
    2. ``$HF_HOME/hub``.
    3. ``$XDG_CACHE_HOME/huggingface/hub``.
    4. ``~/.cache/huggingface/hub``.

    ``~`` and ``$VAR`` references inside the environment values are expanded.

    Returns:
        Path: the directory expected to contain the ``models--*`` folders.
        The path is not checked for existence.
    """
    env = os.environ

    # An explicit hub-cache override wins over anything derived from HF_HOME.
    for var in ("HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"):
        override = env.get(var)
        if override:
            return Path(os.path.expandvars(override)).expanduser()

    if "HF_HOME" in env:
        return Path(os.path.expandvars(env["HF_HOME"])).expanduser() / "hub"

    xdg_cache = env.get("XDG_CACHE_HOME")
    if xdg_cache:
        cache_root = Path(os.path.expandvars(xdg_cache)).expanduser()
    else:
        cache_root = Path.home() / ".cache"
    return cache_root / "huggingface" / "hub"
15
+
16
def resolve_model_path(model_id, cache_dir):
    """Find the on-disk snapshot directory for the given model ID.

    Args:
        model_id: Hub identifier such as ``"org/name"``; ``/`` is mapped to
            ``--`` to build the ``models--org--name`` folder name.
        cache_dir: Hub cache directory (``str`` or ``Path``) that contains the
            ``models--*`` folders.

    Returns:
        Path: the snapshot named by ``refs/main`` when that snapshot exists,
        otherwise the most recently modified directory under ``snapshots/``.

    Raises:
        FileNotFoundError: if the model folder is missing or it contains no
            snapshot directory.
    """
    model_root = Path(cache_dir) / ("models--" + model_id.replace("/", "--"))
    if not model_root.exists():
        raise FileNotFoundError(f"Model folder not found at: {model_root}")

    snapshots_dir = model_root / "snapshots"

    # 1. Prefer the commit recorded in refs/main.
    ref_path = model_root / "refs" / "main"
    if ref_path.is_file():
        commit_hash = ref_path.read_text(encoding="utf-8").strip()
        # An empty ref would make `snapshots / ""` resolve to the snapshots
        # directory itself, which exists but is not a snapshot.
        if commit_hash:
            snapshot_path = snapshots_dir / commit_hash
            if snapshot_path.is_dir():
                return snapshot_path

    # 2. Fall back to the newest snapshot folder.
    if snapshots_dir.is_dir():
        candidates = [entry for entry in snapshots_dir.iterdir() if entry.is_dir()]
        if candidates:
            return max(candidates, key=lambda entry: entry.stat().st_mtime)

    raise FileNotFoundError(f"No valid snapshot found in {model_root}")
40
+
41
def copy_worker(src, dst):
    """Copy a single file, following symlinks so the real data is captured.

    Args:
        src: Source file path (``str`` or ``Path``); may be a symlink.
        dst: Destination file path. Missing parent directories are created.

    Returns:
        int: size in bytes of the copied file on success.
        str: the error message on failure. Errors are returned rather than
        raised so that one bad file does not abort a whole batch; callers
        tell the two outcomes apart by the return type.
    """
    try:
        # Path.parent is "." for a bare filename, whereas os.path.dirname
        # returns "" and os.makedirs("") raises.
        Path(dst).parent.mkdir(parents=True, exist_ok=True)
        # copy2 follows symlinks by default, so dst receives the file content.
        shutil.copy2(src, dst)
        return os.path.getsize(dst)
    except Exception as e:
        return str(e)
50
+
51
def cache_to_ram(model_id, shm_base, workers=64):
    """Copy a model snapshot from the HF cache into a RAM-backed directory.

    Args:
        model_id: Hub identifier such as ``"org/name"``.
        shm_base: Existing directory to copy into (e.g. ``/dev/shm``).
        workers: Number of copy threads.

    Returns:
        Path: ``<shm_base>/<model_id with "/" replaced by "_">``, holding a
        copy of every file under the resolved snapshot.

    Raises:
        FileNotFoundError: if the model or a snapshot cannot be located.
        RuntimeError: if any file fails to copy. A destination directory
            created by this call is removed first, so a partial copy does not
            keep occupying RAM; a pre-existing directory is left untouched.
    """
    cache_home = get_hf_cache_home()
    src_path = resolve_model_path(model_id, cache_home)

    safe_name = model_id.replace("/", "_")
    dst_path = Path(shm_base) / safe_name
    # Remember whether the directory is ours to delete if the copy fails.
    created_here = not dst_path.exists()

    # Report available space in shm
    shm_stats = shutil.disk_usage(shm_base)
    print(f"📦 Source: {src_path}", file=sys.stderr)
    print(f"🚀 Target RAM: {dst_path} (Available: {shm_stats.free/(1024**3):.1f} GB)", file=sys.stderr)

    files_to_copy = []
    for root, _, files in os.walk(src_path):
        for file in files:
            full_src = Path(root) / file
            files_to_copy.append((full_src, dst_path / full_src.relative_to(src_path)))

    total_files = len(files_to_copy)
    total_bytes = 0
    failures = []
    start = time.perf_counter()
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {pool.submit(copy_worker, s, d): s for s, d in files_to_copy}
        for done, future in enumerate(as_completed(futures), start=1):
            res = future.result()
            if isinstance(res, int):
                total_bytes += res
            else:
                # copy_worker reports an error as a message string.
                failures.append((futures[future], res))
            if (done - 1) % 100 == 0 or done == total_files:
                print(f" Progress: {done}/{total_files} files...", end="\r", file=sys.stderr)

    if failures:
        # An incomplete copy must not be served as if it were the model.
        if created_here:
            shutil.rmtree(dst_path, ignore_errors=True)
        first_src, first_err = failures[0]
        raise RuntimeError(
            f"Failed to copy {len(failures)}/{total_files} files to {dst_path} "
            f"(first failure: {first_src}: {first_err})"
        )

    elapsed = time.perf_counter() - start
    print(f"\n✅ Copied {total_bytes/(1024**3):.2f} GB in {elapsed:.2f}s", file=sys.stderr)
    return dst_path
85
+
86
def main():
    """Copy a model into a RAM disk, serve it with vLLM, then clean up.

    Wrapper options (``--model``, ``--shm-dir``, ``--cache-workers``,
    ``--keep-cache``) are consumed here; every other command-line argument is
    forwarded unchanged to ``vllm serve``.

    Returns ``None`` on success. On failure the error is printed to stderr
    and, after cleanup, the process exits with a non-zero status: vLLM's own
    exit code when it failed, 130 on Ctrl-C, 1 otherwise.
    """
    import shlex  # local import: only needed to display the command

    # add_help=False keeps -h/--help out of this parser so that it ends up in
    # the arguments forwarded to vLLM.
    parser = argparse.ArgumentParser(description="vLLM RAM-cached loader", add_help=False)
    parser.add_argument("--model", type=str, required=True, help="HuggingFace Model ID")
    parser.add_argument("--shm-dir", type=str, default="/dev/shm", help="RAM disk mount point")
    parser.add_argument("--cache-workers", type=int, default=64, help="Threads for copying")
    parser.add_argument("--keep-cache", action="store_true", help="Do not delete files from RAM on exit")

    # Capture wrapper args vs vLLM args
    args, vllm_args = parser.parse_known_args()

    ram_path = None
    exit_code = 0
    try:
        # 1. Sync weights to RAM disk
        ram_path = cache_to_ram(args.model, args.shm_dir, args.cache_workers)

        # 2. Prepare vLLM command: point vLLM at the RAM files, but keep the
        #    original model ID as the name exposed by the API.
        cmd = [
            "vllm", "serve", str(ram_path),
            "--served-model-name", args.model,
        ] + vllm_args

        print("\n🔥 Launching vLLM...", file=sys.stderr)
        # shlex.join quotes arguments, so the printed line can be pasted into a shell.
        print(f" Command: {shlex.join(cmd)}\n", file=sys.stderr)

        # 3. Run vLLM and wait
        subprocess.run(cmd, check=True)

    except KeyboardInterrupt:
        print("\n👋 Process interrupted by user.", file=sys.stderr)
        exit_code = 130  # conventional status for termination by SIGINT
    except subprocess.CalledProcessError as e:
        print(f"\n❌ vLLM exited with error: {e}", file=sys.stderr)
        rc = e.returncode
        # A negative returncode means the child was killed by that signal.
        exit_code = rc if rc > 0 else 128 + abs(rc)
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        exit_code = 1
    finally:
        # 4. Cleanup RAM disk
        if ram_path and ram_path.exists() and not args.keep_cache:
            print(f"🧹 Cleaning up RAM cache: {ram_path}", file=sys.stderr)
            try:
                shutil.rmtree(ram_path)
                print("✨ RAM disk cleared.", file=sys.stderr)
            except Exception as e:
                print(f"⚠️ Failed to clean {ram_path}: {e}", file=sys.stderr)

    # Report failure to the calling shell or supervisor instead of exiting 0.
    if exit_code:
        sys.exit(exit_code)


if __name__ == "__main__":
    main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.36
3
+ Version: 1.1.39
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -16,10 +16,11 @@ llm_utils/lm/utils.py,sha256=dEKFta8S6Mm4LjIctcpFlEGL9RnmLm5DHd2TA70UWuA,12649
16
16
  llm_utils/lm/async_lm/__init__.py,sha256=j0xK49ooZ0Dm5GstGGHbmPMrPjd3mOXoJ1H7eAL_Z4g,122
17
17
  llm_utils/lm/async_lm/_utils.py,sha256=mB-AueWJJatTx0PXqd_oWc6Kz36cfgDmDTKgiXafCJI,6106
18
18
  llm_utils/lm/async_lm/async_llm_task.py,sha256=2PWW4vPW2jYUiGmYFo4-DHrmX5Jm8Iw_1qo6EPL-ytE,18611
19
- llm_utils/lm/async_lm/async_lm.py,sha256=W8n_S5PKJln9bzO9T525-tqo5DFwBZNXDucz_v-35iU,13761
19
+ llm_utils/lm/async_lm/async_lm.py,sha256=YudjpU3QH8whtnJfIYHFSMWe6lX4Fli7JEiejzrRfYk,13760
20
20
  llm_utils/lm/async_lm/async_lm_base.py,sha256=ga5nCzows5Ye3yop41zsUxNYxcj_Vpf02DsfJ1eoE9U,8358
21
21
  llm_utils/lm/async_lm/lm_specific.py,sha256=PxP54ltrh9NrLJx7BPib52oYo_aCvDOjf7KzMjp1MYg,3929
22
22
  llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
23
+ llm_utils/scripts/fast_vllm.py,sha256=00UWajLOfTorSMgmgxUOpssdg55oOHneNUY0lhVuRGQ,5128
23
24
  llm_utils/scripts/vllm_load_balancer.py,sha256=eQlH07573EDWIBkwc9ef1WvI59anLr4hQqLfZvQk7xk,37133
24
25
  llm_utils/scripts/vllm_serve.py,sha256=tPcRB_MbJ01LcqC83RHQ7W9XDS7b1Ntc0fCRdegsNXU,14761
25
26
  llm_utils/vector_cache/__init__.py,sha256=oZXpjgBuutI-Pd_pBNYAQMY7-K2C6xv8Qt6a3p88GBQ,879
@@ -50,7 +51,7 @@ vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
50
51
  vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
51
52
  vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
52
53
  vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
53
- speedy_utils-1.1.36.dist-info/METADATA,sha256=yZYfOkBwR1aiMwAKAxn78sYCbmNnBt-5lmGY6d11hPI,8048
54
- speedy_utils-1.1.36.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
55
- speedy_utils-1.1.36.dist-info/entry_points.txt,sha256=rwn89AYfBUh9SRJtFbpp-u2JIKiqmZ2sczvqyO6s9cI,289
56
- speedy_utils-1.1.36.dist-info/RECORD,,
54
+ speedy_utils-1.1.39.dist-info/METADATA,sha256=US1AXLkRjdiPfWya3vNvrZ_zmDwKrKBGYdhIW-d0ypY,8048
55
+ speedy_utils-1.1.39.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
+ speedy_utils-1.1.39.dist-info/entry_points.txt,sha256=rwn89AYfBUh9SRJtFbpp-u2JIKiqmZ2sczvqyO6s9cI,289
57
+ speedy_utils-1.1.39.dist-info/RECORD,,