speedy-utils 1.1.36__tar.gz → 1.1.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/PKG-INFO +1 -1
  2. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/pyproject.toml +1 -1
  3. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/async_lm.py +1 -1
  4. speedy_utils-1.1.39/src/llm_utils/scripts/fast_vllm.py +131 -0
  5. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/copilot-instructions.md +0 -0
  6. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/caching-utilities/SKILL.md +0 -0
  7. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
  8. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/io-utilities/SKILL.md +0 -0
  9. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/io-utilities/examples/io_example.py +0 -0
  10. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/llm-integration/SKILL.md +0 -0
  11. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/llm-integration/examples/llm_example.py +0 -0
  12. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/multi-threading-processing/SKILL.md +0 -0
  13. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
  14. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/skill-creation/SKILL.md +0 -0
  15. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/vision-utilities/SKILL.md +0 -0
  16. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
  17. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.github/workflows/publish.yml +0 -0
  18. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.gitignore +0 -0
  19. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/.pre-commit-config.yaml +0 -0
  20. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/README.md +0 -0
  21. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/bumpversion.sh +0 -0
  22. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/debug_generate_response.py +0 -0
  23. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/debug_n_param.py +0 -0
  24. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/debug_n_structure.py +0 -0
  25. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/integration_test.py +0 -0
  26. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/test_decode_api.py +0 -0
  27. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/test_endpoints.py +0 -0
  28. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/test_generate.py +0 -0
  29. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/debug/test_generate_endpoint.py +0 -0
  30. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/GENERATE_QUICKREF.md +0 -0
  31. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/IMPLEMENTATION.md +0 -0
  32. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/QUICKSTART.md +0 -0
  33. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/TOKENIZATION.md +0 -0
  34. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/TOKENIZATION_IMPLEMENTATION.md +0 -0
  35. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/docs/zero_copy_sharing.md +0 -0
  36. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/generate_example.py +0 -0
  37. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/pytorch_large_model.py +0 -0
  38. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/shared_kwargs_example.py +0 -0
  39. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/temperature_range_example.py +0 -0
  40. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/test_share_ray.py +0 -0
  41. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/tokenization_example.py +0 -0
  42. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/examples/vision_utils_example.py +0 -0
  43. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/experiments/exp1/dockerfile +0 -0
  44. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/experiments/exp1/run_in_docker.sh +0 -0
  45. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/experiments/exp1/test.png +0 -0
  46. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/experiments/test_read_image.py +0 -0
  47. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/notebooks/README.ipynb +0 -0
  48. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
  49. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/notebooks/ray_tutorial.ipynb +0 -0
  50. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/notebooks/test_multi_thread.ipynb +0 -0
  51. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/ruff.toml +0 -0
  52. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/scripts/debug_import_time.py +0 -0
  53. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/scripts/deploy.sh +0 -0
  54. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/scripts/imports.sh +0 -0
  55. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/scripts/test_import_time_vision.py +0 -0
  56. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/setup.cfg +0 -0
  57. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/datasets/convert_to_arrow.py +0 -0
  58. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/__init__.py +0 -0
  59. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/chat_format/__init__.py +0 -0
  60. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/chat_format/display.py +0 -0
  61. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/chat_format/transform.py +0 -0
  62. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/chat_format/utils.py +0 -0
  63. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/group_messages.py +0 -0
  64. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/__init__.py +0 -0
  65. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  66. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  67. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
  68. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
  69. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  70. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/base_prompt_builder.py +0 -0
  71. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/llm.py +0 -0
  72. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/llm_signature.py +0 -0
  73. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/lm_base.py +0 -0
  74. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/mixins.py +0 -0
  75. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/openai_memoize.py +0 -0
  76. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/signature.py +0 -0
  77. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/lm/utils.py +0 -0
  78. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/scripts/README.md +0 -0
  79. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  80. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/scripts/vllm_serve.py +0 -0
  81. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/vector_cache/__init__.py +0 -0
  82. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/vector_cache/cli.py +0 -0
  83. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/vector_cache/core.py +0 -0
  84. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/vector_cache/types.py +0 -0
  85. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/llm_utils/vector_cache/utils.py +0 -0
  86. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/__imports.py +0 -0
  87. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/__init__.py +0 -0
  88. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/__init__.py +0 -0
  89. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/clock.py +0 -0
  90. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/function_decorator.py +0 -0
  91. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/logger.py +0 -0
  92. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/notebook_utils.py +0 -0
  93. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/patcher.py +0 -0
  94. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/report_manager.py +0 -0
  95. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/utils_cache.py +0 -0
  96. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/utils_io.py +0 -0
  97. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/utils_misc.py +0 -0
  98. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/common/utils_print.py +0 -0
  99. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/multi_worker/__init__.py +0 -0
  100. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/multi_worker/process.py +0 -0
  101. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/multi_worker/thread.py +0 -0
  102. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/scripts/__init__.py +0 -0
  103. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/scripts/mpython.py +0 -0
  104. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
  105. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/vision_utils/README.md +0 -0
  106. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/vision_utils/__init__.py +0 -0
  107. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/vision_utils/io_utils.py +0 -0
  108. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/src/vision_utils/plot.py +0 -0
  109. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/import_all.py +0 -0
  110. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/import_time_report.py +0 -0
  111. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/integration_test.py +0 -0
  112. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/llm_utils/test_llm_mixins.py +0 -0
  113. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/sample_objects.py +0 -0
  114. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test.py +0 -0
  115. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_logger.py +0 -0
  116. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_logger_format.py +0 -0
  117. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_memoize_typing.py +0 -0
  118. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_mpython.py +0 -0
  119. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_multithread_error_trace.py +0 -0
  120. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_process.py +0 -0
  121. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_process_update.py +0 -0
  122. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_pytorch_sharing.py +0 -0
  123. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_shared_kwargs.py +0 -0
  124. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_thread.py +0 -0
  125. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/tests/test_tokenization.py +0 -0
  126. {speedy_utils-1.1.36 → speedy_utils-1.1.39}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.36
3
+ Version: 1.1.39
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "speedy-utils"
3
- version = "1.1.36"
3
+ version = "1.1.39"
4
4
  description = "Fast and easy-to-use package for data science"
5
5
  authors = [{ name = "AnhVTH", email = "anhvth.226@gmail.com" }]
6
6
  readme = "README.md"
@@ -15,7 +15,7 @@ from pydantic import BaseModel
15
15
 
16
16
  # from llm_utils.lm.async_lm.async_llm_task import OutputModelType
17
17
  from llm_utils.lm.async_lm.async_lm_base import AsyncLMBase
18
- from speedy_utils import jloads
18
+ from json_repair import jloads
19
19
 
20
20
  from ._utils import (
21
21
  LegacyMsgs,
@@ -0,0 +1,131 @@
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import time
5
+ import argparse
6
+ import subprocess
7
+ from pathlib import Path
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
def get_hf_cache_home():
    """Locate the Hugging Face hub cache directory.

    The first non-empty setting wins, checked in this order:

    1. ``HF_HUB_CACHE`` - explicit hub cache location.
    2. ``HUGGINGFACE_HUB_CACHE`` - legacy name for the same setting.
    3. ``HF_HOME`` - the hub cache is its ``hub`` subdirectory.
    4. ``XDG_CACHE_HOME`` - the hub cache is ``huggingface/hub`` below it.
    5. ``~/.cache/huggingface/hub``.

    Empty environment values are treated as unset so they cannot produce a
    path relative to the current working directory.

    Returns:
        Path: the hub cache directory. It is not checked for existence.
    """
    for var in ("HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"):
        explicit = os.environ.get(var)
        if explicit:
            return Path(explicit).expanduser()

    hf_home = os.environ.get("HF_HOME")
    if hf_home:
        return Path(hf_home).expanduser() / "hub"

    xdg_cache = os.environ.get("XDG_CACHE_HOME")
    cache_root = Path(xdg_cache).expanduser() if xdg_cache else Path.home() / ".cache"
    return cache_root / "huggingface" / "hub"
15
+
16
def resolve_model_path(model_id, cache_dir):
    """Find the on-disk snapshot directory for a cached Hugging Face model.

    Args:
        model_id: Model identifier such as ``"org/name"``; each ``/`` becomes
            ``--`` in the cache folder name (``models--org--name``).
        cache_dir: Hub cache directory (``str`` or ``Path``) that contains the
            ``models--*`` folders.

    Returns:
        Path: the snapshot named by ``refs/main`` when that snapshot exists,
        otherwise the most recently modified directory under ``snapshots``.

    Raises:
        FileNotFoundError: if the model folder is missing or it contains no
            snapshot directory.
    """
    model_root = Path(cache_dir) / ("models--" + model_id.replace("/", "--"))
    if not model_root.is_dir():
        raise FileNotFoundError(f"Model folder not found at: {model_root}")

    snapshots_dir = model_root / "snapshots"

    # 1. Prefer the commit recorded in refs/main.
    ref_path = model_root / "refs" / "main"
    if ref_path.is_file():
        commit_hash = ref_path.read_text(encoding="utf-8").strip()
        # An empty ref must be skipped: `snapshots_dir / ""` is snapshots_dir
        # itself, which would otherwise be returned as if it were a snapshot.
        if commit_hash:
            snapshot_path = snapshots_dir / commit_hash
            if snapshot_path.is_dir():
                return snapshot_path

    # 2. Fall back to the newest snapshot folder (by modification time).
    if snapshots_dir.is_dir():
        candidates = [entry for entry in snapshots_dir.iterdir() if entry.is_dir()]
        if candidates:
            return max(candidates, key=lambda entry: entry.stat().st_mtime)

    raise FileNotFoundError(f"No valid snapshot found in {model_root}")
40
+
41
def copy_worker(src, dst):
    """Copy a single file, following symlinks so the real data is copied.

    Args:
        src: Source file path (may be a symlink into the cache's blob store).
        dst: Destination file path; missing parent directories are created.

    Returns:
        int: size in bytes of the copied file on success.
        str: the error message on failure. Errors are returned rather than
        raised, so callers must check the type of the result.
    """
    try:
        # Path.parent is "." for a bare filename, so this also works when dst
        # has no directory component (os.makedirs("") would raise instead).
        Path(dst).parent.mkdir(parents=True, exist_ok=True)
        # copy2 follows symlinks by default and preserves file metadata.
        shutil.copy2(src, dst)
        return os.path.getsize(dst)
    except Exception as e:
        # Deliberately broad: the error is reported through the return value.
        return str(e)
50
+
51
def cache_to_ram(model_id, shm_base, workers=64):
    """Copy a cached Hugging Face model snapshot into a RAM directory in parallel.

    Args:
        model_id: Model identifier such as ``"org/name"``.
        shm_base: Existing directory to copy into (for example ``/dev/shm``).
        workers: Maximum number of copy threads.

    Returns:
        Path: the destination directory, ``shm_base / model_id`` with every
        ``/`` in the model ID replaced by ``_``.

    Raises:
        FileNotFoundError: propagated from ``resolve_model_path`` when the
            model is not present in the cache.
        RuntimeError: if one or more files could not be copied. Serving from
            an incomplete copy would fail later in a far less obvious way.
    """
    cache_home = get_hf_cache_home()
    src_path = resolve_model_path(model_id, cache_home)

    dst_path = Path(shm_base) / model_id.replace("/", "_")
    # Remember whether the destination was already there, so that a failed
    # run only removes data this call created.
    preexisting = dst_path.exists()

    # Report available space in the target directory.
    shm_stats = shutil.disk_usage(shm_base)
    print(f"📦 Source: {src_path}", file=sys.stderr)
    print(f"🚀 Target RAM: {dst_path} (Available: {shm_stats.free/(1024**3):.1f} GB)", file=sys.stderr)

    files_to_copy = []
    for root, _, files in os.walk(src_path):
        root_path = Path(root)
        for name in files:
            full_src = root_path / name
            files_to_copy.append((full_src, dst_path / full_src.relative_to(src_path)))

    total_files = len(files_to_copy)
    total_bytes = 0
    failures = []
    start = time.time()
    try:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            futures = {pool.submit(copy_worker, s, d): s for s, d in files_to_copy}
            try:
                for done, future in enumerate(as_completed(futures), start=1):
                    res = future.result()
                    if isinstance(res, int):
                        total_bytes += res
                    else:
                        # copy_worker reports a failure as an error string.
                        failures.append((futures[future], res))
                    if done % 100 == 1 or done == total_files:
                        print(f" Progress: {done}/{total_files} files...", end="\r", file=sys.stderr)
            except BaseException:
                # Drop queued copies so leaving the pool does not wait for
                # work that is about to be discarded (e.g. after Ctrl-C).
                for pending in futures:
                    pending.cancel()
                raise

        if failures:
            details = "; ".join(f"{src}: {err}" for src, err in failures[:5])
            raise RuntimeError(
                f"Failed to copy {len(failures)}/{total_files} files to {dst_path}: {details}"
            )
    except BaseException:
        # The caller never receives dst_path on failure, so partial data
        # created by this call has to be removed here.
        if not preexisting:
            shutil.rmtree(dst_path, ignore_errors=True)
        raise

    elapsed = time.time() - start
    print(f"\n✅ Copied {total_bytes/(1024**3):.2f} GB in {elapsed:.2f}s", file=sys.stderr)
    return dst_path
85
+
86
def main():
    """Copy a model's weights to a RAM disk, then run ``vllm serve`` on the copy.

    Wrapper options (``--model``, ``--shm-dir``, ``--cache-workers``,
    ``--keep-cache``) are parsed here; every unrecognized argument is
    forwarded unchanged to ``vllm serve``.

    Returns:
        int: process exit status. 0 on success, vLLM's own status when it
        exits non-zero (``128 + N`` when it was killed by signal ``N``), 130
        when interrupted by the user, and 1 for any other error.
    """
    import shlex  # used only to render the command line for display

    # add_help=False leaves -h/--help unrecognized, so parse_known_args puts
    # them in vllm_args - presumably so vLLM's own help is shown.
    parser = argparse.ArgumentParser(description="vLLM RAM-cached loader", add_help=False)
    parser.add_argument("--model", type=str, required=True, help="HuggingFace Model ID")
    parser.add_argument("--shm-dir", type=str, default="/dev/shm", help="RAM disk mount point")
    parser.add_argument("--cache-workers", type=int, default=64, help="Threads for copying")
    parser.add_argument("--keep-cache", action="store_true", help="Do not delete files from RAM on exit")

    # Split wrapper args from vLLM args.
    args, vllm_args = parser.parse_known_args()

    ram_path = None
    exit_code = 0
    try:
        # 1. Sync weights to RAM disk
        ram_path = cache_to_ram(args.model, args.shm_dir, args.cache_workers)

        # 2. Prepare vLLM command: serve from the RAM copy, but keep the
        #    original model ID as the name exposed by the API.
        cmd = [
            "vllm", "serve", str(ram_path),
            "--served-model-name", args.model,
            *vllm_args,
        ]

        print("\n🔥 Launching vLLM...", file=sys.stderr)
        print(f" Command: {shlex.join(cmd)}\n", file=sys.stderr)

        # 3. Run vLLM and wait
        subprocess.run(cmd, check=True)

    except KeyboardInterrupt:
        print("\n👋 Process interrupted by user.", file=sys.stderr)
        exit_code = 130
    except subprocess.CalledProcessError as e:
        print(f"\n❌ vLLM exited with error: {e}", file=sys.stderr)
        # A negative returncode means the child was killed by a signal; map
        # it to the conventional 128 + N shell status.
        exit_code = 128 - e.returncode if e.returncode < 0 else e.returncode
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        exit_code = 1
    finally:
        # 4. Cleanup RAM disk
        if ram_path and ram_path.exists() and not args.keep_cache:
            print(f"🧹 Cleaning up RAM cache: {ram_path}", file=sys.stderr)
            try:
                shutil.rmtree(ram_path)
                print("✨ RAM disk cleared.", file=sys.stderr)
            except Exception as e:
                print(f"⚠️ Failed to clean {ram_path}: {e}", file=sys.stderr)

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
File without changes
File without changes
File without changes
File without changes
File without changes