speedy-utils 1.1.40__tar.gz → 1.1.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. speedy_utils-1.1.42/.githooks/pre-push +32 -0
  2. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.gitignore +1 -0
  3. speedy_utils-1.1.42/AGENTS.md +32 -0
  4. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/PKG-INFO +3 -2
  5. speedy_utils-1.1.42/examples/llm_ray_example.py +73 -0
  6. speedy_utils-1.1.42/examples/test_parallel_gpu.py +61 -0
  7. speedy_utils-1.1.42/notebooks/parallel_gpu_pool.ipynb +89 -0
  8. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/pyproject.toml +5 -2
  9. speedy_utils-1.1.42/scripts/test_ray_mp.py +31 -0
  10. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/__init__.py +2 -0
  11. speedy_utils-1.1.42/src/llm_utils/llm_ray.py +370 -0
  12. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/llm.py +36 -29
  13. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/__init__.py +3 -0
  14. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/utils_io.py +3 -1
  15. speedy_utils-1.1.42/src/speedy_utils/multi_worker/__init__.py +12 -0
  16. speedy_utils-1.1.42/src/speedy_utils/multi_worker/dataset_ray.py +303 -0
  17. speedy_utils-1.1.42/src/speedy_utils/multi_worker/parallel_gpu_pool.py +178 -0
  18. speedy_utils-1.1.42/src/speedy_utils/multi_worker/process.py +699 -0
  19. speedy_utils-1.1.42/src/speedy_utils/multi_worker/progress.py +140 -0
  20. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/scripts/mpython.py +49 -4
  21. speedy_utils-1.1.42/test_s3.py +34 -0
  22. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/llm_utils/test_llm_mixins.py +22 -0
  23. speedy_utils-1.1.40/src/speedy_utils/multi_worker/process.py +0 -399
  24. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/copilot-instructions.md +0 -0
  25. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/caching-utilities/SKILL.md +0 -0
  26. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
  27. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/io-utilities/SKILL.md +0 -0
  28. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/io-utilities/examples/io_example.py +0 -0
  29. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/llm-integration/SKILL.md +0 -0
  30. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/llm-integration/examples/llm_example.py +0 -0
  31. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/multi-threading-processing/SKILL.md +0 -0
  32. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
  33. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/skill-creation/SKILL.md +0 -0
  34. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/vision-utilities/SKILL.md +0 -0
  35. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
  36. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.github/workflows/publish.yml +0 -0
  37. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/.pre-commit-config.yaml +0 -0
  38. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/README.md +0 -0
  39. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/bumpversion.sh +0 -0
  40. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/debug_generate_response.py +0 -0
  41. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/debug_n_param.py +0 -0
  42. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/debug_n_structure.py +0 -0
  43. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/integration_test.py +0 -0
  44. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/test_decode_api.py +0 -0
  45. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/test_endpoints.py +0 -0
  46. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/test_generate.py +0 -0
  47. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/debug/test_generate_endpoint.py +0 -0
  48. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/GENERATE_QUICKREF.md +0 -0
  49. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/IMPLEMENTATION.md +0 -0
  50. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/QUICKSTART.md +0 -0
  51. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/TOKENIZATION.md +0 -0
  52. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/TOKENIZATION_IMPLEMENTATION.md +0 -0
  53. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/docs/zero_copy_sharing.md +0 -0
  54. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/generate_example.py +0 -0
  55. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/pytorch_large_model.py +0 -0
  56. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/shared_kwargs_example.py +0 -0
  57. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/temperature_range_example.py +0 -0
  58. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/test_share_ray.py +0 -0
  59. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/tokenization_example.py +0 -0
  60. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/examples/vision_utils_example.py +0 -0
  61. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/experiments/exp1/dockerfile +0 -0
  62. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/experiments/exp1/run_in_docker.sh +0 -0
  63. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/experiments/exp1/test.png +0 -0
  64. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/experiments/test_read_image.py +0 -0
  65. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/notebooks/README.ipynb +0 -0
  66. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
  67. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/notebooks/ray_tutorial.ipynb +0 -0
  68. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/notebooks/test_multi_thread.ipynb +0 -0
  69. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/ruff.toml +0 -0
  70. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/scripts/debug_import_time.py +0 -0
  71. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/scripts/deploy.sh +0 -0
  72. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/scripts/imports.sh +0 -0
  73. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/scripts/test_import_time_vision.py +0 -0
  74. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/setup.cfg +0 -0
  75. {speedy_utils-1.1.40/src/datasets → speedy_utils-1.1.42/src/datasets_utils}/convert_to_arrow.py +0 -0
  76. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/chat_format/__init__.py +0 -0
  77. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/chat_format/display.py +0 -0
  78. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/chat_format/transform.py +0 -0
  79. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/chat_format/utils.py +0 -0
  80. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/group_messages.py +0 -0
  81. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/__init__.py +0 -0
  82. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  83. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  84. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
  85. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
  86. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
  87. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  88. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/base_prompt_builder.py +0 -0
  89. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/llm_signature.py +0 -0
  90. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/lm_base.py +0 -0
  91. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/mixins.py +0 -0
  92. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/openai_memoize.py +0 -0
  93. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/signature.py +0 -0
  94. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/lm/utils.py +0 -0
  95. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/scripts/README.md +0 -0
  96. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/scripts/fast_vllm.py +0 -0
  97. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  98. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/scripts/vllm_serve.py +0 -0
  99. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/vector_cache/__init__.py +0 -0
  100. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/vector_cache/cli.py +0 -0
  101. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/vector_cache/core.py +0 -0
  102. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/vector_cache/types.py +0 -0
  103. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/llm_utils/vector_cache/utils.py +0 -0
  104. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/__imports.py +0 -0
  105. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/__init__.py +0 -0
  106. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/clock.py +0 -0
  107. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/function_decorator.py +0 -0
  108. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/logger.py +0 -0
  109. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/notebook_utils.py +0 -0
  110. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/patcher.py +0 -0
  111. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/report_manager.py +0 -0
  112. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/utils_cache.py +0 -0
  113. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/utils_misc.py +0 -0
  114. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/common/utils_print.py +0 -0
  115. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/multi_worker/thread.py +0 -0
  116. {speedy_utils-1.1.40/src/speedy_utils/multi_worker → speedy_utils-1.1.42/src/speedy_utils/scripts}/__init__.py +0 -0
  117. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
  118. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/vision_utils/README.md +0 -0
  119. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/vision_utils/__init__.py +0 -0
  120. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/vision_utils/io_utils.py +0 -0
  121. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/src/vision_utils/plot.py +0 -0
  122. /speedy_utils-1.1.40/src/speedy_utils/scripts/__init__.py → /speedy_utils-1.1.42/test2.txt +0 -0
  123. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/import_all.py +0 -0
  124. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/import_time_report.py +0 -0
  125. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/integration_test.py +0 -0
  126. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/sample_objects.py +0 -0
  127. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test.py +0 -0
  128. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_logger.py +0 -0
  129. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_logger_format.py +0 -0
  130. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_memoize_typing.py +0 -0
  131. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_mpython.py +0 -0
  132. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_multithread_error_trace.py +0 -0
  133. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_process.py +0 -0
  134. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_process_update.py +0 -0
  135. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_pytorch_sharing.py +0 -0
  136. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_shared_kwargs.py +0 -0
  137. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_thread.py +0 -0
  138. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/tests/test_tokenization.py +0 -0
  139. {speedy_utils-1.1.40 → speedy_utils-1.1.42}/uv.lock +0 -0
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ if [[ "${SKIP_TAG_HOOK:-}" == "1" ]]; then
5
+ exit 0
6
+ fi
7
+
8
+ remote_name="${1:-}"
9
+ remote_url="${2:-}"
10
+
11
+ # Only tag for GitHub remotes.
12
+ if [[ -z "$remote_url" || "$remote_url" != *"github.com"* ]]; then
13
+ exit 0
14
+ fi
15
+
16
+ today="$(date +%y%m%d)"
17
+
18
+ max_suffix="$(git tag -l "${today}.*" | awk -F. 'NF==2 && $1 ~ /^[0-9]{6}$/ && $2 ~ /^[0-9]+$/ {print $2}' | sort -n | tail -n 1)"
19
+ if [[ -z "${max_suffix}" ]]; then
20
+ next_suffix=1
21
+ else
22
+ next_suffix=$((max_suffix + 1))
23
+ fi
24
+
25
+ tag="${today}.${next_suffix}"
26
+
27
+ if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null; then
28
+ exit 0
29
+ fi
30
+
31
+ git tag "${tag}"
32
+ SKIP_TAG_HOOK=1 git push "${remote_name}" "refs/tags/${tag}"
@@ -122,3 +122,4 @@ dmypy.json
122
122
  .copilot
123
123
  .vscode/settings.json
124
124
  .codegen
125
+ edu_results.json
@@ -0,0 +1,32 @@
1
+ # Repository Guidelines
2
+
3
+ ## Project Structure & Module Organization
4
+
5
+ - `src/` contains `speedy_utils`, `llm_utils`, and `vision_utils` packages.
6
+ - `tests/` holds automated tests; `examples/` and `notebooks/` are usage references.
7
+ - `scripts/` and `experiments/` are for tooling and experiments; keep changes scoped.
8
+ - `docs/` contains documentation assets.
9
+ - `pyproject.toml`, `ruff.toml`, and `bumpversion.sh` define tooling and release helpers.
10
+
11
+ ## Build, Test, and Development Commands
12
+
13
+ - `pip install -e .` installs the package in editable mode.
14
+ - `uv pip install -e .` is a drop-in alternative if you use uv.
15
+ - `python -m pytest` or `pytest tests` runs the test suite.
16
+ - `ruff check .` runs lint rules; `ruff format .` formats code.
17
+
18
+ ## Coding Style & Naming Conventions
19
+
20
+ - Formatting is aligned with Black-style settings (88 char lines) and Ruff rules in `ruff.toml`.
21
+ - Use `snake_case` for Python modules and functions; class names follow `CamelCase`.
22
+ - Keep public APIs exported from `src/*/__init__.py` small and intentional.
23
+
24
+ ## Testing Guidelines
25
+
26
+ - Tests live in `tests/` and should be named `test_*.py`.
27
+ - Prefer pytest-style assertions and keep fixtures near the tests that use them.
28
+
29
+ ## Commit & Pull Request Guidelines
30
+
31
+ - Recent history includes informal messages; prefer concise, descriptive imperatives (e.g., `add cache backend`).
32
+ - PRs should include test results and note any new dependencies or optional extras.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.40
3
+ Version: 1.1.42
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -45,7 +45,8 @@ Requires-Dist: tabulate
45
45
  Requires-Dist: tqdm
46
46
  Requires-Dist: xxhash
47
47
  Provides-Extra: ray
48
- Requires-Dist: ray>=2.49.1; (python_version >= '3.9') and extra == 'ray'
48
+ Requires-Dist: ray[data,llm]>=2.40.0; extra == 'ray'
49
+ Requires-Dist: vllm>=0.6.3; extra == 'ray'
49
50
  Description-Content-Type: text/markdown
50
51
 
51
52
  # Speedy Utils
@@ -0,0 +1,73 @@
1
+ """
2
+ Example: Using LLMRay for distributed offline batch inference.
3
+
4
+ This demonstrates how to process large batches of OpenAI-style messages
5
+ across multiple GPUs in a Ray cluster with automatic data parallelism.
6
+
7
+ Key concepts:
8
+ - dp (data parallel): Number of model replicas
9
+ - tp (tensor parallel): GPUs per replica
10
+ - Total GPUs = dp * tp
11
+ """
12
+ from llm_utils import LLMRay
13
+ from speedy_utils import dump_json_or_pickle
14
+
15
+ # --- Example 1: Simple batch generation ---
16
+ print('=== Example 1: Simple batch generation ===')
17
+
18
+ # Create LLMRay instance
19
+ # - dp=4: 4 model replicas (workers)
20
+ # - tp=2: each replica uses 2 GPUs
21
+ # - Total: 8 GPUs used
22
+ # - If cluster has 16 GPUs across 2 nodes, Ray will distribute automatically
23
+ llm = LLMRay(
24
+ model_name='Qwen/Qwen3-0.6B',
25
+ dp=4,
26
+ tp=2,
27
+ sampling_params={'temperature': 0.7, 'max_tokens': 128},
28
+ )
29
+
30
+ # Prepare messages (OpenAI format: list of message lists)
31
+ messages_list = [
32
+ [{'role': 'user', 'content': 'What is artificial intelligence?'}],
33
+ [{'role': 'user', 'content': 'Explain quantum computing in simple terms.'}],
34
+ [{'role': 'user', 'content': 'Write a haiku about programming.'}],
35
+ [{'role': 'user', 'content': 'What are the benefits of distributed computing?'}],
36
+ ] + [[{'role': 'user', 'content': f'Summarize document {i}'}] for i in range(20)]
37
+
38
+ # Generate responses (automatically distributed across all workers)
39
+ results = llm.generate(messages_list)
40
+
41
+ # Save results
42
+ dump_json_or_pickle(results, 'llm_ray_results.json')
43
+
44
+ print(f'\nProcessed {len(results)} messages')
45
+ print(f'\nSample result:\n{results[0]}')
46
+
47
+
48
+ # --- Example 2: Multi-turn conversation ---
49
+ print('\n=== Example 2: Multi-turn conversation ===')
50
+
51
+ # Multi-turn conversations with system prompts
52
+ inputs = [
53
+ [
54
+ {'role': 'system', 'content': 'You are a creative writer.'},
55
+ {'role': 'user', 'content': 'Write a short story about a robot.'},
56
+ ],
57
+ [
58
+ {'role': 'system', 'content': 'You are a math tutor.'},
59
+ {'role': 'user', 'content': 'What is 2+2?'},
60
+ {'role': 'assistant', 'content': '2+2 equals 4.'},
61
+ {'role': 'user', 'content': 'What about 3+3?'},
62
+ ],
63
+ ]
64
+
65
+ # Process conversations
66
+ results = llm(inputs) # Can also use __call__ syntax
67
+
68
+ for i, result in enumerate(results):
69
+ print(f'\nConversation {i + 1}:')
70
+ print(f'Generated: {result["generated_text"][:100]}...')
71
+
72
+
73
+ print('\n=== All examples completed! ===')
@@ -0,0 +1,61 @@
1
+ import time
2
+ import random
3
+ import ray
4
+ from vllm import LLM, SamplingParams
5
+ from speedy_utils.multi_worker.parallel_gpu_pool import RayWorkerBase, RayRunner
6
+ import os
7
+ ray.init(ignore_reinit_error=True)
8
+
9
+ # --- Define Your Worker ---
10
+ class MyEduWorker(RayWorkerBase):
11
+ def setup(self):
12
+ print(f"Worker {self.worker_id}: Loading vLLM Engine...")
13
+
14
+ # Initialize vLLM
15
+ # Note: Set gpu_memory_utilization based on how many workers share a GPU
16
+ self.model = LLM(
17
+ model="Qwen/Qwen3-0.6B",
18
+ gpu_memory_utilization=0.4, # Adjust based on your GPU pool density
19
+ trust_remote_code=True,
20
+ enforce_eager=True,
21
+
22
+ )
23
+
24
+ # Set default sampling parameters
25
+ self.sampling_params = SamplingParams(
26
+ temperature=0.7,
27
+ top_p=0.9,
28
+ max_tokens=128
29
+ )
30
+
31
+ def process_one_item(self, item):
32
+ # 'item' is the prompt from your all_files list
33
+ prompt = f"Summarize this file metadata: {item}"
34
+
35
+ # vLLM offline generation
36
+ outputs = self.model.generate([prompt], self.sampling_params)
37
+
38
+ # Extract the generated text
39
+ generated_text = outputs[0].outputs[0].text
40
+
41
+ return {
42
+ "file": item,
43
+ "response": generated_text.strip(),
44
+ "worker_id": self.worker_id,
45
+ "gpu_idx": ray.get_runtime_context().get_assigned_resources().get("GPU", []),
46
+ "node_id": ray.get_runtime_context().node_id.hex(),
47
+ "cuda_visible_devices": os.environ.get("CUDA_VISIBLE_DEVICES", "")
48
+ }
49
+
50
+ # --- Run It ---
51
+ # Create fake data (prompts or filenames)
52
+ all_files = [f"document_id_{i}" for i in range(20)]
53
+
54
+ # Set test_mode=False if you want to use real GPUs
55
+ runner = RayRunner(test_mode=False, gpus_per_worker=2)
56
+ results = runner.run(
57
+ worker_class=MyEduWorker,
58
+ all_data=all_files
59
+ )
60
+ from speedy_utils import dump_json_or_pickle
61
+ dump_json_or_pickle(results, "edu_results.json")
@@ -0,0 +1,89 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "de8205ba",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "ename": "ConnectionError",
11
+ "evalue": "Could not find any running Ray instance. Please specify the one to connect to by setting `--address` flag or `RAY_ADDRESS` environment variable.",
12
+ "output_type": "error",
13
+ "traceback": [
14
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
15
+ "\u001b[31mConnectionError\u001b[39m Traceback (most recent call last)",
16
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 24\u001b[39m\n\u001b[32m 20\u001b[39m \u001b[38;5;66;03m# --- Run It ---\u001b[39;00m\n\u001b[32m 21\u001b[39m \u001b[38;5;66;03m# Create fake data\u001b[39;00m\n\u001b[32m 22\u001b[39m all_files = [\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mfile_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.pdf\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m500\u001b[39m)]\n\u001b[32m---> \u001b[39m\u001b[32m24\u001b[39m \u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mGPUCluster\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mas\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcluster\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 25\u001b[39m \u001b[43m \u001b[49m\u001b[43mresults\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mcluster\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 26\u001b[39m \u001b[43m \u001b[49m\u001b[43mworker_class\u001b[49m\u001b[43m=\u001b[49m\u001b[43mMyEduWorker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 27\u001b[39m \u001b[43m \u001b[49m\u001b[43mall_data\u001b[49m\u001b[43m=\u001b[49m\u001b[43mall_files\u001b[49m\n\u001b[32m 28\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 30\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mProcessing Complete!\u001b[39m\u001b[33m\"\u001b[39m)\n",
17
+ "\u001b[36mFile \u001b[39m\u001b[32m~/projects/speedy_utils/src/speedy_utils/multi_worker/parallel_gpu_pool.py:42\u001b[39m, in \u001b[36mGPUCluster.__enter__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 39\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[32m 41\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ray.is_initialized():\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m \u001b[43mray\u001b[49m\u001b[43m.\u001b[49m\u001b[43minit\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddress\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mauto\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_reinit_error\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 43\u001b[39m \u001b[38;5;28mself\u001b[39m.is_connected = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 45\u001b[39m resources = ray.cluster_resources()\n",
18
+ "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/client_mode_hook.py:104\u001b[39m, in \u001b[36mclient_mode_hook.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 102\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m func.\u001b[34m__name__\u001b[39m != \u001b[33m\"\u001b[39m\u001b[33minit\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[32m 103\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(ray, func.\u001b[34m__name__\u001b[39m)(*args, **kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m104\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
19
+ "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/worker.py:1818\u001b[39m, in \u001b[36minit\u001b[39m\u001b[34m(address, num_cpus, num_gpus, resources, labels, object_store_memory, local_mode, ignore_reinit_error, include_dashboard, dashboard_host, dashboard_port, job_config, configure_logging, logging_level, logging_format, logging_config, log_to_driver, namespace, runtime_env, enable_resource_isolation, system_reserved_cpu, system_reserved_memory, **kwargs)\u001b[39m\n\u001b[32m 1815\u001b[39m job_config.set_py_logging_config(logging_config)\n\u001b[32m 1817\u001b[39m redis_address, gcs_address = \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1818\u001b[39m bootstrap_address = \u001b[43mservices\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcanonicalize_bootstrap_address\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_temp_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1819\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m bootstrap_address \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1820\u001b[39m gcs_address = bootstrap_address\n",
20
+ "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/services.py:532\u001b[39m, in \u001b[36mcanonicalize_bootstrap_address\u001b[39m\u001b[34m(addr, temp_dir)\u001b[39m\n\u001b[32m 521\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Canonicalizes Ray cluster bootstrap address to host:port.\u001b[39;00m\n\u001b[32m 522\u001b[39m \u001b[33;03mReads address from the environment if needed.\u001b[39;00m\n\u001b[32m 523\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 529\u001b[39m \u001b[33;03m should start a local Ray instance.\u001b[39;00m\n\u001b[32m 530\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 531\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m addr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m addr == \u001b[33m\"\u001b[39m\u001b[33mauto\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m532\u001b[39m addr = \u001b[43mget_ray_address_from_environment\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemp_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 533\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m addr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m addr == \u001b[33m\"\u001b[39m\u001b[33mlocal\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 534\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
21
+ "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/services.py:419\u001b[39m, in \u001b[36mget_ray_address_from_environment\u001b[39m\u001b[34m(addr, temp_dir)\u001b[39m\n\u001b[32m 417\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 418\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m419\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(\n\u001b[32m 420\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mCould not find any running Ray instance. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 421\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease specify the one to connect to by setting `--address` flag \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 422\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor `RAY_ADDRESS` environment variable.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 423\u001b[39m )\n\u001b[32m 425\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bootstrap_addr\n",
22
+ "\u001b[31mConnectionError\u001b[39m: Could not find any running Ray instance. Please specify the one to connect to by setting `--address` flag or `RAY_ADDRESS` environment variable."
23
+ ]
24
+ }
25
+ ],
26
+ "source": [
27
+ "import time\n",
28
+ "import random\n",
29
+ "# Import the class file we just created\n",
30
+ "from speedy_utils.multi_worker.parallel_gpu_pool import ParallelGPUPool, GPUCluster\n",
31
+ "\n",
32
+ "# --- Define Your Worker ---\n",
33
+ "class MyEduWorker(ParallelGPUPool):\n",
34
+ " def setup(self):\n",
35
+ " # Load your heavy model here\n",
36
+ " print(f\"Worker {self.worker_id}: Loading Model...\")\n",
37
+ " time.sleep(1) # Simulate load\n",
38
+ " \n",
39
+ " def process_one_item(self, item):\n",
40
+ " # Simulate GPU Work\n",
41
+ " time.sleep(random.uniform(0.05, 0.2)) \n",
42
+ " \n",
43
+ " # Return whatever you want (filename, score, etc)\n",
44
+ " return f\"{item}_DONE\"\n",
45
+ "\n",
46
+ "# --- Run It ---\n",
47
+ "# Create fake data\n",
48
+ "all_files = [f\"file_{i}.pdf\" for i in range(500)]\n",
49
+ "\n",
50
+ "cluster = GPUCluster(test_mode=False)\n",
51
+ "results = cluster.run(\n",
52
+ " worker_class=MyEduWorker,\n",
53
+ " all_data=all_files\n",
54
+ ")\n",
55
+ "\n",
56
+ "print(\"Processing Complete!\")"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": null,
62
+ "id": "2b67e6b5",
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": []
66
+ }
67
+ ],
68
+ "metadata": {
69
+ "kernelspec": {
70
+ "display_name": "Megatron-Bridge-Host (3.12.12)",
71
+ "language": "python",
72
+ "name": "python3"
73
+ },
74
+ "language_info": {
75
+ "codemirror_mode": {
76
+ "name": "ipython",
77
+ "version": 3
78
+ },
79
+ "file_extension": ".py",
80
+ "mimetype": "text/x-python",
81
+ "name": "python",
82
+ "nbconvert_exporter": "python",
83
+ "pygments_lexer": "ipython3",
84
+ "version": "3.12.12"
85
+ }
86
+ },
87
+ "nbformat": 4,
88
+ "nbformat_minor": 5
89
+ }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "speedy-utils"
3
- version = "1.1.40"
3
+ version = "1.1.42"
4
4
  description = "Fast and easy-to-use package for data science"
5
5
  authors = [{ name = "AnhVTH", email = "anhvth.226@gmail.com" }]
6
6
  readme = "README.md"
@@ -53,7 +53,10 @@ Homepage = "https://github.com/anhvth/speedy"
53
53
  Repository = "https://github.com/anhvth/speedy"
54
54
 
55
55
  [project.optional-dependencies]
56
- ray = ["ray>=2.49.1; python_version >= '3.9'"]
56
+ ray = [
57
+ "vllm>=0.6.3",
58
+ "ray[data,llm]>=2.40.0",
59
+ ]
57
60
 
58
61
  [project.scripts]
59
62
  mpython = "speedy_utils.scripts.mpython:main"
@@ -0,0 +1,31 @@
1
+ from os import getpid
2
+ from random import choices
3
+ from time import sleep
4
+
5
+ from speedy_utils import multi_process
6
+
7
+
8
+ def square_value(number: int) -> int:
9
+ """Example worker that squares a number while logging occasionally."""
10
+ if choices([True, False], weights=[0.5, 0.9])[0]:
11
+ print(f"Processing {number} in process {getpid()}")
12
+ sleep(0.1)
13
+ return number * number
14
+
15
+
16
+ def main() -> None:
17
+ """Demonstrate running :func:`square_value` across several worker processes."""
18
+ values = list(range(100))
19
+ worker_count = 4
20
+
21
+ multi_process(
22
+ square_value,
23
+ values,
24
+ workers=worker_count,
25
+ backend='mp',
26
+ log_worker='first',
27
+ )
28
+
29
+
30
+ if __name__ == "__main__":
31
+ main()
@@ -12,6 +12,7 @@ from llm_utils.lm import (
12
12
  from llm_utils.lm.base_prompt_builder import BasePromptBuilder
13
13
  from llm_utils.lm.lm_base import get_model_name
14
14
  from llm_utils.lm.openai_memoize import MOpenAI
15
+ from llm_utils.llm_ray import LLMRay
15
16
  from llm_utils.vector_cache import VectorCache
16
17
 
17
18
 
@@ -57,6 +58,7 @@ __all__ = [
57
58
  "AsyncLM",
58
59
  "AsyncLLMTask",
59
60
  "LLM",
61
+ "LLMRay",
60
62
  "MOpenAI",
61
63
  "get_model_name",
62
64
  "VectorCache",