speedy-utils 1.1.40__tar.gz → 1.1.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. speedy_utils-1.1.43/.githooks/pre-push +32 -0
  2. speedy_utils-1.1.43/.github/prompts/improveParallelErrorHandling.prompt.md +64 -0
  3. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.gitignore +1 -0
  4. speedy_utils-1.1.43/AGENTS.md +32 -0
  5. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/PKG-INFO +5 -3
  6. speedy_utils-1.1.43/examples/llm_ray_example.py +73 -0
  7. speedy_utils-1.1.43/examples/test_parallel_gpu.py +61 -0
  8. speedy_utils-1.1.43/notebooks/parallel_gpu_pool.ipynb +89 -0
  9. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/pyproject.toml +7 -3
  10. speedy_utils-1.1.43/scripts/bug.py +34 -0
  11. speedy_utils-1.1.43/scripts/bug_simple.py +11 -0
  12. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/scripts/debug_import_time.py +80 -18
  13. speedy_utils-1.1.43/scripts/test.py +26 -0
  14. speedy_utils-1.1.43/scripts/test_both_backends.py +25 -0
  15. speedy_utils-1.1.43/scripts/test_error_handling.py +37 -0
  16. speedy_utils-1.1.43/scripts/test_locals.py +19 -0
  17. speedy_utils-1.1.43/scripts/test_ray_locals.py +11 -0
  18. speedy_utils-1.1.43/scripts/test_ray_mp.py +31 -0
  19. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/__init__.py +2 -0
  20. speedy_utils-1.1.43/src/llm_utils/llm_ray.py +370 -0
  21. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/llm.py +36 -29
  22. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/__init__.py +10 -0
  23. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/utils_io.py +3 -1
  24. speedy_utils-1.1.43/src/speedy_utils/multi_worker/__init__.py +12 -0
  25. speedy_utils-1.1.43/src/speedy_utils/multi_worker/dataset_ray.py +303 -0
  26. speedy_utils-1.1.43/src/speedy_utils/multi_worker/parallel_gpu_pool.py +178 -0
  27. speedy_utils-1.1.43/src/speedy_utils/multi_worker/process.py +1302 -0
  28. speedy_utils-1.1.43/src/speedy_utils/multi_worker/progress.py +140 -0
  29. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/multi_worker/thread.py +202 -42
  30. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/scripts/mpython.py +49 -4
  31. speedy_utils-1.1.43/test_s3.py +34 -0
  32. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/llm_utils/test_llm_mixins.py +22 -0
  33. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/uv.lock +5247 -2937
  34. speedy_utils-1.1.40/src/speedy_utils/multi_worker/process.py +0 -399
  35. speedy_utils-1.1.40/src/speedy_utils/scripts/__init__.py +0 -0
  36. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/copilot-instructions.md +0 -0
  37. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/caching-utilities/SKILL.md +0 -0
  38. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
  39. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/io-utilities/SKILL.md +0 -0
  40. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/io-utilities/examples/io_example.py +0 -0
  41. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/llm-integration/SKILL.md +0 -0
  42. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/llm-integration/examples/llm_example.py +0 -0
  43. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/multi-threading-processing/SKILL.md +0 -0
  44. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
  45. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/skill-creation/SKILL.md +0 -0
  46. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/vision-utilities/SKILL.md +0 -0
  47. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
  48. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.github/workflows/publish.yml +0 -0
  49. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/.pre-commit-config.yaml +0 -0
  50. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/README.md +0 -0
  51. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/bumpversion.sh +0 -0
  52. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/debug_generate_response.py +0 -0
  53. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/debug_n_param.py +0 -0
  54. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/debug_n_structure.py +0 -0
  55. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/integration_test.py +0 -0
  56. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/test_decode_api.py +0 -0
  57. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/test_endpoints.py +0 -0
  58. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/test_generate.py +0 -0
  59. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/debug/test_generate_endpoint.py +0 -0
  60. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/GENERATE_QUICKREF.md +0 -0
  61. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/IMPLEMENTATION.md +0 -0
  62. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/QUICKSTART.md +0 -0
  63. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/TOKENIZATION.md +0 -0
  64. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/TOKENIZATION_IMPLEMENTATION.md +0 -0
  65. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/docs/zero_copy_sharing.md +0 -0
  66. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/generate_example.py +0 -0
  67. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/pytorch_large_model.py +0 -0
  68. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/shared_kwargs_example.py +0 -0
  69. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/temperature_range_example.py +0 -0
  70. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/test_share_ray.py +0 -0
  71. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/tokenization_example.py +0 -0
  72. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/examples/vision_utils_example.py +0 -0
  73. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/experiments/exp1/dockerfile +0 -0
  74. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/experiments/exp1/run_in_docker.sh +0 -0
  75. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/experiments/exp1/test.png +0 -0
  76. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/experiments/test_read_image.py +0 -0
  77. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/notebooks/README.ipynb +0 -0
  78. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
  79. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/notebooks/ray_tutorial.ipynb +0 -0
  80. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/notebooks/test_multi_thread.ipynb +0 -0
  81. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/ruff.toml +0 -0
  82. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/scripts/deploy.sh +0 -0
  83. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/scripts/imports.sh +0 -0
  84. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/scripts/test_import_time_vision.py +0 -0
  85. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/setup.cfg +0 -0
  86. {speedy_utils-1.1.40/src/datasets → speedy_utils-1.1.43/src/datasets_utils}/convert_to_arrow.py +0 -0
  87. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/chat_format/__init__.py +0 -0
  88. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/chat_format/display.py +0 -0
  89. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/chat_format/transform.py +0 -0
  90. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/chat_format/utils.py +0 -0
  91. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/group_messages.py +0 -0
  92. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/__init__.py +0 -0
  93. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  94. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  95. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
  96. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
  97. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
  98. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  99. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/base_prompt_builder.py +0 -0
  100. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/llm_signature.py +0 -0
  101. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/lm_base.py +0 -0
  102. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/mixins.py +0 -0
  103. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/openai_memoize.py +0 -0
  104. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/signature.py +0 -0
  105. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/lm/utils.py +0 -0
  106. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/scripts/README.md +0 -0
  107. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/scripts/fast_vllm.py +0 -0
  108. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  109. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/scripts/vllm_serve.py +0 -0
  110. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/vector_cache/__init__.py +0 -0
  111. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/vector_cache/cli.py +0 -0
  112. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/vector_cache/core.py +0 -0
  113. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/vector_cache/types.py +0 -0
  114. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/llm_utils/vector_cache/utils.py +0 -0
  115. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/__imports.py +0 -0
  116. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/__init__.py +0 -0
  117. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/clock.py +0 -0
  118. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/function_decorator.py +0 -0
  119. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/logger.py +0 -0
  120. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/notebook_utils.py +0 -0
  121. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/patcher.py +0 -0
  122. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/report_manager.py +0 -0
  123. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/utils_cache.py +0 -0
  124. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/utils_misc.py +0 -0
  125. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/common/utils_print.py +0 -0
  126. {speedy_utils-1.1.40/src/speedy_utils/multi_worker → speedy_utils-1.1.43/src/speedy_utils/scripts}/__init__.py +0 -0
  127. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
  128. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/vision_utils/README.md +0 -0
  129. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/vision_utils/__init__.py +0 -0
  130. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/vision_utils/io_utils.py +0 -0
  131. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/src/vision_utils/plot.py +0 -0
  132. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/import_all.py +0 -0
  133. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/import_time_report.py +0 -0
  134. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/integration_test.py +0 -0
  135. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/sample_objects.py +0 -0
  136. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test.py +0 -0
  137. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_logger.py +0 -0
  138. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_logger_format.py +0 -0
  139. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_memoize_typing.py +0 -0
  140. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_mpython.py +0 -0
  141. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_multithread_error_trace.py +0 -0
  142. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_process.py +0 -0
  143. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_process_update.py +0 -0
  144. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_pytorch_sharing.py +0 -0
  145. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_shared_kwargs.py +0 -0
  146. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_thread.py +0 -0
  147. {speedy_utils-1.1.40 → speedy_utils-1.1.43}/tests/test_tokenization.py +0 -0
@@ -0,0 +1,32 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+
+ if [[ "${SKIP_TAG_HOOK:-}" == "1" ]]; then
+   exit 0
+ fi
+
+ remote_name="${1:-}"
+ remote_url="${2:-}"
+
+ # Only tag for GitHub remotes.
+ if [[ -z "$remote_url" || "$remote_url" != *"github.com"* ]]; then
+   exit 0
+ fi
+
+ today="$(date +%y%m%d)"
+
+ max_suffix="$(git tag -l "${today}.*" | awk -F. 'NF==2 && $1 ~ /^[0-9]{6}$/ && $2 ~ /^[0-9]+$/ {print $2}' | sort -n | tail -n 1)"
+ if [[ -z "${max_suffix}" ]]; then
+   next_suffix=1
+ else
+   next_suffix=$((max_suffix + 1))
+ fi
+
+ tag="${today}.${next_suffix}"
+
+ if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null; then
+   exit 0
+ fi
+
+ git tag "${tag}"
+ SKIP_TAG_HOOK=1 git push "${remote_name}" "refs/tags/${tag}"
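
The new hook auto-creates a date-based tag (`YYMMDD.N`, with `N` counting up within a day) on each push to a GitHub remote. For readers less fluent in the `awk` pipeline above, a rough Python rendering of the next-tag computation might look like the following — a hypothetical helper for illustration only; the shipped hook is the bash script above:

```python
# Hypothetical Python rendering of the hook's next-tag logic, for
# illustration only; the actual hook is the bash script in this diff.
import datetime
import re
import subprocess


def next_daily_tag() -> str:
    today = datetime.date.today().strftime("%y%m%d")  # e.g. "240515"
    # List today's tags, mirroring `git tag -l "${today}.*"`.
    tags = subprocess.run(
        ["git", "tag", "-l", f"{today}.*"],
        capture_output=True, text=True, check=True,
    ).stdout.split()
    # Keep numeric suffixes (the awk filter) and take the largest.
    suffixes = [
        int(m.group(1))
        for t in tags
        if (m := re.fullmatch(rf"{today}\.(\d+)", t))
    ]
    return f"{today}.{max(suffixes, default=0) + 1}"
```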
@@ -0,0 +1,64 @@
+ ---
+ name: improveParallelErrorHandling
+ description: Enhance error tracebacks in parallel execution with rich formatting and context
+ argument-hint: the parallel execution function and backend type
+ ---
+
+ Improve error handling for the specified parallel execution function to provide clean, user-focused tracebacks similar to direct function calls.
+
+ ## Requirements
+
+ 1. **Filter Internal Frames**: Remove framework/library internal frames from tracebacks, showing only user code
+ 2. **Add Context Lines**: Display 3 lines before and after each error location with line numbers
+ 3. **Include Caller Frame**: Show where the parallel execution function was called, not just where the error occurred
+ 4. **Rich Formatting**: Use rich library's Panel/formatting for clean, readable output
+ 5. **Suppress Noise**: Set environment variables or flags to suppress verbose framework error logs
+
+ ## Implementation Steps
+
+ 1. **Capture Caller Context**: Use `inspect.currentframe().f_back` to capture where the parallel function was called (filename, line number, function name)
+
+ 2. **Wrap Error Handling**: Catch framework-specific exceptions (e.g., `RayTaskError`, thread exceptions) in the execution loop
+
+ 3. **Parse/Extract Original Exception**: Get the underlying user exception from the framework wrapper
+    - Extract exception type, message, and traceback information
+    - Parse from string representation if traceback objects aren't preserved
+
+ 4. **Filter Frames**: Skip frames matching internal paths:
+    - Framework internals (e.g., `ray/_private`, `concurrent/futures`)
+    - Library worker implementations (e.g., `speedy_utils/multi_worker`)
+    - Site-packages for the framework
+
+ 5. **Format with Context**:
+    - For each user frame, show: `filepath:lineno in function_name`
+    - Use `linecache.getline()` to retrieve surrounding lines
+    - Highlight the error line with `❱` marker
+    - Number all lines (e.g., ` 4 │ code here` or ` 5 ❱ error here`)
+
+ 6. **Display Caller Frame First**: Show where the parallel function was invoked before showing the actual error location
+
+ 7. **Clean Exit**: Flush output streams before exiting to ensure traceback displays
+
+ ## Example Output Format
+
+ ```
+ ╭─────────────── Traceback (most recent call last) ───────────────╮
+ │ /path/to/user/script.py:42 in main │
+ │ │
+ │ 40 │ data = load_data() │
+ │ 41 │ # Process in parallel │
+ │ 42 ❱ results = multi_process(process_item, data, workers=8) │
+ │ 43 │ │
+ │ │
+ │ /path/to/user/module.py:15 in process_item │
+ │ │
+ │ 12 │ def process_item(item): │
+ │ 13 │ value = item['key'] │
+ │ 14 │ denominator = value - 100 │
+ │ 15 ❱ return 1 / denominator │
+ │ 16 │ │
+ ╰──────────────────────────────────────────────────────────────────╯
+ ZeroDivisionError: division by zero
+ ```
+
+ Apply these improvements to the specified parallel execution function, ensuring error messages are as clear as direct function calls while maintaining all performance benefits of parallel execution.
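
The steps in this prompt amount to: walk the traceback, drop internal frames, and re-render the survivors with source context. A minimal sketch of that core loop using only the stdlib — the `_INTERNAL_MARKERS` tuple and helper name below are illustrative assumptions, not speedy_utils' actual internals:

```python
# Illustrative sketch only: the marker paths and helper name are assumptions,
# not the actual speedy_utils implementation.
import linecache
import traceback

_INTERNAL_MARKERS = ("ray/_private", "concurrent/futures", "speedy_utils/multi_worker")


def format_user_traceback(exc: BaseException, context: int = 3) -> str:
    lines: list[str] = []
    for frame in traceback.extract_tb(exc.__traceback__):
        # Skip framework/library internals, keeping only user code (step 4).
        if any(marker in frame.filename for marker in _INTERNAL_MARKERS):
            continue
        lines.append(f"{frame.filename}:{frame.lineno} in {frame.name}")
        # Show `context` lines around the error location (steps 2 and 5).
        start = max(frame.lineno - context, 1)
        for lineno in range(start, frame.lineno + context + 1):
            src = linecache.getline(frame.filename, lineno).rstrip("\n")
            if not src and lineno > frame.lineno:
                break  # Past end of file.
            marker = "❱" if lineno == frame.lineno else "│"
            lines.append(f"  {lineno:4d} {marker} {src}")
    lines.append(f"{type(exc).__name__}: {exc}")
    return "\n".join(lines)
```

Wrapping the resulting string in a rich `Panel` and printing the caller frame captured via `inspect.currentframe().f_back` first would cover the remaining steps.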
@@ -122,3 +122,4 @@ dmypy.json
  .copilot
  .vscode/settings.json
  .codegen
+ edu_results.json
@@ -0,0 +1,32 @@
+ # Repository Guidelines
+
+ ## Project Structure & Module Organization
+
+ - `src/` contains `speedy_utils`, `llm_utils`, and `vision_utils` packages.
+ - `tests/` holds automated tests; `examples/` and `notebooks/` are usage references.
+ - `scripts/` and `experiments/` are for tooling and experiments; keep changes scoped.
+ - `docs/` contains documentation assets.
+ - `pyproject.toml`, `ruff.toml`, and `bumpversion.sh` define tooling and release helpers.
+
+ ## Build, Test, and Development Commands
+
+ - `pip install -e .` installs the package in editable mode.
+ - `uv pip install -e .` is a drop-in alternative if you use uv.
+ - `python -m pytest` or `pytest tests` runs the test suite.
+ - `ruff check .` runs lint rules; `ruff format .` formats code.
+
+ ## Coding Style & Naming Conventions
+
+ - Formatting is aligned with Black-style settings (88 char lines) and Ruff rules in `ruff.toml`.
+ - Use `snake_case` for Python modules and functions; class names follow `CamelCase`.
+ - Keep public APIs exported from `src/*/__init__.py` small and intentional.
+
+ ## Testing Guidelines
+
+ - Tests live in `tests/` and should be named `test_*.py`.
+ - Prefer pytest-style assertions and keep fixtures near the tests that use them.
+
+ ## Commit & Pull Request Guidelines
+
+ - Recent history includes informal messages; prefer concise, descriptive imperatives (e.g., `add cache backend`).
+ - PRs should include test results and note any new dependencies or optional extras.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: speedy-utils
- Version: 1.1.40
+ Version: 1.1.43
  Summary: Fast and easy-to-use package for data science
  Project-URL: Homepage, https://github.com/anhvth/speedy
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3.14
- Requires-Python: >=3.8
+ Requires-Python: >=3.9
  Requires-Dist: aiohttp
  Requires-Dist: bump2version
  Requires-Dist: cachetools
@@ -39,13 +39,15 @@ Requires-Dist: pydantic
  Requires-Dist: pytest
  Requires-Dist: ray
  Requires-Dist: requests
+ Requires-Dist: rich>=14.3.1
  Requires-Dist: ruff
  Requires-Dist: scikit-learn
  Requires-Dist: tabulate
  Requires-Dist: tqdm
  Requires-Dist: xxhash
  Provides-Extra: ray
- Requires-Dist: ray>=2.49.1; (python_version >= '3.9') and extra == 'ray'
+ Requires-Dist: ray[data,llm]>=2.40.0; extra == 'ray'
+ Requires-Dist: vllm>=0.6.3; extra == 'ray'
  Description-Content-Type: text/markdown

  # Speedy Utils
@@ -0,0 +1,73 @@
+ """
+ Example: Using LLMRay for distributed offline batch inference.
+
+ This demonstrates how to process large batches of OpenAI-style messages
+ across multiple GPUs in a Ray cluster with automatic data parallelism.
+
+ Key concepts:
+ - dp (data parallel): Number of model replicas
+ - tp (tensor parallel): GPUs per replica
+ - Total GPUs = dp * tp
+ """
+ from llm_utils import LLMRay
+ from speedy_utils import dump_json_or_pickle
+
+ # --- Example 1: Simple batch generation ---
+ print('=== Example 1: Simple batch generation ===')
+
+ # Create LLMRay instance
+ # - dp=4: 4 model replicas (workers)
+ # - tp=2: each replica uses 2 GPUs
+ # - Total: 8 GPUs used
+ # - If cluster has 16 GPUs across 2 nodes, Ray will distribute automatically
+ llm = LLMRay(
+     model_name='Qwen/Qwen3-0.6B',
+     dp=4,
+     tp=2,
+     sampling_params={'temperature': 0.7, 'max_tokens': 128},
+ )
+
+ # Prepare messages (OpenAI format: list of message lists)
+ messages_list = [
+     [{'role': 'user', 'content': 'What is artificial intelligence?'}],
+     [{'role': 'user', 'content': 'Explain quantum computing in simple terms.'}],
+     [{'role': 'user', 'content': 'Write a haiku about programming.'}],
+     [{'role': 'user', 'content': 'What are the benefits of distributed computing?'}],
+ ] + [[{'role': 'user', 'content': f'Summarize document {i}'}] for i in range(20)]
+
+ # Generate responses (automatically distributed across all workers)
+ results = llm.generate(messages_list)
+
+ # Save results
+ dump_json_or_pickle(results, 'llm_ray_results.json')
+
+ print(f'\nProcessed {len(results)} messages')
+ print(f'\nSample result:\n{results[0]}')
+
+
+ # --- Example 2: Multi-turn conversation ---
+ print('\n=== Example 2: Multi-turn conversation ===')
+
+ # Multi-turn conversations with system prompts
+ inputs = [
+     [
+         {'role': 'system', 'content': 'You are a creative writer.'},
+         {'role': 'user', 'content': 'Write a short story about a robot.'},
+     ],
+     [
+         {'role': 'system', 'content': 'You are a math tutor.'},
+         {'role': 'user', 'content': 'What is 2+2?'},
+         {'role': 'assistant', 'content': '2+2 equals 4.'},
+         {'role': 'user', 'content': 'What about 3+3?'},
+     ],
+ ]
+
+ # Process conversations
+ results = llm(inputs)  # Can also use __call__ syntax
+
+ for i, result in enumerate(results):
+     print(f'\nConversation {i + 1}:')
+     print(f'Generated: {result["generated_text"][:100]}...')
+
+
+ print('\n=== All examples completed! ===')
@@ -0,0 +1,61 @@
+ import time
+ import random
+ import ray
+ from vllm import LLM, SamplingParams
+ from speedy_utils.multi_worker.parallel_gpu_pool import RayWorkerBase, RayRunner
+ import os
+ ray.init(ignore_reinit_error=True)
+
+ # --- Define Your Worker ---
+ class MyEduWorker(RayWorkerBase):
+     def setup(self):
+         print(f"Worker {self.worker_id}: Loading vLLM Engine...")
+
+         # Initialize vLLM
+         # Note: Set gpu_memory_utilization based on how many workers share a GPU
+         self.model = LLM(
+             model="Qwen/Qwen3-0.6B",
+             gpu_memory_utilization=0.4,  # Adjust based on your GPU pool density
+             trust_remote_code=True,
+             enforce_eager=True,
+
+         )
+
+         # Set default sampling parameters
+         self.sampling_params = SamplingParams(
+             temperature=0.7,
+             top_p=0.9,
+             max_tokens=128
+         )
+
+     def process_one_item(self, item):
+         # 'item' is the prompt from your all_files list
+         prompt = f"Summarize this file metadata: {item}"
+
+         # vLLM offline generation
+         outputs = self.model.generate([prompt], self.sampling_params)
+
+         # Extract the generated text
+         generated_text = outputs[0].outputs[0].text
+
+         return {
+             "file": item,
+             "response": generated_text.strip(),
+             "worker_id": self.worker_id,
+             "gpu_idx": ray.get_runtime_context().get_assigned_resources().get("GPU", []),
+             "node_id": ray.get_runtime_context().node_id.hex(),
+             "cuda_visible_devices": os.environ.get("CUDA_VISIBLE_DEVICES", "")
+         }
+
+ # --- Run It ---
+ # Create fake data (prompts or filenames)
+ all_files = [f"document_id_{i}" for i in range(20)]
+
+ # Set test_mode=False if you want to use real GPUs
+ runner = RayRunner(test_mode=False, gpus_per_worker=2)
+ results = runner.run(
+     worker_class=MyEduWorker,
+     all_data=all_files
+ )
+ from speedy_utils import dump_json_or_pickle
+ dump_json_or_pickle(results, "edu_results.json")
@@ -0,0 +1,89 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "de8205ba",
+    "metadata": {},
+    "outputs": [
+     {
+      "ename": "ConnectionError",
+      "evalue": "Could not find any running Ray instance. Please specify the one to connect to by setting `--address` flag or `RAY_ADDRESS` environment variable.",
+      "output_type": "error",
+      "traceback": [
+       "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+       "\u001b[31mConnectionError\u001b[39m Traceback (most recent call last)",
+       "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 24\u001b[39m\n\u001b[32m 20\u001b[39m \u001b[38;5;66;03m# --- Run It ---\u001b[39;00m\n\u001b[32m 21\u001b[39m \u001b[38;5;66;03m# Create fake data\u001b[39;00m\n\u001b[32m 22\u001b[39m all_files = [\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mfile_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.pdf\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m500\u001b[39m)]\n\u001b[32m---> \u001b[39m\u001b[32m24\u001b[39m \u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mGPUCluster\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mas\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcluster\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 25\u001b[39m \u001b[43m \u001b[49m\u001b[43mresults\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mcluster\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 26\u001b[39m \u001b[43m \u001b[49m\u001b[43mworker_class\u001b[49m\u001b[43m=\u001b[49m\u001b[43mMyEduWorker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 27\u001b[39m \u001b[43m \u001b[49m\u001b[43mall_data\u001b[49m\u001b[43m=\u001b[49m\u001b[43mall_files\u001b[49m\n\u001b[32m 28\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 30\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mProcessing Complete!\u001b[39m\u001b[33m\"\u001b[39m)\n",
+       "\u001b[36mFile \u001b[39m\u001b[32m~/projects/speedy_utils/src/speedy_utils/multi_worker/parallel_gpu_pool.py:42\u001b[39m, in \u001b[36mGPUCluster.__enter__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 39\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[32m 41\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ray.is_initialized():\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m \u001b[43mray\u001b[49m\u001b[43m.\u001b[49m\u001b[43minit\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddress\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mauto\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_reinit_error\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 43\u001b[39m \u001b[38;5;28mself\u001b[39m.is_connected = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 45\u001b[39m resources = ray.cluster_resources()\n",
+       "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/client_mode_hook.py:104\u001b[39m, in \u001b[36mclient_mode_hook.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 102\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m func.\u001b[34m__name__\u001b[39m != \u001b[33m\"\u001b[39m\u001b[33minit\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[32m 103\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(ray, func.\u001b[34m__name__\u001b[39m)(*args, **kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m104\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+       "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/worker.py:1818\u001b[39m, in \u001b[36minit\u001b[39m\u001b[34m(address, num_cpus, num_gpus, resources, labels, object_store_memory, local_mode, ignore_reinit_error, include_dashboard, dashboard_host, dashboard_port, job_config, configure_logging, logging_level, logging_format, logging_config, log_to_driver, namespace, runtime_env, enable_resource_isolation, system_reserved_cpu, system_reserved_memory, **kwargs)\u001b[39m\n\u001b[32m 1815\u001b[39m job_config.set_py_logging_config(logging_config)\n\u001b[32m 1817\u001b[39m redis_address, gcs_address = \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1818\u001b[39m bootstrap_address = \u001b[43mservices\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcanonicalize_bootstrap_address\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_temp_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1819\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m bootstrap_address \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1820\u001b[39m gcs_address = bootstrap_address\n",
+       "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/services.py:532\u001b[39m, in \u001b[36mcanonicalize_bootstrap_address\u001b[39m\u001b[34m(addr, temp_dir)\u001b[39m\n\u001b[32m 521\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Canonicalizes Ray cluster bootstrap address to host:port.\u001b[39;00m\n\u001b[32m 522\u001b[39m \u001b[33;03mReads address from the environment if needed.\u001b[39;00m\n\u001b[32m 523\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 529\u001b[39m \u001b[33;03m should start a local Ray instance.\u001b[39;00m\n\u001b[32m 530\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 531\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m addr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m addr == \u001b[33m\"\u001b[39m\u001b[33mauto\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m532\u001b[39m addr = \u001b[43mget_ray_address_from_environment\u001b[49m\u001b[43m(\u001b[49m\u001b[43maddr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemp_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 533\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m addr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m addr == \u001b[33m\"\u001b[39m\u001b[33mlocal\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 534\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+       "\u001b[36mFile \u001b[39m\u001b[32m/mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/services.py:419\u001b[39m, in \u001b[36mget_ray_address_from_environment\u001b[39m\u001b[34m(addr, temp_dir)\u001b[39m\n\u001b[32m 417\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 418\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m419\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(\n\u001b[32m 420\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mCould not find any running Ray instance. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 421\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease specify the one to connect to by setting `--address` flag \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 422\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor `RAY_ADDRESS` environment variable.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 423\u001b[39m )\n\u001b[32m 425\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bootstrap_addr\n",
+       "\u001b[31mConnectionError\u001b[39m: Could not find any running Ray instance. Please specify the one to connect to by setting `--address` flag or `RAY_ADDRESS` environment variable."
+      ]
+     }
+    ],
+    "source": [
+     "import time\n",
+     "import random\n",
+     "# Import the class file we just created\n",
+     "from speedy_utils.multi_worker.parallel_gpu_pool import ParallelGPUPool, GPUCluster\n",
+     "\n",
+     "# --- Define Your Worker ---\n",
+     "class MyEduWorker(ParallelGPUPool):\n",
+     "    def setup(self):\n",
+     "        # Load your heavy model here\n",
+     "        print(f\"Worker {self.worker_id}: Loading Model...\")\n",
+     "        time.sleep(1) # Simulate load\n",
+     "    \n",
+     "    def process_one_item(self, item):\n",
+     "        # Simulate GPU Work\n",
+     "        time.sleep(random.uniform(0.05, 0.2)) \n",
+     "        \n",
+     "        # Return whatever you want (filename, score, etc)\n",
+     "        return f\"{item}_DONE\"\n",
+     "\n",
+     "# --- Run It ---\n",
+     "# Create fake data\n",
+     "all_files = [f\"file_{i}.pdf\" for i in range(500)]\n",
+     "\n",
+     "cluster = GPUCluster(test_mode=False)\n",
+     "results = cluster.run(\n",
+     "    worker_class=MyEduWorker,\n",
+     "    all_data=all_files\n",
+     ")\n",
+     "\n",
+     "print(\"Processing Complete!\")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "2b67e6b5",
+    "metadata": {},
+    "outputs": [],
+    "source": []
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Megatron-Bridge-Host (3.12.12)",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.12.12"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
@@ -1,11 +1,11 @@
  [project]
  name = "speedy-utils"
- version = "1.1.40"
+ version = "1.1.43"
  description = "Fast and easy-to-use package for data science"
  authors = [{ name = "AnhVTH", email = "anhvth.226@gmail.com" }]
  readme = "README.md"
  license = { text = "MIT" }
- requires-python = ">=3.8"
+ requires-python = ">=3.9"
  dependencies = [
      "numpy",
      "requests",
@@ -33,6 +33,7 @@ dependencies = [
      "ray",
      "aiohttp",
      "pytest",
+     "rich>=14.3.1",
  ]
  classifiers = [
      "Development Status :: 4 - Beta",
@@ -53,7 +54,10 @@ Homepage = "https://github.com/anhvth/speedy"
  Repository = "https://github.com/anhvth/speedy"

  [project.optional-dependencies]
- ray = ["ray>=2.49.1; python_version >= '3.9'"]
+ ray = [
+     "vllm>=0.6.3",
+     "ray[data,llm]>=2.40.0",
+ ]

  [project.scripts]
  mpython = "speedy_utils.scripts.mpython:main"
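
Note the shape change in the `ray` extra: it now pulls in `vllm` and the `ray[data,llm]` subpackages rather than plain `ray`, so the heavy GPU stack only arrives via `pip install "speedy-utils[ray]"`. Downstream code presumably wants an import guard when the extra is absent; a minimal sketch, where the error message wording is an assumption rather than anything shipped by the package:

```python
# Hypothetical guard for the optional `ray` extra; speedy_utils itself may
# handle missing optional dependencies differently.
try:
    import ray
    import vllm
except ImportError as exc:
    raise ImportError(
        "Distributed LLM features need the optional dependencies: "
        'install them with `pip install "speedy-utils[ray]"`.'
    ) from exc
```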
@@ -0,0 +1,34 @@
+ # type: ignore
+ from speedy_utils import multi_process, multi_thread
+
+
+ def do_something(x):
+     if x % 3 == 0:
+         raise ValueError(f'Error at index {x}')
+     return x * 2
+
+
+ inputs = range(10)
+
+
+ if __name__ == '__main__':
+     print('Testing error_handler="log" with mp backend:')
+     results = multi_process(
+         do_something,
+         inputs,
+         backend='mp',
+         error_handler='log',
+         max_error_files=5,
+     )
+     print(f'Results: {results}')
+     print()
+
+     # print('Testing error_handler="log" with multi_thread:')
+     # results = multi_thread(
+     #     do_something,
+     #     inputs,
+     #     error_handler='log',
+     #     max_error_files=5,
+     # )
+     # print(f'Results: {results}')
+
@@ -0,0 +1,11 @@
+ from speedy_utils import *
+
+
+ def do_something(x):
+     x = 10
+     y = 0
+     x/y
+
+
+ do_something(1)
+