speedy-utils 1.1.46__py3-none-any.whl → 1.1.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/lm/llm.py +41 -12
- speedy_utils/__init__.py +4 -0
- speedy_utils/multi_worker/__init__.py +4 -0
- speedy_utils/multi_worker/_multi_process.py +425 -0
- speedy_utils/multi_worker/_multi_process_ray.py +308 -0
- speedy_utils/multi_worker/common.py +879 -0
- speedy_utils/multi_worker/dataset_sharding.py +203 -0
- speedy_utils/multi_worker/process.py +53 -1234
- speedy_utils/multi_worker/progress.py +71 -1
- speedy_utils/multi_worker/thread.py +45 -0
- speedy_utils/scripts/mpython.py +19 -12
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.47.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.47.dist-info}/RECORD +15 -11
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.47.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.47.dist-info}/entry_points.txt +0 -0
|
@@ -4,12 +4,15 @@ Real-time progress tracking for distributed Ray tasks.
|
|
|
4
4
|
This module provides a ProgressActor that allows workers to report item-level
|
|
5
5
|
progress in real-time, giving users visibility into actual items processed
|
|
6
6
|
rather than just task completion.
|
|
7
|
+
|
|
8
|
+
Users can simply call report_progress(n) from within their worker functions,
|
|
9
|
+
and speedy_utils will automatically route it to the centralized tracker.
|
|
7
10
|
"""
|
|
8
11
|
import time
|
|
9
12
|
import threading
|
|
10
13
|
from typing import Optional, Callable
|
|
11
14
|
|
|
12
|
-
__all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor']
|
|
15
|
+
__all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor', 'report_progress', 'set_progress_context']
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
def get_ray_progress_actor():
|
|
@@ -119,6 +122,73 @@ class ProgressPoller:
|
|
|
119
122
|
if self._thread:
|
|
120
123
|
self._thread.join(timeout=2.0)
|
|
121
124
|
|
|
125
|
+
def _poll_loop(self):
|
|
126
|
+
"""Background loop that polls progress and updates tqdm bar."""
|
|
127
|
+
while not self._stop_event.is_set():
|
|
128
|
+
try:
|
|
129
|
+
stats = self._ray.get(self.progress_actor.get_progress.remote())
|
|
130
|
+
self.pbar.n = stats["processed"]
|
|
131
|
+
self.pbar.refresh()
|
|
132
|
+
except Exception:
|
|
133
|
+
pass
|
|
134
|
+
self._stop_event.wait(self.poll_interval)
|
|
135
|
+
|
|
136
|
+
# Final update
|
|
137
|
+
try:
|
|
138
|
+
stats = self._ray.get(self.progress_actor.get_progress.remote())
|
|
139
|
+
self.pbar.n = stats["processed"]
|
|
140
|
+
self.pbar.refresh()
|
|
141
|
+
except Exception:
|
|
142
|
+
pass
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Thread-local storage for progress context
|
|
146
|
+
_progress_context = threading.local()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def set_progress_context(progress_actor):
|
|
150
|
+
"""
|
|
151
|
+
Set the progress actor in thread-local storage.
|
|
152
|
+
|
|
153
|
+
This is called automatically by speedy_utils when setting up worker context.
|
|
154
|
+
Users don't need to call this directly.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
progress_actor: Ray actor handle for progress tracking
|
|
158
|
+
"""
|
|
159
|
+
_progress_context.actor = progress_actor
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def report_progress(n: int = 1):
|
|
163
|
+
"""
|
|
164
|
+
Report progress to the centralized tracker.
|
|
165
|
+
|
|
166
|
+
This is a simple helper that workers can call to report items processed.
|
|
167
|
+
Works automatically when called from within multi_process with Ray backend.
|
|
168
|
+
Safe to call even when no progress tracking is active (no-op).
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
n: Number of items processed
|
|
172
|
+
|
|
173
|
+
Example:
|
|
174
|
+
def process_data(items):
|
|
175
|
+
for i, item in enumerate(items):
|
|
176
|
+
result = expensive_operation(item)
|
|
177
|
+
|
|
178
|
+
# Report progress every 100 items
|
|
179
|
+
if i % 100 == 0:
|
|
180
|
+
report_progress(100)
|
|
181
|
+
|
|
182
|
+
return results
|
|
183
|
+
"""
|
|
184
|
+
actor = getattr(_progress_context, 'actor', None)
|
|
185
|
+
if actor is not None:
|
|
186
|
+
try:
|
|
187
|
+
import ray
|
|
188
|
+
ray.get(actor.update.remote(n))
|
|
189
|
+
except Exception:
|
|
190
|
+
# Silently ignore progress reporting errors
|
|
191
|
+
pass
|
|
122
192
|
def _poll_loop(self):
|
|
123
193
|
"""Poll the progress actor and update tqdm."""
|
|
124
194
|
while not self._stop_event.is_set():
|
|
@@ -26,6 +26,9 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
|
|
|
26
26
|
T = TypeVar('T')
|
|
27
27
|
R = TypeVar('R')
|
|
28
28
|
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from typing import Literal, overload
|
|
31
|
+
|
|
29
32
|
SPEEDY_RUNNING_THREADS: list[threading.Thread] = [] # cooperative shutdown tracking
|
|
30
33
|
_SPEEDY_THREADS_LOCK = threading.Lock()
|
|
31
34
|
|
|
@@ -342,6 +345,48 @@ def _cancel_futures(inflight: set[Future[Any]]) -> None:
|
|
|
342
345
|
# ────────────────────────────────────────────────────────────
|
|
343
346
|
# main API
|
|
344
347
|
# ────────────────────────────────────────────────────────────
|
|
348
|
+
if TYPE_CHECKING:
|
|
349
|
+
@overload
|
|
350
|
+
def multi_thread(
|
|
351
|
+
func: Callable[[T], R],
|
|
352
|
+
inputs: Iterable[T],
|
|
353
|
+
*,
|
|
354
|
+
workers: int | None = ...,
|
|
355
|
+
batch: int = ...,
|
|
356
|
+
ordered: bool = ...,
|
|
357
|
+
progress: bool = ...,
|
|
358
|
+
progress_update: int = ...,
|
|
359
|
+
prefetch_factor: int = ...,
|
|
360
|
+
timeout: float | None = ...,
|
|
361
|
+
stop_on_error: bool | None = ...,
|
|
362
|
+
error_handler: Literal['raise'] = ...,
|
|
363
|
+
max_error_files: int = ...,
|
|
364
|
+
n_proc: int = ...,
|
|
365
|
+
store_output_pkl_file: str | None = ...,
|
|
366
|
+
**fixed_kwargs: Any,
|
|
367
|
+
) -> list[R]: ...
|
|
368
|
+
|
|
369
|
+
@overload
|
|
370
|
+
def multi_thread(
|
|
371
|
+
func: Callable[[T], R],
|
|
372
|
+
inputs: Iterable[T],
|
|
373
|
+
*,
|
|
374
|
+
workers: int | None = ...,
|
|
375
|
+
batch: int = ...,
|
|
376
|
+
ordered: bool = ...,
|
|
377
|
+
progress: bool = ...,
|
|
378
|
+
progress_update: int = ...,
|
|
379
|
+
prefetch_factor: int = ...,
|
|
380
|
+
timeout: float | None = ...,
|
|
381
|
+
stop_on_error: bool | None = ...,
|
|
382
|
+
error_handler: Literal['ignore', 'log'] = ...,
|
|
383
|
+
max_error_files: int = ...,
|
|
384
|
+
n_proc: int = ...,
|
|
385
|
+
store_output_pkl_file: str | None = ...,
|
|
386
|
+
**fixed_kwargs: Any,
|
|
387
|
+
) -> list[R | None]: ...
|
|
388
|
+
|
|
389
|
+
|
|
345
390
|
def multi_thread(
|
|
346
391
|
func: Callable[[T], R],
|
|
347
392
|
inputs: Iterable[T],
|
speedy_utils/scripts/mpython.py
CHANGED
|
@@ -9,11 +9,12 @@ import shutil
|
|
|
9
9
|
import subprocess
|
|
10
10
|
import sys
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
try:
|
|
13
14
|
from rich.console import Console, Group
|
|
14
15
|
from rich.panel import Panel
|
|
15
|
-
from rich.text import Text
|
|
16
16
|
from rich.syntax import Syntax
|
|
17
|
+
from rich.text import Text
|
|
17
18
|
except ImportError:
|
|
18
19
|
Console = None
|
|
19
20
|
Group = None
|
|
@@ -81,7 +82,7 @@ def assert_script(python_path):
|
|
|
81
82
|
)
|
|
82
83
|
if Console and Panel and Text and Syntax and Group:
|
|
83
84
|
console = Console(stderr=True, force_terminal=True)
|
|
84
|
-
syntax = Syntax(helper_code,
|
|
85
|
+
syntax = Syntax(helper_code, 'python', theme='monokai', line_numbers=False)
|
|
85
86
|
console.print()
|
|
86
87
|
console.print(
|
|
87
88
|
Panel(
|
|
@@ -93,29 +94,35 @@ def assert_script(python_path):
|
|
|
93
94
|
)
|
|
94
95
|
)
|
|
95
96
|
console.print()
|
|
96
|
-
console.print(
|
|
97
|
+
console.print('```python')
|
|
97
98
|
console.print(syntax)
|
|
98
|
-
console.print(
|
|
99
|
-
console.print(
|
|
99
|
+
console.print('```')
|
|
100
|
+
console.print('-' * 80)
|
|
100
101
|
else:
|
|
101
102
|
# Fallback to plain text
|
|
102
|
-
print(
|
|
103
|
+
print(
|
|
104
|
+
f'Warning: MP_ID and MP_TOTAL not found in {python_path}, please add them.',
|
|
105
|
+
file=sys.stderr,
|
|
106
|
+
)
|
|
103
107
|
print(f'Example:\n{helper_code}', file=sys.stderr)
|
|
104
108
|
|
|
105
109
|
|
|
106
110
|
def run_in_tmux(commands_to_run, tmux_name, num_windows):
|
|
107
111
|
with open('/tmp/start_multirun_tmux.sh', 'w') as script_file:
|
|
108
112
|
script_file.write('#!/bin/bash\n\n')
|
|
109
|
-
|
|
113
|
+
# Use -n to name the first window explicitly (avoids base-index issues)
|
|
114
|
+
script_file.write(f'tmux new-session -d -s {tmux_name} -n w0\n')
|
|
110
115
|
for i, cmd in enumerate(itertools.cycle(commands_to_run)):
|
|
111
116
|
if i >= num_windows:
|
|
112
117
|
break
|
|
113
|
-
window_name = f'{
|
|
118
|
+
window_name = f'w{i}'
|
|
119
|
+
target = f'{tmux_name}:{window_name}'
|
|
114
120
|
if i == 0:
|
|
115
|
-
script_file.write(f"tmux send-keys -t {
|
|
121
|
+
script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
|
|
116
122
|
else:
|
|
117
|
-
|
|
118
|
-
script_file.write(f
|
|
123
|
+
# Create window with explicit name, then send keys to it
|
|
124
|
+
script_file.write(f'tmux new-window -t {tmux_name} -n {window_name}\n')
|
|
125
|
+
script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
|
|
119
126
|
|
|
120
127
|
# Make the script executable
|
|
121
128
|
script_file.write('chmod +x /tmp/start_multirun_tmux.sh\n')
|
|
@@ -135,7 +142,7 @@ def main():
|
|
|
135
142
|
parser = argparse.ArgumentParser(
|
|
136
143
|
description='Process fold arguments',
|
|
137
144
|
epilog=f'Helper code for multi-process sharding:\n{helper_code}',
|
|
138
|
-
formatter_class=argparse.RawDescriptionHelpFormatter
|
|
145
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
139
146
|
)
|
|
140
147
|
parser.add_argument(
|
|
141
148
|
'--total_fold', '-t', default=16, type=int, help='total number of folds'
|
|
@@ -7,7 +7,7 @@ llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbf
|
|
|
7
7
|
llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
|
|
8
8
|
llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
|
|
9
9
|
llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
|
|
10
|
-
llm_utils/lm/llm.py,sha256=
|
|
10
|
+
llm_utils/lm/llm.py,sha256=KEtrHq5D8ZkeD4iTc_zPgOtgVXpsCRA-A3fmWgYCz0w,21378
|
|
11
11
|
llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
|
|
12
12
|
llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
|
|
13
13
|
llm_utils/lm/mixins.py,sha256=Nz7CwJFBOvbZNbODUlJC04Pcbac3zWnT8vy7sZG_MVI,24906
|
|
@@ -30,7 +30,7 @@ llm_utils/vector_cache/core.py,sha256=VXuYJy1AX22NHKvIXRriETip5RrmQcNp73-g-ZT774
|
|
|
30
30
|
llm_utils/vector_cache/types.py,sha256=CpMZanJSTeBVxQSqjBq6pBVWp7u2-JRcgY9t5jhykdQ,438
|
|
31
31
|
llm_utils/vector_cache/utils.py,sha256=OsiRFydv8i8HiJtPL9hh40aUv8I5pYfg2zvmtDi4DME,1446
|
|
32
32
|
speedy_utils/__imports.py,sha256=V0YzkDK4-QkK_IDXY1be6C6_STuNhXAKIp4_dM0coQs,7800
|
|
33
|
-
speedy_utils/__init__.py,sha256=
|
|
33
|
+
speedy_utils/__init__.py,sha256=4RLsC0s9sAkGWcwC85eQQ1HT2Ea_flhbBQUCJuzJAmk,3429
|
|
34
34
|
speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
speedy_utils/common/clock.py,sha256=raLtMGIgzrRej5kUt7hOUm2ZZw2THVPo-q8dMvdZOxw,7354
|
|
36
36
|
speedy_utils/common/function_decorator.py,sha256=GKXqRs_hHFFmhyhql0Br0o52WzekUnpNlm99NfaVwgY,2025
|
|
@@ -43,21 +43,25 @@ speedy_utils/common/utils_error.py,sha256=KQx2JTZsvsX2DsKRIoVR-4rc-6-l3OzEz9UtnH
|
|
|
43
43
|
speedy_utils/common/utils_io.py,sha256=94m_EZ2eIs3w2m0rx-QQWsREPpVJctpweYHco3byczQ,15876
|
|
44
44
|
speedy_utils/common/utils_misc.py,sha256=ZRJCS7OJxybpVm1sasoeCYRW2TaaGCXj4DySYlQeVR8,2227
|
|
45
45
|
speedy_utils/common/utils_print.py,sha256=AGDB7mgJnO00QkJBH6kJb46738q3GzMUZPwtQ248vQw,4763
|
|
46
|
-
speedy_utils/multi_worker/__init__.py,sha256=
|
|
46
|
+
speedy_utils/multi_worker/__init__.py,sha256=JzPNJtYgBgSlAG6yBU0HhFcEwwCrH3iWeo-W7HZ9Pxs,503
|
|
47
|
+
speedy_utils/multi_worker/_multi_process.py,sha256=0ceoXZZAdfSVVqZAgtCLw7Oq_ZLh6y0i6OkPgx6cicc,13343
|
|
48
|
+
speedy_utils/multi_worker/_multi_process_ray.py,sha256=Qri-jbfHMLxUyiV5I2-KehTj1XM2YxagXWMesFkyzBg,9868
|
|
49
|
+
speedy_utils/multi_worker/common.py,sha256=e5FVKtPGrIw4s4-sA_SIzaIFaTeVK598jXBC5U8YcLU,27014
|
|
47
50
|
speedy_utils/multi_worker/dataset_ray.py,sha256=U_l_4Y7CVpaHiApsXQSdNvals8NK87LHPS_XHiJF3qs,10044
|
|
51
|
+
speedy_utils/multi_worker/dataset_sharding.py,sha256=dWGCQiElb8SvLHfh97xT-QA2csmf2W3s8S4cFjZrO9Y,7196
|
|
48
52
|
speedy_utils/multi_worker/parallel_gpu_pool.py,sha256=A7llZcQbRVZqwCqNRku7TpqGCdSoIzpdcTaupgqT5nI,6108
|
|
49
|
-
speedy_utils/multi_worker/process.py,sha256=
|
|
50
|
-
speedy_utils/multi_worker/progress.py,sha256=
|
|
51
|
-
speedy_utils/multi_worker/thread.py,sha256=
|
|
53
|
+
speedy_utils/multi_worker/process.py,sha256=1P-7J93jt51lMfpHLD9F8DDKAdeMtyYIFVOKEiN5nok,3927
|
|
54
|
+
speedy_utils/multi_worker/progress.py,sha256=V-BN4GSmwOwL_8Y0J7Ya-byJUQXvuYcdqkmQqGHDaVw,7004
|
|
55
|
+
speedy_utils/multi_worker/thread.py,sha256=iVjg4SI3GjzA5DhasT0w29U61fKnWXoLqCgoqQPzyBA,28528
|
|
52
56
|
speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
57
|
speedy_utils/scripts/kill_mpython.py,sha256=xYdJIs1KQ4zlyj-E4XN6j1T7dOhzVo65uKzjXmIvUDU,1847
|
|
54
|
-
speedy_utils/scripts/mpython.py,sha256=
|
|
58
|
+
speedy_utils/scripts/mpython.py,sha256=b_Zbytrht2oYfV45P2nl7u5gNsYp4qQ868Pz8QReSD4,6741
|
|
55
59
|
speedy_utils/scripts/openapi_client_codegen.py,sha256=GModmmhkvGnxljK4KczyixKDrk-VEcLaW5I0XT6tzWo,9657
|
|
56
60
|
vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
|
|
57
61
|
vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
|
|
58
62
|
vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
|
|
59
63
|
vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
|
|
60
|
-
speedy_utils-1.1.
|
|
61
|
-
speedy_utils-1.1.
|
|
62
|
-
speedy_utils-1.1.
|
|
63
|
-
speedy_utils-1.1.
|
|
64
|
+
speedy_utils-1.1.47.dist-info/METADATA,sha256=Ol3PRn5VGCiozaLocWrOLiYdO6nTHoEmBYhWODVfIi4,13073
|
|
65
|
+
speedy_utils-1.1.47.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
66
|
+
speedy_utils-1.1.47.dist-info/entry_points.txt,sha256=QY_2Vn6IcPCaqlY74pDRyZ6UTvPilaNPT7Gxijj7XI8,343
|
|
67
|
+
speedy_utils-1.1.47.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|