speedy-utils 1.1.45__py3-none-any.whl → 1.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,12 +4,15 @@ Real-time progress tracking for distributed Ray tasks.
4
4
  This module provides a ProgressActor that allows workers to report item-level
5
5
  progress in real-time, giving users visibility into actual items processed
6
6
  rather than just task completion.
7
+
8
+ Users can simply call report_progress(n) from within their worker functions,
9
+ and speedy_utils will automatically route it to the centralized tracker.
7
10
  """
8
11
  import time
9
12
  import threading
10
13
  from typing import Optional, Callable
11
14
 
12
- __all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor']
15
+ __all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor', 'report_progress', 'set_progress_context']
13
16
 
14
17
 
15
18
  def get_ray_progress_actor():
@@ -119,6 +122,73 @@ class ProgressPoller:
119
122
  if self._thread:
120
123
  self._thread.join(timeout=2.0)
121
124
 
125
def _poll_loop(self):
    """Background loop that mirrors the progress actor's count onto the tqdm bar.

    Each pass reads the ``"processed"`` entry returned by
    ``progress_actor.get_progress`` and stores it in ``self.pbar.n`` before
    refreshing the bar, then waits on the stop event for up to
    ``self.poll_interval`` seconds. Once the stop event is set, exactly one
    more pass runs so the bar shows the latest count before the loop exits.

    Polling is best-effort: a failed pass never ends the loop; it is logged
    at DEBUG level and the next pass tries again.
    """
    import logging  # local import: the file's import block is not visible here

    log = logging.getLogger(__name__)
    while True:
        # Sample the flag *before* fetching, so the pass that observes the
        # stop request doubles as the final update.
        last_pass = self._stop_event.is_set()
        try:
            stats = self._ray.get(self.progress_actor.get_progress.remote())
            self.pbar.n = stats["processed"]
            self.pbar.refresh()
        except Exception:
            # Progress display must never break the run; keep a trace though.
            log.debug("Progress poll failed", exc_info=True)
        if last_pass:
            return
        # Returns early as soon as the stop event is set.
        self._stop_event.wait(self.poll_interval)
143
+
144
+
145
# Thread-local slot holding the progress actor for the current thread.
# Each thread sees only the actor that was registered from that same thread.
_progress_context = threading.local()


def set_progress_context(progress_actor):
    """
    Set the progress actor in thread-local storage.

    This is called automatically by speedy_utils when setting up worker context.
    Users don't need to call this directly.

    Args:
        progress_actor: Ray actor handle for progress tracking. Passing
            ``None`` makes later ``report_progress`` calls on this thread
            no-ops again.
    """
    _progress_context.actor = progress_actor


def report_progress(n: int = 1):
    """
    Report progress to the centralized tracker.

    This is a simple helper that workers can call to report items processed.
    Works automatically when called from within multi_process with Ray backend.
    Safe to call even when no progress tracking is active (no-op).

    The call waits (``ray.get``) until the actor has accepted the update.
    Reporting is best-effort: any failure is logged at DEBUG level and
    otherwise ignored, so it can never break the worker function.

    Args:
        n: Number of items processed since the previous report. A count of
            zero is skipped without contacting the actor.

    Example:
        def process_data(items):
            results = []
            for i, item in enumerate(items, start=1):
                results.append(expensive_operation(item))

                # Report progress every 100 items
                if i % 100 == 0:
                    report_progress(100)

            # Report the tail that did not fill a whole batch of 100
            report_progress(len(items) % 100)
            return results
    """
    if not n:
        # Nothing to add; avoid a blocking round trip to the actor.
        return

    actor = getattr(_progress_context, 'actor', None)
    if actor is None:
        # No tracker registered on this thread.
        return

    try:
        # Imported lazily so this module does not require ray at import time.
        import ray
        ray.get(actor.update.remote(n))
    except Exception:
        # Progress reporting must never fail the caller; keep a trace for
        # debugging instead of discarding the error entirely.
        import logging
        logging.getLogger(__name__).debug(
            'Progress report failed', exc_info=True
        )
122
192
  def _poll_loop(self):
123
193
  """Poll the progress actor and update tqdm."""
124
194
  while not self._stop_event.is_set():
@@ -26,6 +26,9 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
26
26
  T = TypeVar('T')
27
27
  R = TypeVar('R')
28
28
 
29
+ if TYPE_CHECKING:
30
+ from typing import Literal, overload
31
+
29
32
  SPEEDY_RUNNING_THREADS: list[threading.Thread] = [] # cooperative shutdown tracking
30
33
  _SPEEDY_THREADS_LOCK = threading.Lock()
31
34
 
@@ -342,6 +345,48 @@ def _cancel_futures(inflight: set[Future[Any]]) -> None:
342
345
  # ────────────────────────────────────────────────────────────
343
346
  # main API
344
347
  # ────────────────────────────────────────────────────────────
348
# Typing-only overloads for ``multi_thread``. They are guarded by
# TYPE_CHECKING, so they are never evaluated at runtime and only tell static
# type checkers how the element type of the result depends on ``error_handler``.
# NOTE(review): both overloads give ``error_handler`` a default (``= ...``), so
# a call that omits it matches the first overload. Confirm that this agrees
# with the implementation's actual default.
if TYPE_CHECKING:
    # error_handler='raise': every element of the result is an ``R``.
    @overload
    def multi_thread(
        func: Callable[[T], R],
        inputs: Iterable[T],
        *,
        workers: int | None = ...,
        batch: int = ...,
        ordered: bool = ...,
        progress: bool = ...,
        progress_update: int = ...,
        prefetch_factor: int = ...,
        timeout: float | None = ...,
        stop_on_error: bool | None = ...,
        error_handler: Literal['raise'] = ...,
        max_error_files: int = ...,
        n_proc: int = ...,
        store_output_pkl_file: str | None = ...,
        **fixed_kwargs: Any,
    ) -> list[R]: ...

    # error_handler='ignore' or 'log': elements may be ``None``
    # (presumably the slots of items whose call failed; verify against the
    # implementation).
    @overload
    def multi_thread(
        func: Callable[[T], R],
        inputs: Iterable[T],
        *,
        workers: int | None = ...,
        batch: int = ...,
        ordered: bool = ...,
        progress: bool = ...,
        progress_update: int = ...,
        prefetch_factor: int = ...,
        timeout: float | None = ...,
        stop_on_error: bool | None = ...,
        error_handler: Literal['ignore', 'log'] = ...,
        max_error_files: int = ...,
        n_proc: int = ...,
        store_output_pkl_file: str | None = ...,
        **fixed_kwargs: Any,
    ) -> list[R | None]: ...
388
+
389
+
345
390
  def multi_thread(
346
391
  func: Callable[[T], R],
347
392
  inputs: Iterable[T],
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env python3
2
+ """Script to kill all tmux sessions matching 'mpython*' pattern."""
3
+
4
+ import subprocess
5
+ import sys
6
+
7
+
8
def main():
    """Kill all tmux sessions with names starting with 'mpython'.

    Lists the sessions with ``tmux ls``, keeps those whose name starts with
    ``mpython`` and runs ``tmux kill-session -t <name>`` for each of them.

    Returns normally when there is nothing to do (no tmux server, or no
    matching session) and when every matching session was killed.

    Raises:
        SystemExit: with the ``tmux ls`` return code when listing fails for
            any reason other than "no server", or with code 1 when tmux is
            not installed, an unexpected error occurs, or at least one
            session could not be killed.
    """
    try:
        # Get list of tmux sessions
        result = subprocess.run(
            ["tmux", "ls"],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            stderr = result.stderr.lower()
            # tmux reports a missing server in two ways: "no server running
            # on <socket>" when the socket exists but nothing listens, and
            # "error connecting to <socket> (No such file or directory)"
            # when the socket was never created.
            no_server = "no server running" in stderr or (
                "error connecting to" in stderr
                and "no such file or directory" in stderr
            )
            if no_server:
                print("No tmux server running.")
                return
            print(f"Error listing tmux sessions: {result.stderr}", file=sys.stderr)
            sys.exit(result.returncode)

        # Each line of `tmux ls` looks like "<name>: <n> windows ...".
        names = (line.partition(':')[0] for line in result.stdout.splitlines())
        sessions = [name for name in names if name.startswith('mpython')]

        if not sessions:
            print("No tmux sessions found matching 'mpython*'")
            return

        print(f"Found {len(sessions)} tmux session(s) to kill: {', '.join(sessions)}")

        # Kill each session; keep going after a failure so one stuck session
        # does not protect the others.
        failed = False
        for session in sessions:
            kill_result = subprocess.run(
                ["tmux", "kill-session", "-t", session],
                capture_output=True,
                text=True,
            )
            if kill_result.returncode == 0:
                print(f"Successfully killed tmux session '{session}'")
            else:
                failed = True
                print(
                    f"Error killing tmux session '{session}': {kill_result.stderr}",
                    file=sys.stderr,
                )

        if failed:
            # Let shell callers detect a partial failure.
            sys.exit(1)

    except FileNotFoundError:
        print(
            "Error: tmux command not found. Please ensure tmux is installed.",
            file=sys.stderr,
        )
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report and exit instead of a traceback.
        # SystemExit is not an Exception subclass, so the exits above pass through.
        print(f"Unexpected error: {e}", file=sys.stderr)
        sys.exit(1)
55
+
56
+
57
+ if __name__ == "__main__":
58
+ main()
@@ -7,6 +7,20 @@ import re
7
7
  import shlex # To properly escape command line arguments
8
8
  import shutil
9
9
  import subprocess
10
+ import sys
11
+
12
+
13
+ try:
14
+ from rich.console import Console, Group
15
+ from rich.panel import Panel
16
+ from rich.syntax import Syntax
17
+ from rich.text import Text
18
+ except ImportError:
19
+ Console = None
20
+ Group = None
21
+ Panel = None
22
+ Text = None
23
+ Syntax = None
10
24
 
11
25
 
12
26
  taskset_path = shutil.which('taskset')
@@ -60,33 +74,55 @@ def assert_script(python_path):
60
74
  with open(python_path) as f:
61
75
  code_str = f.read()
62
76
  if 'MP_ID' not in code_str or 'MP_TOTAL' not in code_str:
63
- example_code = (
64
- 'import os; MP_TOTAL = int(os.environ.get("MP_TOTAL"));MP_ID = int(os.environ.get("MP_ID"))\n'
77
+ helper_code = (
78
+ 'import os\n'
79
+ 'MP_ID = int(os.getenv("MP_ID", "0"))\n'
80
+ 'MP_TOTAL = int(os.getenv("MP_TOTAL", "1"))\n'
65
81
  'inputs = list(inputs[MP_ID::MP_TOTAL])'
66
82
  )
67
- # ANSI escape codes for coloring
68
- YELLOW = '\033[93m'
69
- RESET = '\033[0m'
70
- raise_msg = (
71
- f'MP_ID and MP_TOTAL not found in {python_path}, please add them.\n\n'
72
- f'Example:\n{YELLOW}{example_code}{RESET}'
73
- )
74
- raise Exception(raise_msg)
83
+ if Console and Panel and Text and Syntax and Group:
84
+ console = Console(stderr=True, force_terminal=True)
85
+ syntax = Syntax(helper_code, 'python', theme='monokai', line_numbers=False)
86
+ console.print()
87
+ console.print(
88
+ Panel(
89
+ f'Your script {python_path} is missing MP_ID and/or MP_TOTAL variables.\n\n'
90
+ f'Add the following code to enable multi-process sharding:',
91
+ title='[bold yellow]Warning: Missing Multi-Process Variables[/bold yellow]',
92
+ border_style='yellow',
93
+ expand=False,
94
+ )
95
+ )
96
+ console.print()
97
+ console.print('```python')
98
+ console.print(syntax)
99
+ console.print('```')
100
+ console.print('-' * 80)
101
+ else:
102
+ # Fallback to plain text
103
+ print(
104
+ f'Warning: MP_ID and MP_TOTAL not found in {python_path}, please add them.',
105
+ file=sys.stderr,
106
+ )
107
+ print(f'Example:\n{helper_code}', file=sys.stderr)
75
108
 
76
109
 
77
110
  def run_in_tmux(commands_to_run, tmux_name, num_windows):
78
111
  with open('/tmp/start_multirun_tmux.sh', 'w') as script_file:
79
112
  script_file.write('#!/bin/bash\n\n')
80
- script_file.write(f'tmux new-session -d -s {tmux_name}\n')
113
+ # Use -n to name the first window explicitly (avoids base-index issues)
114
+ script_file.write(f'tmux new-session -d -s {tmux_name} -n w0\n')
81
115
  for i, cmd in enumerate(itertools.cycle(commands_to_run)):
82
116
  if i >= num_windows:
83
117
  break
84
- window_name = f'{tmux_name}:{i}'
118
+ window_name = f'w{i}'
119
+ target = f'{tmux_name}:{window_name}'
85
120
  if i == 0:
86
- script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")
121
+ script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
87
122
  else:
88
- script_file.write(f'tmux new-window -t {tmux_name}\n')
89
- script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")
123
+ # Create window with explicit name, then send keys to it
124
+ script_file.write(f'tmux new-window -t {tmux_name} -n {window_name}\n')
125
+ script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
90
126
 
91
127
  # Make the script executable
92
128
  script_file.write('chmod +x /tmp/start_multirun_tmux.sh\n')
@@ -96,7 +132,18 @@ def run_in_tmux(commands_to_run, tmux_name, num_windows):
96
132
  def main():
97
133
  # Assert that MP_ID and MP_TOTAL are not already set
98
134
 
99
- parser = argparse.ArgumentParser(description='Process fold arguments')
135
+ helper_code = (
136
+ 'import os\n'
137
+ 'MP_ID = int(os.getenv("MP_ID", "0"))\n'
138
+ 'MP_TOTAL = int(os.getenv("MP_TOTAL", "1"))\n'
139
+ 'inputs = list(inputs[MP_ID::MP_TOTAL])'
140
+ )
141
+
142
+ parser = argparse.ArgumentParser(
143
+ description='Process fold arguments',
144
+ epilog=f'Helper code for multi-process sharding:\n{helper_code}',
145
+ formatter_class=argparse.RawDescriptionHelpFormatter,
146
+ )
100
147
  parser.add_argument(
101
148
  '--total_fold', '-t', default=16, type=int, help='total number of folds'
102
149
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.45
3
+ Version: 1.1.47
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -7,7 +7,7 @@ llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbf
7
7
  llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
8
8
  llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
9
9
  llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
10
- llm_utils/lm/llm.py,sha256=i6L5aKF6NhzmaFPBA2pCm8TkQmS1nCgORMqP5QyfJ28,20097
10
+ llm_utils/lm/llm.py,sha256=KEtrHq5D8ZkeD4iTc_zPgOtgVXpsCRA-A3fmWgYCz0w,21378
11
11
  llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
12
12
  llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
13
13
  llm_utils/lm/mixins.py,sha256=Nz7CwJFBOvbZNbODUlJC04Pcbac3zWnT8vy7sZG_MVI,24906
@@ -30,7 +30,7 @@ llm_utils/vector_cache/core.py,sha256=VXuYJy1AX22NHKvIXRriETip5RrmQcNp73-g-ZT774
30
30
  llm_utils/vector_cache/types.py,sha256=CpMZanJSTeBVxQSqjBq6pBVWp7u2-JRcgY9t5jhykdQ,438
31
31
  llm_utils/vector_cache/utils.py,sha256=OsiRFydv8i8HiJtPL9hh40aUv8I5pYfg2zvmtDi4DME,1446
32
32
  speedy_utils/__imports.py,sha256=V0YzkDK4-QkK_IDXY1be6C6_STuNhXAKIp4_dM0coQs,7800
33
- speedy_utils/__init__.py,sha256=_kSjS816Kv5UZPd4EM_juB68tXM_sHUYt6OFB-RhE6U,3261
33
+ speedy_utils/__init__.py,sha256=4RLsC0s9sAkGWcwC85eQQ1HT2Ea_flhbBQUCJuzJAmk,3429
34
34
  speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  speedy_utils/common/clock.py,sha256=raLtMGIgzrRej5kUt7hOUm2ZZw2THVPo-q8dMvdZOxw,7354
36
36
  speedy_utils/common/function_decorator.py,sha256=GKXqRs_hHFFmhyhql0Br0o52WzekUnpNlm99NfaVwgY,2025
@@ -43,20 +43,25 @@ speedy_utils/common/utils_error.py,sha256=KQx2JTZsvsX2DsKRIoVR-4rc-6-l3OzEz9UtnH
43
43
  speedy_utils/common/utils_io.py,sha256=94m_EZ2eIs3w2m0rx-QQWsREPpVJctpweYHco3byczQ,15876
44
44
  speedy_utils/common/utils_misc.py,sha256=ZRJCS7OJxybpVm1sasoeCYRW2TaaGCXj4DySYlQeVR8,2227
45
45
  speedy_utils/common/utils_print.py,sha256=AGDB7mgJnO00QkJBH6kJb46738q3GzMUZPwtQ248vQw,4763
46
- speedy_utils/multi_worker/__init__.py,sha256=urcuxzaAJp-Rl3SIwHNre3x2vyHxLR7YGiDdm-Q8GQs,361
46
+ speedy_utils/multi_worker/__init__.py,sha256=JzPNJtYgBgSlAG6yBU0HhFcEwwCrH3iWeo-W7HZ9Pxs,503
47
+ speedy_utils/multi_worker/_multi_process.py,sha256=0ceoXZZAdfSVVqZAgtCLw7Oq_ZLh6y0i6OkPgx6cicc,13343
48
+ speedy_utils/multi_worker/_multi_process_ray.py,sha256=Qri-jbfHMLxUyiV5I2-KehTj1XM2YxagXWMesFkyzBg,9868
49
+ speedy_utils/multi_worker/common.py,sha256=e5FVKtPGrIw4s4-sA_SIzaIFaTeVK598jXBC5U8YcLU,27014
47
50
  speedy_utils/multi_worker/dataset_ray.py,sha256=U_l_4Y7CVpaHiApsXQSdNvals8NK87LHPS_XHiJF3qs,10044
51
+ speedy_utils/multi_worker/dataset_sharding.py,sha256=dWGCQiElb8SvLHfh97xT-QA2csmf2W3s8S4cFjZrO9Y,7196
48
52
  speedy_utils/multi_worker/parallel_gpu_pool.py,sha256=A7llZcQbRVZqwCqNRku7TpqGCdSoIzpdcTaupgqT5nI,6108
49
- speedy_utils/multi_worker/process.py,sha256=U-pjHoWZ3xOeplMl2nSxVeiJE0F9V-eswpSdK-8c3dU,45446
50
- speedy_utils/multi_worker/progress.py,sha256=Ozeca-t-j1224n_dWwZkWzva9DC16SCLgScKeGtXLaQ,4717
51
- speedy_utils/multi_worker/thread.py,sha256=E7o_iUCIKmgk1tFt7mZAFT7c5q229wVzWj-trmVsxVA,27254
53
+ speedy_utils/multi_worker/process.py,sha256=1P-7J93jt51lMfpHLD9F8DDKAdeMtyYIFVOKEiN5nok,3927
54
+ speedy_utils/multi_worker/progress.py,sha256=V-BN4GSmwOwL_8Y0J7Ya-byJUQXvuYcdqkmQqGHDaVw,7004
55
+ speedy_utils/multi_worker/thread.py,sha256=iVjg4SI3GjzA5DhasT0w29U61fKnWXoLqCgoqQPzyBA,28528
52
56
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- speedy_utils/scripts/mpython.py,sha256=uJ-QPG43cyHUptMP4MnyH7IdSTh4mzVQAejy9o1cQoE,5068
57
+ speedy_utils/scripts/kill_mpython.py,sha256=xYdJIs1KQ4zlyj-E4XN6j1T7dOhzVo65uKzjXmIvUDU,1847
58
+ speedy_utils/scripts/mpython.py,sha256=b_Zbytrht2oYfV45P2nl7u5gNsYp4qQ868Pz8QReSD4,6741
54
59
  speedy_utils/scripts/openapi_client_codegen.py,sha256=GModmmhkvGnxljK4KczyixKDrk-VEcLaW5I0XT6tzWo,9657
55
60
  vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
56
61
  vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
57
62
  vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
58
63
  vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
59
- speedy_utils-1.1.45.dist-info/METADATA,sha256=yXr9vEuXiRpvZ3VibGULqr3X1a832aQZJAb-lJLM6mM,13073
60
- speedy_utils-1.1.45.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
61
- speedy_utils-1.1.45.dist-info/entry_points.txt,sha256=rwn89AYfBUh9SRJtFbpp-u2JIKiqmZ2sczvqyO6s9cI,289
62
- speedy_utils-1.1.45.dist-info/RECORD,,
64
+ speedy_utils-1.1.47.dist-info/METADATA,sha256=Ol3PRn5VGCiozaLocWrOLiYdO6nTHoEmBYhWODVfIi4,13073
65
+ speedy_utils-1.1.47.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
66
+ speedy_utils-1.1.47.dist-info/entry_points.txt,sha256=QY_2Vn6IcPCaqlY74pDRyZ6UTvPilaNPT7Gxijj7XI8,343
67
+ speedy_utils-1.1.47.dist-info/RECORD,,
@@ -1,5 +1,6 @@
1
1
  [console_scripts]
2
2
  fast-vllm = llm_utils.scripts.fast_vllm:main
3
+ kill-mpython = speedy_utils.scripts.kill_mpython:main
3
4
  mpython = speedy_utils.scripts.mpython:main
4
5
  openapi_client_codegen = speedy_utils.scripts.openapi_client_codegen:main
5
6
  svllm = llm_utils.scripts.vllm_serve:main