speedy-utils 1.1.46__py3-none-any.whl → 1.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,12 +4,15 @@ Real-time progress tracking for distributed Ray tasks.
4
4
  This module provides a ProgressActor that allows workers to report item-level
5
5
  progress in real-time, giving users visibility into actual items processed
6
6
  rather than just task completion.
7
+
8
+ Users can simply call report_progress(n) from within their worker functions,
9
+ and speedy_utils will automatically route it to the centralized tracker.
7
10
  """
8
11
  import time
9
12
  import threading
10
13
  from typing import Optional, Callable
11
14
 
12
- __all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor']
15
+ __all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor', 'report_progress', 'set_progress_context']
13
16
 
14
17
 
15
18
  def get_ray_progress_actor():
@@ -119,6 +122,73 @@ class ProgressPoller:
119
122
  if self._thread:
120
123
  self._thread.join(timeout=2.0)
121
124
 
125
+ def _poll_loop(self):
126
+ """Background loop that polls progress and updates tqdm bar."""
127
+ while not self._stop_event.is_set():
128
+ try:
129
+ stats = self._ray.get(self.progress_actor.get_progress.remote())
130
+ self.pbar.n = stats["processed"]
131
+ self.pbar.refresh()
132
+ except Exception:
133
+ pass
134
+ self._stop_event.wait(self.poll_interval)
135
+
136
+ # Final update
137
+ try:
138
+ stats = self._ray.get(self.progress_actor.get_progress.remote())
139
+ self.pbar.n = stats["processed"]
140
+ self.pbar.refresh()
141
+ except Exception:
142
+ pass
143
+
144
+
145
+ # Thread-local storage for progress context
146
+ _progress_context = threading.local()
147
+
148
+
149
+ def set_progress_context(progress_actor):
150
+ """
151
+ Set the progress actor in thread-local storage.
152
+
153
+ This is called automatically by speedy_utils when setting up worker context.
154
+ Users don't need to call this directly.
155
+
156
+ Args:
157
+ progress_actor: Ray actor handle for progress tracking
158
+ """
159
+ _progress_context.actor = progress_actor
160
+
161
+
162
+ def report_progress(n: int = 1):
163
+ """
164
+ Report progress to the centralized tracker.
165
+
166
+ This is a simple helper that workers can call to report items processed.
167
+ Works automatically when called from within multi_process with Ray backend.
168
+ Safe to call even when no progress tracking is active (no-op).
169
+
170
+ Args:
171
+ n: Number of items processed
172
+
173
+ Example:
174
+ def process_data(items):
175
+ for i, item in enumerate(items):
176
+ result = expensive_operation(item)
177
+
178
+ # Report progress every 100 items
179
+ if i % 100 == 0:
180
+ report_progress(100)
181
+
182
+ return results
183
+ """
184
+ actor = getattr(_progress_context, 'actor', None)
185
+ if actor is not None:
186
+ try:
187
+ import ray
188
+ ray.get(actor.update.remote(n))
189
+ except Exception:
190
+ # Silently ignore progress reporting errors
191
+ pass
122
192
  def _poll_loop(self):
123
193
  """Poll the progress actor and update tqdm."""
124
194
  while not self._stop_event.is_set():
@@ -26,6 +26,9 @@ DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
26
26
  T = TypeVar('T')
27
27
  R = TypeVar('R')
28
28
 
29
+ if TYPE_CHECKING:
30
+ from typing import Literal, overload
31
+
29
32
  SPEEDY_RUNNING_THREADS: list[threading.Thread] = [] # cooperative shutdown tracking
30
33
  _SPEEDY_THREADS_LOCK = threading.Lock()
31
34
 
@@ -342,6 +345,48 @@ def _cancel_futures(inflight: set[Future[Any]]) -> None:
342
345
  # ────────────────────────────────────────────────────────────
343
346
  # main API
344
347
  # ────────────────────────────────────────────────────────────
348
+ if TYPE_CHECKING:
349
+ @overload
350
+ def multi_thread(
351
+ func: Callable[[T], R],
352
+ inputs: Iterable[T],
353
+ *,
354
+ workers: int | None = ...,
355
+ batch: int = ...,
356
+ ordered: bool = ...,
357
+ progress: bool = ...,
358
+ progress_update: int = ...,
359
+ prefetch_factor: int = ...,
360
+ timeout: float | None = ...,
361
+ stop_on_error: bool | None = ...,
362
+ error_handler: Literal['raise'] = ...,
363
+ max_error_files: int = ...,
364
+ n_proc: int = ...,
365
+ store_output_pkl_file: str | None = ...,
366
+ **fixed_kwargs: Any,
367
+ ) -> list[R]: ...
368
+
369
+ @overload
370
+ def multi_thread(
371
+ func: Callable[[T], R],
372
+ inputs: Iterable[T],
373
+ *,
374
+ workers: int | None = ...,
375
+ batch: int = ...,
376
+ ordered: bool = ...,
377
+ progress: bool = ...,
378
+ progress_update: int = ...,
379
+ prefetch_factor: int = ...,
380
+ timeout: float | None = ...,
381
+ stop_on_error: bool | None = ...,
382
+ error_handler: Literal['ignore', 'log'] = ...,
383
+ max_error_files: int = ...,
384
+ n_proc: int = ...,
385
+ store_output_pkl_file: str | None = ...,
386
+ **fixed_kwargs: Any,
387
+ ) -> list[R | None]: ...
388
+
389
+
345
390
  def multi_thread(
346
391
  func: Callable[[T], R],
347
392
  inputs: Iterable[T],
@@ -9,11 +9,12 @@ import shutil
9
9
  import subprocess
10
10
  import sys
11
11
 
12
+
12
13
  try:
13
14
  from rich.console import Console, Group
14
15
  from rich.panel import Panel
15
- from rich.text import Text
16
16
  from rich.syntax import Syntax
17
+ from rich.text import Text
17
18
  except ImportError:
18
19
  Console = None
19
20
  Group = None
@@ -81,7 +82,7 @@ def assert_script(python_path):
81
82
  )
82
83
  if Console and Panel and Text and Syntax and Group:
83
84
  console = Console(stderr=True, force_terminal=True)
84
- syntax = Syntax(helper_code, "python", theme="monokai", line_numbers=False)
85
+ syntax = Syntax(helper_code, 'python', theme='monokai', line_numbers=False)
85
86
  console.print()
86
87
  console.print(
87
88
  Panel(
@@ -93,29 +94,35 @@ def assert_script(python_path):
93
94
  )
94
95
  )
95
96
  console.print()
96
- console.print("```python")
97
+ console.print('```python')
97
98
  console.print(syntax)
98
- console.print("```")
99
- console.print("-"*80)
99
+ console.print('```')
100
+ console.print('-' * 80)
100
101
  else:
101
102
  # Fallback to plain text
102
- print(f'Warning: MP_ID and MP_TOTAL not found in {python_path}, please add them.', file=sys.stderr)
103
+ print(
104
+ f'Warning: MP_ID and MP_TOTAL not found in {python_path}, please add them.',
105
+ file=sys.stderr,
106
+ )
103
107
  print(f'Example:\n{helper_code}', file=sys.stderr)
104
108
 
105
109
 
106
110
  def run_in_tmux(commands_to_run, tmux_name, num_windows):
107
111
  with open('/tmp/start_multirun_tmux.sh', 'w') as script_file:
108
112
  script_file.write('#!/bin/bash\n\n')
109
- script_file.write(f'tmux new-session -d -s {tmux_name}\n')
113
+ # Use -n to name the first window explicitly (avoids base-index issues)
114
+ script_file.write(f'tmux new-session -d -s {tmux_name} -n w0\n')
110
115
  for i, cmd in enumerate(itertools.cycle(commands_to_run)):
111
116
  if i >= num_windows:
112
117
  break
113
- window_name = f'{tmux_name}:{i}'
118
+ window_name = f'w{i}'
119
+ target = f'{tmux_name}:{window_name}'
114
120
  if i == 0:
115
- script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")
121
+ script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
116
122
  else:
117
- script_file.write(f'tmux new-window -t {tmux_name}\n')
118
- script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")
123
+ # Create window with explicit name, then send keys to it
124
+ script_file.write(f'tmux new-window -t {tmux_name} -n {window_name}\n')
125
+ script_file.write(f"tmux send-keys -t {target} '{cmd}' C-m\n")
119
126
 
120
127
  # Make the script executable
121
128
  script_file.write('chmod +x /tmp/start_multirun_tmux.sh\n')
@@ -135,7 +142,7 @@ def main():
135
142
  parser = argparse.ArgumentParser(
136
143
  description='Process fold arguments',
137
144
  epilog=f'Helper code for multi-process sharding:\n{helper_code}',
138
- formatter_class=argparse.RawDescriptionHelpFormatter
145
+ formatter_class=argparse.RawDescriptionHelpFormatter,
139
146
  )
140
147
  parser.add_argument(
141
148
  '--total_fold', '-t', default=16, type=int, help='total number of folds'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speedy-utils
3
- Version: 1.1.46
3
+ Version: 1.1.47
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Project-URL: Homepage, https://github.com/anhvth/speedy
6
6
  Project-URL: Repository, https://github.com/anhvth/speedy
@@ -7,7 +7,7 @@ llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbf
7
7
  llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
8
8
  llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
9
9
  llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
10
- llm_utils/lm/llm.py,sha256=i6L5aKF6NhzmaFPBA2pCm8TkQmS1nCgORMqP5QyfJ28,20097
10
+ llm_utils/lm/llm.py,sha256=KEtrHq5D8ZkeD4iTc_zPgOtgVXpsCRA-A3fmWgYCz0w,21378
11
11
  llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
12
12
  llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
13
13
  llm_utils/lm/mixins.py,sha256=Nz7CwJFBOvbZNbODUlJC04Pcbac3zWnT8vy7sZG_MVI,24906
@@ -30,7 +30,7 @@ llm_utils/vector_cache/core.py,sha256=VXuYJy1AX22NHKvIXRriETip5RrmQcNp73-g-ZT774
30
30
  llm_utils/vector_cache/types.py,sha256=CpMZanJSTeBVxQSqjBq6pBVWp7u2-JRcgY9t5jhykdQ,438
31
31
  llm_utils/vector_cache/utils.py,sha256=OsiRFydv8i8HiJtPL9hh40aUv8I5pYfg2zvmtDi4DME,1446
32
32
  speedy_utils/__imports.py,sha256=V0YzkDK4-QkK_IDXY1be6C6_STuNhXAKIp4_dM0coQs,7800
33
- speedy_utils/__init__.py,sha256=_kSjS816Kv5UZPd4EM_juB68tXM_sHUYt6OFB-RhE6U,3261
33
+ speedy_utils/__init__.py,sha256=4RLsC0s9sAkGWcwC85eQQ1HT2Ea_flhbBQUCJuzJAmk,3429
34
34
  speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  speedy_utils/common/clock.py,sha256=raLtMGIgzrRej5kUt7hOUm2ZZw2THVPo-q8dMvdZOxw,7354
36
36
  speedy_utils/common/function_decorator.py,sha256=GKXqRs_hHFFmhyhql0Br0o52WzekUnpNlm99NfaVwgY,2025
@@ -43,21 +43,25 @@ speedy_utils/common/utils_error.py,sha256=KQx2JTZsvsX2DsKRIoVR-4rc-6-l3OzEz9UtnH
43
43
  speedy_utils/common/utils_io.py,sha256=94m_EZ2eIs3w2m0rx-QQWsREPpVJctpweYHco3byczQ,15876
44
44
  speedy_utils/common/utils_misc.py,sha256=ZRJCS7OJxybpVm1sasoeCYRW2TaaGCXj4DySYlQeVR8,2227
45
45
  speedy_utils/common/utils_print.py,sha256=AGDB7mgJnO00QkJBH6kJb46738q3GzMUZPwtQ248vQw,4763
46
- speedy_utils/multi_worker/__init__.py,sha256=urcuxzaAJp-Rl3SIwHNre3x2vyHxLR7YGiDdm-Q8GQs,361
46
+ speedy_utils/multi_worker/__init__.py,sha256=JzPNJtYgBgSlAG6yBU0HhFcEwwCrH3iWeo-W7HZ9Pxs,503
47
+ speedy_utils/multi_worker/_multi_process.py,sha256=0ceoXZZAdfSVVqZAgtCLw7Oq_ZLh6y0i6OkPgx6cicc,13343
48
+ speedy_utils/multi_worker/_multi_process_ray.py,sha256=Qri-jbfHMLxUyiV5I2-KehTj1XM2YxagXWMesFkyzBg,9868
49
+ speedy_utils/multi_worker/common.py,sha256=e5FVKtPGrIw4s4-sA_SIzaIFaTeVK598jXBC5U8YcLU,27014
47
50
  speedy_utils/multi_worker/dataset_ray.py,sha256=U_l_4Y7CVpaHiApsXQSdNvals8NK87LHPS_XHiJF3qs,10044
51
+ speedy_utils/multi_worker/dataset_sharding.py,sha256=dWGCQiElb8SvLHfh97xT-QA2csmf2W3s8S4cFjZrO9Y,7196
48
52
  speedy_utils/multi_worker/parallel_gpu_pool.py,sha256=A7llZcQbRVZqwCqNRku7TpqGCdSoIzpdcTaupgqT5nI,6108
49
- speedy_utils/multi_worker/process.py,sha256=U-pjHoWZ3xOeplMl2nSxVeiJE0F9V-eswpSdK-8c3dU,45446
50
- speedy_utils/multi_worker/progress.py,sha256=Ozeca-t-j1224n_dWwZkWzva9DC16SCLgScKeGtXLaQ,4717
51
- speedy_utils/multi_worker/thread.py,sha256=E7o_iUCIKmgk1tFt7mZAFT7c5q229wVzWj-trmVsxVA,27254
53
+ speedy_utils/multi_worker/process.py,sha256=1P-7J93jt51lMfpHLD9F8DDKAdeMtyYIFVOKEiN5nok,3927
54
+ speedy_utils/multi_worker/progress.py,sha256=V-BN4GSmwOwL_8Y0J7Ya-byJUQXvuYcdqkmQqGHDaVw,7004
55
+ speedy_utils/multi_worker/thread.py,sha256=iVjg4SI3GjzA5DhasT0w29U61fKnWXoLqCgoqQPzyBA,28528
52
56
  speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
57
  speedy_utils/scripts/kill_mpython.py,sha256=xYdJIs1KQ4zlyj-E4XN6j1T7dOhzVo65uKzjXmIvUDU,1847
54
- speedy_utils/scripts/mpython.py,sha256=bf_pdCm5_NaDUV0J652Cg_YFIwva_5AhoVKTGOowENo,6485
58
+ speedy_utils/scripts/mpython.py,sha256=b_Zbytrht2oYfV45P2nl7u5gNsYp4qQ868Pz8QReSD4,6741
55
59
  speedy_utils/scripts/openapi_client_codegen.py,sha256=GModmmhkvGnxljK4KczyixKDrk-VEcLaW5I0XT6tzWo,9657
56
60
  vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
57
61
  vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
58
62
  vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
59
63
  vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
60
- speedy_utils-1.1.46.dist-info/METADATA,sha256=nIgSDMrAbHssvJznZtozJZbZVdH4K8dALKRdExipi1g,13073
61
- speedy_utils-1.1.46.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
62
- speedy_utils-1.1.46.dist-info/entry_points.txt,sha256=QY_2Vn6IcPCaqlY74pDRyZ6UTvPilaNPT7Gxijj7XI8,343
63
- speedy_utils-1.1.46.dist-info/RECORD,,
64
+ speedy_utils-1.1.47.dist-info/METADATA,sha256=Ol3PRn5VGCiozaLocWrOLiYdO6nTHoEmBYhWODVfIi4,13073
65
+ speedy_utils-1.1.47.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
66
+ speedy_utils-1.1.47.dist-info/entry_points.txt,sha256=QY_2Vn6IcPCaqlY74pDRyZ6UTvPilaNPT7Gxijj7XI8,343
67
+ speedy_utils-1.1.47.dist-info/RECORD,,