stouputils 1.17.0__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stouputils/parallel.py DELETED
@@ -1,556 +0,0 @@
1
- """
2
- This module provides utility functions for parallel processing, such as:
3
-
4
- - multiprocessing(): Execute a function in parallel using multiprocessing
5
- - multithreading(): Execute a function in parallel using multithreading
6
- - run_in_subprocess(): Execute a function in a subprocess with args and kwargs
7
-
8
- I highly encourage you to read the function docstrings to understand when to use each method.
9
-
10
- Priority (nice) mapping for multiprocessing():
11
-
12
- - Unix-style values from -20 (highest priority) to 19 (lowest priority)
13
- - Windows automatic mapping:
14
- * -20 to -10: HIGH_PRIORITY_CLASS
15
- * -9 to -1: ABOVE_NORMAL_PRIORITY_CLASS
16
- * 0: NORMAL_PRIORITY_CLASS
17
- * 1 to 9: BELOW_NORMAL_PRIORITY_CLASS
18
- * 10 to 19: IDLE_PRIORITY_CLASS
19
-
20
- .. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/parallel_module.gif
21
- :alt: stouputils parallel examples
22
- """
23
-
24
- # Imports
25
- import os
26
- import time
27
- from collections.abc import Callable, Iterable
28
- from typing import Any, TypeVar, cast
29
-
30
- from .ctx import SetMPStartMethod
31
- from .print import BAR_FORMAT, MAGENTA
32
-
33
-
34
- # Small test functions for doctests
35
def doctest_square(x: int) -> int:
    """ Return ``x`` multiplied by itself (tiny top-level helper used in the examples). """
    squared: int = x * x
    return squared
37
def doctest_slow(x: int) -> int:
    """ Sleep for a tenth of a second, then hand ``x`` back unchanged (helper used in the examples). """
    pause_seconds: float = 0.1
    time.sleep(pause_seconds)
    return x
40
-
41
- # Constants
42
# os.cpu_count() returns None when the number of CPUs cannot be determined.
# typing.cast() does not convert at runtime, so fall back to 1 explicitly to
# guarantee that CPU_COUNT is always a usable positive integer.
CPU_COUNT: int = os.cpu_count() or 1
# Module-level type variables (kept for code importing them from this module)
T = TypeVar("T")
R = TypeVar("R")
45
-
46
- # Functions
47
- def multiprocessing[T, R](
48
- func: Callable[..., R] | list[Callable[..., R]],
49
- args: Iterable[T],
50
- use_starmap: bool = False,
51
- chunksize: int = 1,
52
- desc: str = "",
53
- max_workers: int | float = CPU_COUNT,
54
- delay_first_calls: float = 0,
55
- nice: int | None = None,
56
- color: str = MAGENTA,
57
- bar_format: str = BAR_FORMAT,
58
- ascii: bool = False,
59
- smooth_tqdm: bool = True,
60
- **tqdm_kwargs: Any
61
- ) -> list[R]:
62
- r""" Method to execute a function in parallel using multiprocessing
63
-
64
- - For CPU-bound operations where the GIL (Global Interpreter Lock) is a bottleneck.
65
- - When the task can be divided into smaller, independent sub-tasks that can be executed concurrently.
66
- - For computationally intensive tasks like scientific simulations, data analysis, or machine learning workloads.
67
-
68
- Args:
69
- func (Callable | list[Callable]): Function to execute, or list of functions (one per argument)
70
- args (Iterable): Iterable of arguments to pass to the function(s)
71
- use_starmap (bool): Whether to use starmap or not (Defaults to False):
72
- True means the function will be called like func(\*args[i]) instead of func(args[i])
73
- chunksize (int): Number of arguments to process at a time
74
- (Defaults to 1 for proper progress bar display)
75
- desc (str): Description displayed in the progress bar
76
- (if not provided no progress bar will be displayed)
77
- max_workers (int | float): Number of workers to use (Defaults to CPU_COUNT), -1 means CPU_COUNT.
78
- If float between 0 and 1, it's treated as a percentage of CPU_COUNT.
79
- If negative float between -1 and 0, it's treated as a percentage of len(args).
80
- delay_first_calls (float): Apply i*delay_first_calls seconds delay to the first "max_workers" calls.
81
- For instance, the first process will be delayed by 0 seconds, the second by 1 second, etc.
82
- (Defaults to 0): This can be useful to avoid functions being called in the same second.
83
- nice (int | None): Adjust the priority of worker processes (Defaults to None).
84
- Use Unix-style values: -20 (highest priority) to 19 (lowest priority).
85
- Positive values reduce priority, negative values increase it.
86
- Automatically converted to appropriate priority class on Windows.
87
- If None, no priority adjustment is made.
88
- color (str): Color of the progress bar (Defaults to MAGENTA)
89
- bar_format (str): Format of the progress bar (Defaults to BAR_FORMAT)
90
- ascii (bool): Whether to use ASCII or Unicode characters for the progress bar
91
- smooth_tqdm (bool): Whether to enable smooth progress bar updates by setting miniters and mininterval (Defaults to True)
92
- **tqdm_kwargs (Any): Additional keyword arguments to pass to tqdm
93
-
94
- Returns:
95
- list[object]: Results of the function execution
96
-
97
- Examples:
98
- .. code-block:: python
99
-
100
- > multiprocessing(doctest_square, args=[1, 2, 3])
101
- [1, 4, 9]
102
-
103
- > multiprocessing(int.__mul__, [(1,2), (3,4), (5,6)], use_starmap=True)
104
- [2, 12, 30]
105
-
106
- > # Using a list of functions (one per argument)
107
- > multiprocessing([doctest_square, doctest_square, doctest_square], [1, 2, 3])
108
- [1, 4, 9]
109
-
110
- > # Will process in parallel with progress bar
111
- > multiprocessing(doctest_slow, range(10), desc="Processing")
112
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
113
-
114
- > # Will process in parallel with progress bar and delay the first threads
115
- > multiprocessing(
116
- . doctest_slow,
117
- . range(10),
118
- . desc="Processing with delay",
119
- . max_workers=2,
120
- . delay_first_calls=0.6
121
- . )
122
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
123
- """
124
- # Imports
125
- import multiprocessing as mp
126
- from multiprocessing import Pool
127
-
128
- from tqdm.auto import tqdm
129
- from tqdm.contrib.concurrent import process_map # pyright: ignore[reportUnknownVariableType]
130
-
131
- # Handle parameters
132
- args = list(args) # Ensure we have a list (not other iterable)
133
- if max_workers == -1:
134
- max_workers = CPU_COUNT
135
- if isinstance(max_workers, float):
136
- if max_workers > 0:
137
- assert max_workers <= 1, "max_workers as positive float must be between 0 and 1 (percentage of CPU_COUNT)"
138
- max_workers = int(max_workers * CPU_COUNT)
139
- else:
140
- assert -1 <= max_workers < 0, "max_workers as negative float must be between -1 and 0 (percentage of len(args))"
141
- max_workers = int(-max_workers * len(args))
142
- verbose: bool = desc != ""
143
- desc, func, args = _handle_parameters(func, args, use_starmap, delay_first_calls, max_workers, desc, color)
144
- if bar_format == BAR_FORMAT:
145
- bar_format = bar_format.replace(MAGENTA, color)
146
- if smooth_tqdm:
147
- tqdm_kwargs.setdefault("mininterval", 0.0)
148
- try:
149
- total = len(args) # type: ignore
150
- import shutil
151
- width = shutil.get_terminal_size().columns
152
- tqdm_kwargs.setdefault("miniters", max(1, total // width))
153
- except (TypeError, OSError):
154
- tqdm_kwargs.setdefault("miniters", 1)
155
-
156
- # Do multiprocessing only if there is more than 1 argument and more than 1 CPU
157
- if max_workers > 1 and len(args) > 1:
158
- def process() -> list[Any]:
159
- # Wrap function with nice if specified
160
- if nice is not None:
161
- wrapped_args = [(nice, func, arg) for arg in args]
162
- wrapped_func = _nice_wrapper
163
- else:
164
- wrapped_args = args
165
- wrapped_func = func
166
-
167
- if verbose:
168
- return list(process_map(
169
- wrapped_func, wrapped_args, max_workers=max_workers, chunksize=chunksize, desc=desc, bar_format=bar_format, ascii=ascii, **tqdm_kwargs
170
- )) # type: ignore
171
- else:
172
- with Pool(max_workers) as pool:
173
- return list(pool.map(wrapped_func, wrapped_args, chunksize=chunksize)) # type: ignore
174
- try:
175
- return process()
176
- except RuntimeError as e:
177
- if "SemLock created in a fork context is being shared with a process in a spawn context" in str(e):
178
-
179
- # Try with alternate start method
180
- with SetMPStartMethod("spawn" if mp.get_start_method() != "spawn" else "fork"):
181
- return process()
182
- else: # Re-raise if it's not the SemLock error
183
- raise
184
-
185
- # Single process execution
186
- else:
187
- if verbose:
188
- return [func(arg) for arg in tqdm(args, total=len(args), desc=desc, bar_format=bar_format, ascii=ascii, **tqdm_kwargs)]
189
- else:
190
- return [func(arg) for arg in args]
191
-
192
-
193
- def multithreading[T, R](
194
- func: Callable[..., R] | list[Callable[..., R]],
195
- args: Iterable[T],
196
- use_starmap: bool = False,
197
- desc: str = "",
198
- max_workers: int | float = CPU_COUNT,
199
- delay_first_calls: float = 0,
200
- color: str = MAGENTA,
201
- bar_format: str = BAR_FORMAT,
202
- ascii: bool = False,
203
- smooth_tqdm: bool = True,
204
- **tqdm_kwargs: Any
205
- ) -> list[R]:
206
- r""" Method to execute a function in parallel using multithreading, you should use it:
207
-
208
- - For I/O-bound operations where the GIL is not a bottleneck, such as network requests or disk operations.
209
- - When the task involves waiting for external resources, such as network responses or user input.
210
- - For operations that involve a lot of waiting, such as GUI event handling or handling user input.
211
-
212
- Args:
213
- func (Callable | list[Callable]): Function to execute, or list of functions (one per argument)
214
- args (Iterable): Iterable of arguments to pass to the function(s)
215
- use_starmap (bool): Whether to use starmap or not (Defaults to False):
216
- True means the function will be called like func(\*args[i]) instead of func(args[i])
217
- desc (str): Description displayed in the progress bar
218
- (if not provided no progress bar will be displayed)
219
- max_workers (int | float): Number of workers to use (Defaults to CPU_COUNT), -1 means CPU_COUNT.
220
- If float between 0 and 1, it's treated as a percentage of CPU_COUNT.
221
- If negative float between -1 and 0, it's treated as a percentage of len(args).
222
- delay_first_calls (float): Apply i*delay_first_calls seconds delay to the first "max_workers" calls.
223
- For instance with value to 1, the first thread will be delayed by 0 seconds, the second by 1 second, etc.
224
- (Defaults to 0): This can be useful to avoid functions being called in the same second.
225
- color (str): Color of the progress bar (Defaults to MAGENTA)
226
- bar_format (str): Format of the progress bar (Defaults to BAR_FORMAT)
227
- ascii (bool): Whether to use ASCII or Unicode characters for the progress bar
228
- smooth_tqdm (bool): Whether to enable smooth progress bar updates by setting miniters and mininterval (Defaults to True)
229
- **tqdm_kwargs (Any): Additional keyword arguments to pass to tqdm
230
-
231
- Returns:
232
- list[object]: Results of the function execution
233
-
234
- Examples:
235
- .. code-block:: python
236
-
237
- > multithreading(doctest_square, args=[1, 2, 3])
238
- [1, 4, 9]
239
-
240
- > multithreading(int.__mul__, [(1,2), (3,4), (5,6)], use_starmap=True)
241
- [2, 12, 30]
242
-
243
- > # Using a list of functions (one per argument)
244
- > multithreading([doctest_square, doctest_square, doctest_square], [1, 2, 3])
245
- [1, 4, 9]
246
-
247
- > # Will process in parallel with progress bar
248
- > multithreading(doctest_slow, range(10), desc="Threading")
249
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
250
-
251
- > # Will process in parallel with progress bar and delay the first threads
252
- > multithreading(
253
- . doctest_slow,
254
- . range(10),
255
- . desc="Threading with delay",
256
- . max_workers=2,
257
- . delay_first_calls=0.6
258
- . )
259
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
260
- """
261
- # Imports
262
- from concurrent.futures import ThreadPoolExecutor
263
-
264
- from tqdm.auto import tqdm
265
-
266
- # Handle parameters
267
- args = list(args) # Ensure we have a list (not other iterable)
268
- if max_workers == -1:
269
- max_workers = CPU_COUNT
270
- if isinstance(max_workers, float):
271
- if max_workers > 0:
272
- assert max_workers <= 1, "max_workers as positive float must be between 0 and 1 (percentage of CPU_COUNT)"
273
- max_workers = int(max_workers * CPU_COUNT)
274
- else:
275
- assert -1 <= max_workers < 0, "max_workers as negative float must be between -1 and 0 (percentage of len(args))"
276
- max_workers = int(-max_workers * len(args))
277
- verbose: bool = desc != ""
278
- desc, func, args = _handle_parameters(func, args, use_starmap, delay_first_calls, max_workers, desc, color)
279
- if bar_format == BAR_FORMAT:
280
- bar_format = bar_format.replace(MAGENTA, color)
281
- if smooth_tqdm:
282
- tqdm_kwargs.setdefault("mininterval", 0.0)
283
- try:
284
- total = len(args) # type: ignore
285
- import shutil
286
- width = shutil.get_terminal_size().columns
287
- tqdm_kwargs.setdefault("miniters", max(1, total // width))
288
- except (TypeError, OSError):
289
- tqdm_kwargs.setdefault("miniters", 1)
290
-
291
- # Do multithreading only if there is more than 1 argument and more than 1 CPU
292
- if max_workers > 1 and len(args) > 1:
293
- if verbose:
294
- with ThreadPoolExecutor(max_workers) as executor:
295
- return list(tqdm(executor.map(func, args), total=len(args), desc=desc, bar_format=bar_format, ascii=ascii, **tqdm_kwargs))
296
- else:
297
- with ThreadPoolExecutor(max_workers) as executor:
298
- return list(executor.map(func, args))
299
-
300
- # Single process execution
301
- else:
302
- if verbose:
303
- return [func(arg) for arg in tqdm(args, total=len(args), desc=desc, bar_format=bar_format, ascii=ascii, **tqdm_kwargs)]
304
- else:
305
- return [func(arg) for arg in args]
306
-
307
-
308
- def run_in_subprocess[R](
309
- func: Callable[..., R],
310
- *args: Any,
311
- timeout: float | None = None,
312
- no_join: bool = False,
313
- **kwargs: Any
314
- ) -> R:
315
- """ Execute a function in a subprocess with positional and keyword arguments.
316
-
317
- This is useful when you need to run a function in isolation to avoid memory leaks,
318
- resource conflicts, or to ensure a clean execution environment. The subprocess will
319
- be created, run the function with the provided arguments, and return the result.
320
-
321
- Args:
322
- func (Callable): The function to execute in a subprocess.
323
- (SHOULD BE A TOP-LEVEL FUNCTION TO BE PICKLABLE)
324
- *args (Any): Positional arguments to pass to the function.
325
- timeout (float | None): Maximum time in seconds to wait for the subprocess.
326
- If None, wait indefinitely. If the subprocess exceeds this time, it will be terminated.
327
- no_join (bool): If True, do not wait for the subprocess to finish (fire-and-forget).
328
- **kwargs (Any): Keyword arguments to pass to the function.
329
-
330
- Returns:
331
- R: The return value of the function.
332
-
333
- Raises:
334
- RuntimeError: If the subprocess exits with a non-zero exit code or times out.
335
- TimeoutError: If the subprocess exceeds the specified timeout.
336
-
337
- Examples:
338
- .. code-block:: python
339
-
340
- > # Simple function execution
341
- > run_in_subprocess(doctest_square, 5)
342
- 25
343
-
344
- > # Function with multiple arguments
345
- > def add(a: int, b: int) -> int:
346
- . return a + b
347
- > run_in_subprocess(add, 10, 20)
348
- 30
349
-
350
- > # Function with keyword arguments
351
- > def greet(name: str, greeting: str = "Hello") -> str:
352
- . return f"{greeting}, {name}!"
353
- > run_in_subprocess(greet, "World", greeting="Hi")
354
- 'Hi, World!'
355
-
356
- > # With timeout to prevent hanging
357
- > run_in_subprocess(some_gpu_func, data, timeout=300.0)
358
- """
359
- import multiprocessing as mp
360
- from multiprocessing import Queue
361
-
362
- # Create a queue to get the result from the subprocess (only if we need to wait)
363
- result_queue: Queue[R | Exception] | None = None if no_join else Queue()
364
-
365
- # Create and start the subprocess using the module-level wrapper
366
- process: mp.Process = mp.Process(
367
- target=_subprocess_wrapper,
368
- args=(result_queue, func, args, kwargs)
369
- )
370
- process.start()
371
-
372
- # Detach process if no_join (fire-and-forget)
373
- if result_queue is None:
374
- return None # type: ignore
375
- process.join(timeout=timeout)
376
-
377
- # Check if process is still alive (timed out)
378
- if process.is_alive():
379
- process.terminate()
380
- time.sleep(0.5) # Give it a moment to terminate gracefully
381
- if process.is_alive():
382
- process.kill()
383
- process.join()
384
- raise TimeoutError(f"Subprocess exceeded timeout of {timeout} seconds and was terminated")
385
-
386
- # Check exit code
387
- if process.exitcode != 0:
388
- # Try to get any exception from the queue (non-blocking)
389
- error_msg = f"Subprocess failed with exit code {process.exitcode}"
390
- try:
391
- if not result_queue.empty():
392
- result_or_exception = result_queue.get_nowait()
393
- if isinstance(result_or_exception, Exception):
394
- raise result_or_exception
395
- except Exception:
396
- pass
397
- raise RuntimeError(error_msg)
398
-
399
- # Retrieve the result
400
- try:
401
- result_or_exception = result_queue.get_nowait()
402
- if isinstance(result_or_exception, Exception):
403
- raise result_or_exception
404
- return result_or_exception
405
- except Exception as e:
406
- raise RuntimeError("Subprocess did not return any result") from e
407
-
408
-
409
- # "Private" function to wrap function execution with nice priority (must be at module level for pickling)
410
- def _nice_wrapper[T, R](args: tuple[int, Callable[[T], R], T]) -> R:
411
- """ Wrapper that applies nice priority then executes the function.
412
-
413
- Args:
414
- args (tuple): Tuple containing (nice_value, func, arg)
415
-
416
- Returns:
417
- R: Result of the function execution
418
- """
419
- nice_value, func, arg = args
420
- _set_process_priority(nice_value)
421
- return func(arg)
422
-
423
- # "Private" function to set process priority (must be at module level for pickling on Windows)
424
def _set_process_priority(nice_value: int) -> None:
    """ Set the priority of the current process (best effort, failures are ignored).

    The value is applied as an absolute niceness, so calling this function several
    times with the same value (e.g. once per task inside a long-lived worker) always
    leaves the process at that niceness instead of lowering the priority further on
    every call.

    Args:
        nice_value (int): Unix-style priority value (-20 to 19)
    """
    try:
        import sys
        if sys.platform == "win32":
            # Map Unix nice values to Windows priority classes
            # -20 to -10: HIGH, -9 to -1: ABOVE_NORMAL, 0: NORMAL, 1-9: BELOW_NORMAL, 10-19: IDLE
            import ctypes
            # Windows priority class constants
            if nice_value <= -10:
                priority = 0x00000080  # HIGH_PRIORITY_CLASS
            elif nice_value < 0:
                priority = 0x00008000  # ABOVE_NORMAL_PRIORITY_CLASS
            elif nice_value == 0:
                priority = 0x00000020  # NORMAL_PRIORITY_CLASS
            elif nice_value < 10:
                priority = 0x00004000  # BELOW_NORMAL_PRIORITY_CLASS
            else:
                priority = 0x00000040  # IDLE_PRIORITY_CLASS
            kernel32 = ctypes.windll.kernel32
            handle = kernel32.GetCurrentProcess()
            kernel32.SetPriorityClass(handle, priority)
        elif hasattr(os, "setpriority"):
            # Unix-like systems: absolute niceness for the current process (pid 0 = self)
            os.setpriority(os.PRIO_PROCESS, 0, nice_value)
        else:
            # os.nice() takes an increment: only apply the difference with the current niceness
            os.nice(nice_value - os.nice(0))
    except Exception:
        pass  # Deliberately best-effort: lacking permission to change priority must not fail the task
455
-
456
- # "Private" function for subprocess wrapper (must be at module level for pickling on Windows)
457
- def _subprocess_wrapper[R](
458
- result_queue: Any,
459
- func: Callable[..., R],
460
- args: tuple[Any, ...],
461
- kwargs: dict[str, Any]
462
- ) -> None:
463
- """ Wrapper function to execute the target function and store the result in the queue.
464
-
465
- Must be at module level to be pickable on Windows (spawn context).
466
-
467
- Args:
468
- result_queue (multiprocessing.Queue | None): Queue to store the result or exception (None if detached).
469
- func (Callable): The target function to execute.
470
- args (tuple): Positional arguments for the function.
471
- kwargs (dict): Keyword arguments for the function.
472
- """
473
- try:
474
- result: R = func(*args, **kwargs)
475
- if result_queue is not None:
476
- result_queue.put(result)
477
- except Exception as e:
478
- if result_queue is not None:
479
- result_queue.put(e)
480
-
481
- # "Private" function to use starmap
482
- def _starmap[T, R](args: tuple[Callable[[T], R], list[T]]) -> R:
483
- r""" Private function to use starmap using args[0](\*args[1])
484
-
485
- Args:
486
- args (tuple): Tuple containing the function and the arguments list to pass to the function
487
- Returns:
488
- object: Result of the function execution
489
- """
490
- func, arguments = args
491
- return func(*arguments)
492
-
493
- # "Private" function to apply delay before calling the target function
494
- def _delayed_call[T, R](args: tuple[Callable[[T], R], float, T]) -> R:
495
- """ Private function to apply delay before calling the target function
496
-
497
- Args:
498
- args (tuple): Tuple containing the function, delay in seconds, and the argument to pass to the function
499
- Returns:
500
- object: Result of the function execution
501
- """
502
- func, delay, arg = args
503
- time.sleep(delay)
504
- return func(arg)
505
-
506
- # "Private" function to handle parameters for multiprocessing or multithreading functions
507
- def _handle_parameters[T, R](
508
- func: Callable[[T], R] | list[Callable[[T], R]],
509
- args: list[T],
510
- use_starmap: bool,
511
- delay_first_calls: float,
512
- max_workers: int,
513
- desc: str,
514
- color: str
515
- ) -> tuple[str, Callable[[T], R], list[T]]:
516
- r""" Private function to handle the parameters for multiprocessing or multithreading functions
517
-
518
- Args:
519
- func (Callable | list[Callable]): Function to execute, or list of functions (one per argument)
520
- args (list): List of arguments to pass to the function(s)
521
- use_starmap (bool): Whether to use starmap or not (Defaults to False):
522
- True means the function will be called like func(\*args[i]) instead of func(args[i])
523
- delay_first_calls (int): Apply i*delay_first_calls seconds delay to the first "max_workers" calls.
524
- For instance, the first process will be delayed by 0 seconds, the second by 1 second, etc. (Defaults to 0):
525
- This can be useful to avoid functions being called in the same second.
526
- max_workers (int): Number of workers to use (Defaults to CPU_COUNT)
527
- desc (str): Description of the function execution displayed in the progress bar
528
- color (str): Color of the progress bar
529
-
530
- Returns:
531
- tuple[str, Callable[[T], R], list[T]]: Tuple containing the description, function, and arguments
532
- """
533
- desc = color + desc
534
-
535
- # Handle list of functions: validate and convert to starmap format
536
- if isinstance(func, list):
537
- func = cast(list[Callable[[T], R]], func)
538
- assert len(func) == len(args), f"Length mismatch: {len(func)} functions but {len(args)} arguments"
539
- args = [(f, arg if use_starmap else (arg,)) for f, arg in zip(func, args, strict=False)] # type: ignore
540
- func = _starmap # type: ignore
541
-
542
- # If use_starmap is True, we use the _starmap function
543
- elif use_starmap:
544
- args = [(func, arg) for arg in args] # type: ignore
545
- func = _starmap # type: ignore
546
-
547
- # Prepare delayed function calls if delay_first_calls is set
548
- if delay_first_calls > 0:
549
- args = [
550
- (func, i * delay_first_calls if i < max_workers else 0, arg) # type: ignore
551
- for i, arg in enumerate(args)
552
- ]
553
- func = _delayed_call # type: ignore
554
-
555
- return desc, func, args # type: ignore
556
-