winipedia-utils 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- winipedia_utils/concurrent/concurrent.py +245 -245
- winipedia_utils/concurrent/multiprocessing.py +130 -130
- winipedia_utils/concurrent/multithreading.py +93 -93
- winipedia_utils/consts.py +21 -23
- winipedia_utils/data/__init__.py +1 -1
- winipedia_utils/data/dataframe/__init__.py +1 -1
- winipedia_utils/data/dataframe/cleaning.py +378 -378
- winipedia_utils/data/structures/__init__.py +1 -1
- winipedia_utils/data/structures/dicts.py +16 -16
- winipedia_utils/git/__init__.py +1 -1
- winipedia_utils/git/gitignore/__init__.py +1 -1
- winipedia_utils/git/gitignore/gitignore.py +136 -136
- winipedia_utils/git/pre_commit/__init__.py +1 -1
- winipedia_utils/git/pre_commit/config.py +70 -70
- winipedia_utils/git/pre_commit/hooks.py +109 -109
- winipedia_utils/git/pre_commit/run_hooks.py +49 -49
- winipedia_utils/iterating/__init__.py +1 -1
- winipedia_utils/iterating/iterate.py +29 -29
- winipedia_utils/logging/ansi.py +6 -6
- winipedia_utils/logging/config.py +64 -64
- winipedia_utils/logging/logger.py +26 -26
- winipedia_utils/modules/class_.py +119 -119
- winipedia_utils/modules/function.py +101 -101
- winipedia_utils/modules/module.py +379 -379
- winipedia_utils/modules/package.py +390 -390
- winipedia_utils/oop/mixins/meta.py +333 -333
- winipedia_utils/oop/mixins/mixin.py +37 -37
- winipedia_utils/os/__init__.py +1 -1
- winipedia_utils/os/os.py +63 -63
- winipedia_utils/projects/__init__.py +1 -1
- winipedia_utils/projects/poetry/__init__.py +1 -1
- winipedia_utils/projects/poetry/config.py +91 -91
- winipedia_utils/projects/poetry/poetry.py +31 -31
- winipedia_utils/projects/project.py +48 -48
- winipedia_utils/resources/__init__.py +1 -1
- winipedia_utils/resources/svgs/__init__.py +1 -1
- winipedia_utils/resources/svgs/download_arrow.svg +2 -2
- winipedia_utils/resources/svgs/exit_fullscreen_icon.svg +5 -5
- winipedia_utils/resources/svgs/fullscreen_icon.svg +2 -2
- winipedia_utils/resources/svgs/menu_icon.svg +3 -3
- winipedia_utils/resources/svgs/pause_icon.svg +3 -3
- winipedia_utils/resources/svgs/play_icon.svg +16 -16
- winipedia_utils/resources/svgs/plus_icon.svg +23 -23
- winipedia_utils/resources/svgs/svg.py +15 -15
- winipedia_utils/security/__init__.py +1 -1
- winipedia_utils/security/cryptography.py +29 -29
- winipedia_utils/security/keyring.py +70 -70
- winipedia_utils/setup.py +47 -47
- winipedia_utils/testing/assertions.py +23 -23
- winipedia_utils/testing/convention.py +177 -177
- winipedia_utils/testing/create_tests.py +291 -291
- winipedia_utils/testing/fixtures.py +28 -28
- winipedia_utils/testing/tests/base/fixtures/__init__.py +1 -1
- winipedia_utils/testing/tests/base/fixtures/fixture.py +6 -6
- winipedia_utils/testing/tests/base/fixtures/scopes/class_.py +33 -33
- winipedia_utils/testing/tests/base/fixtures/scopes/function.py +7 -7
- winipedia_utils/testing/tests/base/fixtures/scopes/module.py +31 -31
- winipedia_utils/testing/tests/base/fixtures/scopes/package.py +7 -7
- winipedia_utils/testing/tests/base/fixtures/scopes/session.py +312 -312
- winipedia_utils/testing/tests/base/utils/utils.py +82 -82
- winipedia_utils/testing/tests/conftest.py +32 -32
- winipedia_utils/text/string.py +126 -126
- {winipedia_utils-0.2.0.dist-info → winipedia_utils-0.2.2.dist-info}/METADATA +3 -5
- winipedia_utils-0.2.2.dist-info/RECORD +80 -0
- {winipedia_utils-0.2.0.dist-info → winipedia_utils-0.2.2.dist-info}/licenses/LICENSE +21 -21
- winipedia_utils/django/__init__.py +0 -24
- winipedia_utils/django/bulk.py +0 -538
- winipedia_utils/django/command.py +0 -334
- winipedia_utils/django/database.py +0 -289
- winipedia_utils/pyside/__init__.py +0 -1
- winipedia_utils/pyside/core/__init__.py +0 -1
- winipedia_utils/pyside/core/py_qiodevice.py +0 -476
- winipedia_utils/pyside/ui/__init__.py +0 -1
- winipedia_utils/pyside/ui/base/__init__.py +0 -1
- winipedia_utils/pyside/ui/base/base.py +0 -180
- winipedia_utils/pyside/ui/pages/__init__.py +0 -1
- winipedia_utils/pyside/ui/pages/base/__init__.py +0 -1
- winipedia_utils/pyside/ui/pages/base/base.py +0 -92
- winipedia_utils/pyside/ui/pages/browser.py +0 -26
- winipedia_utils/pyside/ui/pages/player.py +0 -85
- winipedia_utils/pyside/ui/widgets/__init__.py +0 -1
- winipedia_utils/pyside/ui/widgets/browser.py +0 -243
- winipedia_utils/pyside/ui/widgets/clickable_widget.py +0 -57
- winipedia_utils/pyside/ui/widgets/media_player.py +0 -430
- winipedia_utils/pyside/ui/widgets/notification.py +0 -78
- winipedia_utils/pyside/ui/windows/__init__.py +0 -1
- winipedia_utils/pyside/ui/windows/base/__init__.py +0 -1
- winipedia_utils/pyside/ui/windows/base/base.py +0 -49
- winipedia_utils-0.2.0.dist-info/RECORD +0 -103
- {winipedia_utils-0.2.0.dist-info → winipedia_utils-0.2.2.dist-info}/WHEEL +0 -0
|
@@ -1,245 +1,245 @@
|
|
|
1
|
-
"""Concurrent processing utilities for parallel execution.
|
|
2
|
-
|
|
3
|
-
This module provides functions for concurrent processing using both multiprocessing
|
|
4
|
-
and multithreading approaches. It includes utilities for handling timeouts,
|
|
5
|
-
managing process pools, and organizing parallel execution of functions.
|
|
6
|
-
|
|
7
|
-
Returns:
|
|
8
|
-
Various utility functions for concurrent processing.
|
|
9
|
-
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import multiprocessing
|
|
13
|
-
import os
|
|
14
|
-
import threading
|
|
15
|
-
from collections.abc import Callable, Generator, Iterable
|
|
16
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
17
|
-
from copy import deepcopy
|
|
18
|
-
from functools import partial
|
|
19
|
-
from typing import TYPE_CHECKING, Any, cast
|
|
20
|
-
|
|
21
|
-
from tqdm import tqdm
|
|
22
|
-
|
|
23
|
-
from winipedia_utils.concurrent.multiprocessing import get_spwan_pool
|
|
24
|
-
from winipedia_utils.concurrent.multithreading import imap_unordered
|
|
25
|
-
from winipedia_utils.iterating.iterate import get_len_with_default
|
|
26
|
-
from winipedia_utils.logging.logger import get_logger
|
|
27
|
-
|
|
28
|
-
if TYPE_CHECKING:
|
|
29
|
-
from multiprocessing.pool import Pool
|
|
30
|
-
|
|
31
|
-
logger = get_logger(__name__)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def get_order_and_func_result(
    func_order_args: tuple[Any, ...],
) -> tuple[int, Any]:
    """Run one packed call and tag its result with its order index.

    The function to call, its order index and its positional arguments all
    travel in a single tuple, so this helper can be handed to map-style
    functions that pass exactly one item per call.

    Args:
        func_order_args: Tuple laid out as ``(function, order, *args)``.

    Returns:
        A ``(order, result)`` tuple where ``result`` is ``function(*args)``.

    """
    target, position, *call_args = func_order_args
    outcome = target(*call_args)
    return position, outcome
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def generate_process_args(
|
|
55
|
-
*,
|
|
56
|
-
process_function: Callable[..., Any],
|
|
57
|
-
process_args: Iterable[Iterable[Any]],
|
|
58
|
-
process_args_static: Iterable[Any] | None = None,
|
|
59
|
-
deepcopy_static_args: Iterable[Any] | None = None,
|
|
60
|
-
) -> Generator[tuple[Any, ...], None, None]:
|
|
61
|
-
"""Prepare arguments for multiprocessing or multithreading execution.
|
|
62
|
-
|
|
63
|
-
Converts input arguments into a format suitable for parallel processing,
|
|
64
|
-
organizing them for efficient unpacking during execution. The function:
|
|
65
|
-
1. Prepends process func and order indices to arguments
|
|
66
|
-
2. Handles static arguments (with optional deep copying)
|
|
67
|
-
3. Restructures arguments into tuples for unpacking
|
|
68
|
-
|
|
69
|
-
Args:
|
|
70
|
-
process_function: Function to be executed
|
|
71
|
-
process_args: Iterable of argument lists for each parallel call
|
|
72
|
-
process_args_static: Optional constant arguments to add to each call
|
|
73
|
-
deepcopy_static_args: Optional constant arguments that should be deep-copied
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
A Genrator that yields one args tuple for each function call
|
|
77
|
-
First is the process function
|
|
78
|
-
Second item in the tuple is the order index
|
|
79
|
-
Second item in the tuple is the function
|
|
80
|
-
Rest of the items are the arguments for the function
|
|
81
|
-
The length of the generator
|
|
82
|
-
"""
|
|
83
|
-
process_args_static = (
|
|
84
|
-
() if process_args_static is None else tuple(process_args_static)
|
|
85
|
-
)
|
|
86
|
-
deepcopy_static_args = (
|
|
87
|
-
() if deepcopy_static_args is None else tuple(deepcopy_static_args)
|
|
88
|
-
)
|
|
89
|
-
for order, process_arg in enumerate(process_args):
|
|
90
|
-
yield (
|
|
91
|
-
process_function,
|
|
92
|
-
order,
|
|
93
|
-
*process_arg,
|
|
94
|
-
*process_args_static,
|
|
95
|
-
*(
|
|
96
|
-
deepcopy(deepcopy_static_arg)
|
|
97
|
-
for deepcopy_static_arg in deepcopy_static_args
|
|
98
|
-
),
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def get_multiprocess_results_with_tqdm(
    results: Iterable[Any],
    process_func: Callable[..., Any],
    process_args_len: int,
    *,
    threads: bool,
) -> list[Any]:
    """Collect parallel results behind a tqdm progress bar, in original order.

    Consumes ``results`` completely while tqdm reports progress, then sorts
    the collected ``(order, result)`` pairs by their order index and returns
    only the results.

    Args:
        results: Iterable of ``(order, result)`` pairs from parallel execution
        process_func: Function that was executed in parallel; only used to
            label the progress bar
        process_args_len: Number of items to process, used as the bar total
        threads: Whether threading (True) or multiprocessing (False) was used

    Returns:
        list[Any]: Results from parallel execution in original order

    """
    # functools.partial objects and callable instances have no __name__;
    # fall back to their repr instead of raising AttributeError.
    func_name = getattr(process_func, "__name__", repr(process_func))
    progress = tqdm(
        results,
        total=process_args_len,
        desc=f"Multi{'threading' if threads else 'processing'} {func_name}",
        unit=f" {'threads' if threads else 'processes'}",
    )
    # Items are (order, result) pairs that may arrive in any order, so sort
    # them by the order index to restore the original sequence.
    ordered_pairs = sorted(progress, key=lambda pair: pair[0])
    # Drop the order index and keep only the results.
    return [pair[1] for pair in ordered_pairs]
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
def find_max_pools(
    *,
    threads: bool,
    process_args_len: int | None = None,
) -> int:
    """Work out how many workers a new thread or process pool should get.

    The budget is four times the CPU count for threads and the CPU count for
    processes. The number of currently active threads or child processes is
    subtracted from that budget, the result is capped by ``process_args_len``
    when one is given, and the final value is never lower than 1.

    Args:
        threads: Whether to use threading (True) or multiprocessing (False)
        process_args_len: Number of items to process in parallel

    Returns:
        int: Maximum number of worker processes or threads to use

    """
    # os.cpu_count() may return None; treat that as a single core.
    cores = os.cpu_count() or 1
    if threads:
        budget = cores * 4
        in_use = threading.active_count()
    else:
        budget = cores
        in_use = len(multiprocessing.active_children())

    free_slots = budget - in_use
    # A falsy length (None or 0) means no cap from the workload size.
    worker_count = (
        min(free_slots, process_args_len) if process_args_len else free_slots
    )
    # free_slots can be zero or negative; always allow at least one worker.
    worker_count = max(worker_count, 1)

    logger.info(
        "Multi%s with max_pools: %s",
        "threading" if threads else "processing",
        worker_count,
    )

    return worker_count
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def concurrent_loop(  # noqa: PLR0913
    *,
    threading: bool,
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Execute a function concurrently over many argument sets.

    This function is a helper function for multiprocess_loop and
    multithread_loop. It is not meant to be used directly.

    Args:
        threading (bool):
            Whether to use threading (True) or multiprocessing (False)
        process_function (Callable[..., Any]):
            Function to be executed concurrently
        process_args (Iterable[Iterable[Any]]):
            Arguments for each call
        process_args_static (Iterable[Any] | None, optional):
            Static arguments to pass to each call. Defaults to None.
        deepcopy_static_args (Iterable[Any] | None, optional):
            Arguments that should be deep-copied for each call.
            Defaults to None.
        process_args_len (int, optional):
            Default handed to get_len_with_default together with
            process_args (presumably used when the length cannot be
            determined; confirm against that helper). Defaults to 1.

    Returns:
        list[Any]: Results from the process_function executions, in the
            order of process_args

    """
    process_args_len = get_len_with_default(process_args, process_args_len)
    packed_args = generate_process_args(
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
    )
    worker_count = find_max_pools(
        threads=threading, process_args_len=process_args_len
    )
    executor = (
        ThreadPoolExecutor(max_workers=worker_count)
        if threading
        else get_spwan_pool(processes=worker_count)
    )
    with executor as pool:
        dispatch: Callable[[Callable[..., Any], Iterable[Any]], Any]

        if process_args_len == 1:
            # A resolved length of 1 bypasses the pool: the built-in map
            # runs the call in the current thread.
            dispatch = map
        elif threading:
            dispatch = partial(imap_unordered, cast("ThreadPoolExecutor", pool))
        else:
            dispatch = cast("Pool", pool).imap_unordered

        # The results are consumed inside the with-block, i.e. before the
        # pool's context manager exits.
        return get_multiprocess_results_with_tqdm(
            results=dispatch(get_order_and_func_result, packed_args),
            process_func=process_function,
            process_args_len=process_args_len,
            threads=threading,
        )
|
|
1
|
+
"""Concurrent processing utilities for parallel execution.
|
|
2
|
+
|
|
3
|
+
This module provides functions for concurrent processing using both multiprocessing
|
|
4
|
+
and multithreading approaches. It includes utilities for handling timeouts,
|
|
5
|
+
managing process pools, and organizing parallel execution of functions.
|
|
6
|
+
|
|
7
|
+
Returns:
|
|
8
|
+
Various utility functions for concurrent processing.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import multiprocessing
|
|
13
|
+
import os
|
|
14
|
+
import threading
|
|
15
|
+
from collections.abc import Callable, Generator, Iterable
|
|
16
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
17
|
+
from copy import deepcopy
|
|
18
|
+
from functools import partial
|
|
19
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
20
|
+
|
|
21
|
+
from tqdm import tqdm
|
|
22
|
+
|
|
23
|
+
from winipedia_utils.concurrent.multiprocessing import get_spwan_pool
|
|
24
|
+
from winipedia_utils.concurrent.multithreading import imap_unordered
|
|
25
|
+
from winipedia_utils.iterating.iterate import get_len_with_default
|
|
26
|
+
from winipedia_utils.logging.logger import get_logger
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from multiprocessing.pool import Pool
|
|
30
|
+
|
|
31
|
+
logger = get_logger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_order_and_func_result(
    func_order_args: tuple[Any, ...],
) -> tuple[int, Any]:
    """Run one packed call and tag its result with its order index.

    The function to call, its order index and its positional arguments all
    travel in a single tuple, so this helper can be handed to map-style
    functions that pass exactly one item per call.

    Args:
        func_order_args: Tuple laid out as ``(function, order, *args)``.

    Returns:
        A ``(order, result)`` tuple where ``result`` is ``function(*args)``.

    """
    target, position, *call_args = func_order_args
    outcome = target(*call_args)
    return position, outcome
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def generate_process_args(
|
|
55
|
+
*,
|
|
56
|
+
process_function: Callable[..., Any],
|
|
57
|
+
process_args: Iterable[Iterable[Any]],
|
|
58
|
+
process_args_static: Iterable[Any] | None = None,
|
|
59
|
+
deepcopy_static_args: Iterable[Any] | None = None,
|
|
60
|
+
) -> Generator[tuple[Any, ...], None, None]:
|
|
61
|
+
"""Prepare arguments for multiprocessing or multithreading execution.
|
|
62
|
+
|
|
63
|
+
Converts input arguments into a format suitable for parallel processing,
|
|
64
|
+
organizing them for efficient unpacking during execution. The function:
|
|
65
|
+
1. Prepends process func and order indices to arguments
|
|
66
|
+
2. Handles static arguments (with optional deep copying)
|
|
67
|
+
3. Restructures arguments into tuples for unpacking
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
process_function: Function to be executed
|
|
71
|
+
process_args: Iterable of argument lists for each parallel call
|
|
72
|
+
process_args_static: Optional constant arguments to add to each call
|
|
73
|
+
deepcopy_static_args: Optional constant arguments that should be deep-copied
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
A Genrator that yields one args tuple for each function call
|
|
77
|
+
First is the process function
|
|
78
|
+
Second item in the tuple is the order index
|
|
79
|
+
Second item in the tuple is the function
|
|
80
|
+
Rest of the items are the arguments for the function
|
|
81
|
+
The length of the generator
|
|
82
|
+
"""
|
|
83
|
+
process_args_static = (
|
|
84
|
+
() if process_args_static is None else tuple(process_args_static)
|
|
85
|
+
)
|
|
86
|
+
deepcopy_static_args = (
|
|
87
|
+
() if deepcopy_static_args is None else tuple(deepcopy_static_args)
|
|
88
|
+
)
|
|
89
|
+
for order, process_arg in enumerate(process_args):
|
|
90
|
+
yield (
|
|
91
|
+
process_function,
|
|
92
|
+
order,
|
|
93
|
+
*process_arg,
|
|
94
|
+
*process_args_static,
|
|
95
|
+
*(
|
|
96
|
+
deepcopy(deepcopy_static_arg)
|
|
97
|
+
for deepcopy_static_arg in deepcopy_static_args
|
|
98
|
+
),
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_multiprocess_results_with_tqdm(
    results: Iterable[Any],
    process_func: Callable[..., Any],
    process_args_len: int,
    *,
    threads: bool,
) -> list[Any]:
    """Collect parallel results behind a tqdm progress bar, in original order.

    Consumes ``results`` completely while tqdm reports progress, then sorts
    the collected ``(order, result)`` pairs by their order index and returns
    only the results.

    Args:
        results: Iterable of ``(order, result)`` pairs from parallel execution
        process_func: Function that was executed in parallel; only used to
            label the progress bar
        process_args_len: Number of items to process, used as the bar total
        threads: Whether threading (True) or multiprocessing (False) was used

    Returns:
        list[Any]: Results from parallel execution in original order

    """
    # functools.partial objects and callable instances have no __name__;
    # fall back to their repr instead of raising AttributeError.
    func_name = getattr(process_func, "__name__", repr(process_func))
    progress = tqdm(
        results,
        total=process_args_len,
        desc=f"Multi{'threading' if threads else 'processing'} {func_name}",
        unit=f" {'threads' if threads else 'processes'}",
    )
    # Items are (order, result) pairs that may arrive in any order, so sort
    # them by the order index to restore the original sequence.
    ordered_pairs = sorted(progress, key=lambda pair: pair[0])
    # Drop the order index and keep only the results.
    return [pair[1] for pair in ordered_pairs]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def find_max_pools(
    *,
    threads: bool,
    process_args_len: int | None = None,
) -> int:
    """Work out how many workers a new thread or process pool should get.

    The budget is four times the CPU count for threads and the CPU count for
    processes. The number of currently active threads or child processes is
    subtracted from that budget, the result is capped by ``process_args_len``
    when one is given, and the final value is never lower than 1.

    Args:
        threads: Whether to use threading (True) or multiprocessing (False)
        process_args_len: Number of items to process in parallel

    Returns:
        int: Maximum number of worker processes or threads to use

    """
    # os.cpu_count() may return None; treat that as a single core.
    cores = os.cpu_count() or 1
    if threads:
        budget = cores * 4
        in_use = threading.active_count()
    else:
        budget = cores
        in_use = len(multiprocessing.active_children())

    free_slots = budget - in_use
    # A falsy length (None or 0) means no cap from the workload size.
    worker_count = (
        min(free_slots, process_args_len) if process_args_len else free_slots
    )
    # free_slots can be zero or negative; always allow at least one worker.
    worker_count = max(worker_count, 1)

    logger.info(
        "Multi%s with max_pools: %s",
        "threading" if threads else "processing",
        worker_count,
    )

    return worker_count
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def concurrent_loop(  # noqa: PLR0913
    *,
    threading: bool,
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Execute a function concurrently over many argument sets.

    This function is a helper function for multiprocess_loop and
    multithread_loop. It is not meant to be used directly.

    Args:
        threading (bool):
            Whether to use threading (True) or multiprocessing (False)
        process_function (Callable[..., Any]):
            Function to be executed concurrently
        process_args (Iterable[Iterable[Any]]):
            Arguments for each call
        process_args_static (Iterable[Any] | None, optional):
            Static arguments to pass to each call. Defaults to None.
        deepcopy_static_args (Iterable[Any] | None, optional):
            Arguments that should be deep-copied for each call.
            Defaults to None.
        process_args_len (int, optional):
            Default handed to get_len_with_default together with
            process_args (presumably used when the length cannot be
            determined; confirm against that helper). Defaults to 1.

    Returns:
        list[Any]: Results from the process_function executions, in the
            order of process_args

    """
    process_args_len = get_len_with_default(process_args, process_args_len)
    packed_args = generate_process_args(
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
    )
    worker_count = find_max_pools(
        threads=threading, process_args_len=process_args_len
    )
    executor = (
        ThreadPoolExecutor(max_workers=worker_count)
        if threading
        else get_spwan_pool(processes=worker_count)
    )
    with executor as pool:
        dispatch: Callable[[Callable[..., Any], Iterable[Any]], Any]

        if process_args_len == 1:
            # A resolved length of 1 bypasses the pool: the built-in map
            # runs the call in the current thread.
            dispatch = map
        elif threading:
            dispatch = partial(imap_unordered, cast("ThreadPoolExecutor", pool))
        else:
            dispatch = cast("Pool", pool).imap_unordered

        # The results are consumed inside the with-block, i.e. before the
        # pool's context manager exits.
        return get_multiprocess_results_with_tqdm(
            results=dispatch(get_order_and_func_result, packed_args),
            process_func=process_function,
            process_args_len=process_args_len,
            threads=threading,
        )
|