winipedia-utils 0.2.9__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of winipedia-utils might be problematic. Click here for more details.
- winipedia_utils/concurrent/concurrent.py +245 -245
- winipedia_utils/concurrent/multiprocessing.py +130 -130
- winipedia_utils/concurrent/multithreading.py +93 -93
- winipedia_utils/consts.py +21 -21
- winipedia_utils/data/__init__.py +1 -1
- winipedia_utils/data/dataframe/__init__.py +1 -1
- winipedia_utils/data/dataframe/cleaning.py +378 -378
- winipedia_utils/data/structures/__init__.py +1 -1
- winipedia_utils/data/structures/dicts.py +16 -16
- winipedia_utils/git/__init__.py +1 -1
- winipedia_utils/git/gitignore/__init__.py +1 -1
- winipedia_utils/git/gitignore/gitignore.py +136 -136
- winipedia_utils/git/pre_commit/__init__.py +1 -1
- winipedia_utils/git/pre_commit/config.py +70 -70
- winipedia_utils/git/pre_commit/hooks.py +127 -109
- winipedia_utils/git/pre_commit/run_hooks.py +49 -49
- winipedia_utils/iterating/__init__.py +1 -1
- winipedia_utils/iterating/iterate.py +29 -29
- winipedia_utils/logging/ansi.py +6 -6
- winipedia_utils/logging/config.py +64 -64
- winipedia_utils/logging/logger.py +26 -26
- winipedia_utils/modules/class_.py +119 -119
- winipedia_utils/modules/function.py +101 -101
- winipedia_utils/modules/module.py +379 -379
- winipedia_utils/modules/package.py +390 -390
- winipedia_utils/oop/mixins/meta.py +333 -333
- winipedia_utils/oop/mixins/mixin.py +37 -37
- winipedia_utils/os/__init__.py +1 -1
- winipedia_utils/os/os.py +63 -63
- winipedia_utils/projects/__init__.py +1 -1
- winipedia_utils/projects/poetry/__init__.py +1 -1
- winipedia_utils/projects/poetry/config.py +117 -117
- winipedia_utils/projects/poetry/poetry.py +31 -31
- winipedia_utils/projects/project.py +48 -48
- winipedia_utils/resources/__init__.py +1 -1
- winipedia_utils/resources/svgs/__init__.py +1 -1
- winipedia_utils/resources/svgs/download_arrow.svg +2 -2
- winipedia_utils/resources/svgs/exit_fullscreen_icon.svg +5 -5
- winipedia_utils/resources/svgs/fullscreen_icon.svg +2 -2
- winipedia_utils/resources/svgs/menu_icon.svg +3 -3
- winipedia_utils/resources/svgs/pause_icon.svg +3 -3
- winipedia_utils/resources/svgs/play_icon.svg +16 -16
- winipedia_utils/resources/svgs/plus_icon.svg +23 -23
- winipedia_utils/resources/svgs/svg.py +15 -15
- winipedia_utils/security/__init__.py +1 -1
- winipedia_utils/security/cryptography.py +29 -29
- winipedia_utils/security/keyring.py +70 -70
- winipedia_utils/setup.py +47 -47
- winipedia_utils/testing/assertions.py +23 -23
- winipedia_utils/testing/convention.py +177 -177
- winipedia_utils/testing/create_tests.py +297 -297
- winipedia_utils/testing/fixtures.py +28 -28
- winipedia_utils/testing/tests/base/fixtures/__init__.py +1 -1
- winipedia_utils/testing/tests/base/fixtures/fixture.py +6 -6
- winipedia_utils/testing/tests/base/fixtures/scopes/class_.py +33 -33
- winipedia_utils/testing/tests/base/fixtures/scopes/function.py +7 -7
- winipedia_utils/testing/tests/base/fixtures/scopes/module.py +33 -31
- winipedia_utils/testing/tests/base/fixtures/scopes/package.py +7 -7
- winipedia_utils/testing/tests/base/fixtures/scopes/session.py +296 -296
- winipedia_utils/testing/tests/base/utils/utils.py +111 -114
- winipedia_utils/testing/tests/conftest.py +32 -32
- winipedia_utils/text/string.py +126 -126
- winipedia_utils-0.2.17.dist-info/METADATA +716 -0
- winipedia_utils-0.2.17.dist-info/RECORD +80 -0
- {winipedia_utils-0.2.9.dist-info → winipedia_utils-0.2.17.dist-info}/licenses/LICENSE +21 -21
- winipedia_utils/testing/tests/test_0.py +0 -12
- winipedia_utils-0.2.9.dist-info/METADATA +0 -355
- winipedia_utils-0.2.9.dist-info/RECORD +0 -81
- {winipedia_utils-0.2.9.dist-info → winipedia_utils-0.2.17.dist-info}/WHEEL +0 -0
|
@@ -1,130 +1,130 @@
|
|
|
1
|
-
"""Multiprocessing utilities for concurrent execution.
|
|
2
|
-
|
|
3
|
-
This module provides functions for parallel processing using both multiprocessing
|
|
4
|
-
and multithreading approaches. It includes utilities for handling timeouts,
|
|
5
|
-
managing process pools, and organizing parallel execution of functions.
|
|
6
|
-
|
|
7
|
-
Returns:
|
|
8
|
-
Various utility functions for concurrent processing.
|
|
9
|
-
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import multiprocessing
|
|
13
|
-
from collections.abc import Callable, Iterable
|
|
14
|
-
from functools import wraps
|
|
15
|
-
from multiprocessing.pool import Pool
|
|
16
|
-
from typing import Any
|
|
17
|
-
|
|
18
|
-
from winipedia_utils.logging.logger import get_logger
|
|
19
|
-
|
|
20
|
-
logger = get_logger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def get_spwan_pool(*args: Any, **kwargs: Any) -> Pool:
|
|
24
|
-
"""Get a multiprocessing pool with the spawn context.
|
|
25
|
-
|
|
26
|
-
Args:
|
|
27
|
-
*args: Positional arguments to pass to the Pool constructor
|
|
28
|
-
**kwargs: Keyword arguments to pass to the Pool constructor
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
A multiprocessing pool with the spawn context
|
|
32
|
-
|
|
33
|
-
"""
|
|
34
|
-
return multiprocessing.get_context("spawn").Pool(*args, **kwargs)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def cancel_on_timeout(seconds: float, message: str) -> Callable[..., Any]:
|
|
38
|
-
"""Cancel a function execution if it exceeds a specified timeout.
|
|
39
|
-
|
|
40
|
-
Creates a wrapper that executes the decorated function in a separate process
|
|
41
|
-
and terminates it if execution time exceeds the specified timeout.
|
|
42
|
-
|
|
43
|
-
Args:
|
|
44
|
-
seconds: Maximum execution time in seconds before timeout
|
|
45
|
-
message: Error message to include in the raised TimeoutError
|
|
46
|
-
|
|
47
|
-
Returns:
|
|
48
|
-
A decorator function that wraps the target function with timeout functionality
|
|
49
|
-
|
|
50
|
-
Raises:
|
|
51
|
-
multiprocessing.TimeoutError: When function execution exceeds the timeout
|
|
52
|
-
|
|
53
|
-
Note:
|
|
54
|
-
Only works with functions that are pickle-able.
|
|
55
|
-
This means it may not work as a decorator.
|
|
56
|
-
Instead you should use it as a wrapper function.
|
|
57
|
-
Like this:
|
|
58
|
-
my_func = cancel_on_timeout(seconds=2, message="Test timeout")(my_func)
|
|
59
|
-
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
63
|
-
@wraps(func)
|
|
64
|
-
def wrapper(*args: object, **kwargs: object) -> object:
|
|
65
|
-
spawn_pool = get_spwan_pool(processes=1)
|
|
66
|
-
with spawn_pool as pool:
|
|
67
|
-
async_result = pool.apply_async(func, args, kwargs)
|
|
68
|
-
try:
|
|
69
|
-
return async_result.get(timeout=seconds)
|
|
70
|
-
except multiprocessing.TimeoutError:
|
|
71
|
-
logger.warning(
|
|
72
|
-
"%s -> Execution exceeded %s seconds: %s",
|
|
73
|
-
func.__name__,
|
|
74
|
-
seconds,
|
|
75
|
-
message,
|
|
76
|
-
)
|
|
77
|
-
raise
|
|
78
|
-
finally:
|
|
79
|
-
pool.terminate() # Ensure the worker process is killed
|
|
80
|
-
pool.join() # Wait for cleanup
|
|
81
|
-
|
|
82
|
-
return wrapper
|
|
83
|
-
|
|
84
|
-
return decorator
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def multiprocess_loop(
|
|
88
|
-
process_function: Callable[..., Any],
|
|
89
|
-
process_args: Iterable[Iterable[Any]],
|
|
90
|
-
process_args_static: Iterable[Any] | None = None,
|
|
91
|
-
deepcopy_static_args: Iterable[Any] | None = None,
|
|
92
|
-
process_args_len: int = 1,
|
|
93
|
-
) -> list[Any]:
|
|
94
|
-
"""Process a loop using multiprocessing Pool for parallel execution.
|
|
95
|
-
|
|
96
|
-
Executes the given process_function with the provided arguments in parallel using
|
|
97
|
-
multiprocessing Pool, which is suitable for CPU-bound tasks.
|
|
98
|
-
|
|
99
|
-
Args:
|
|
100
|
-
process_function: Function that processes the given process_args
|
|
101
|
-
process_args: List of args to be processed by the process_function
|
|
102
|
-
e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
|
|
103
|
-
process_args_static: Optional constant arguments passed to each function call
|
|
104
|
-
deepcopy_static_args: Optional arguments that should be
|
|
105
|
-
deep-copied for each process
|
|
106
|
-
process_args_len: Optional length of process_args
|
|
107
|
-
If not provided, it will not be taken into account
|
|
108
|
-
when calculating the max number of processes.
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
List of results from the process_function executions
|
|
112
|
-
|
|
113
|
-
Note:
|
|
114
|
-
Pool is used for CPU-bound tasks as it bypasses
|
|
115
|
-
Python's GIL by creating separate processes.
|
|
116
|
-
Multiprocessing is not safe for mutable objects unlike ThreadPoolExecutor.
|
|
117
|
-
When debugging, if ConnectionErrors occur, set max_processes to 1.
|
|
118
|
-
Also given functions must be pickle-able.
|
|
119
|
-
|
|
120
|
-
"""
|
|
121
|
-
from winipedia_utils.concurrent.concurrent import concurrent_loop
|
|
122
|
-
|
|
123
|
-
return concurrent_loop(
|
|
124
|
-
threading=False,
|
|
125
|
-
process_function=process_function,
|
|
126
|
-
process_args=process_args,
|
|
127
|
-
process_args_static=process_args_static,
|
|
128
|
-
deepcopy_static_args=deepcopy_static_args,
|
|
129
|
-
process_args_len=process_args_len,
|
|
130
|
-
)
|
|
1
|
+
"""Multiprocessing utilities for concurrent execution.
|
|
2
|
+
|
|
3
|
+
This module provides functions for parallel processing using both multiprocessing
|
|
4
|
+
and multithreading approaches. It includes utilities for handling timeouts,
|
|
5
|
+
managing process pools, and organizing parallel execution of functions.
|
|
6
|
+
|
|
7
|
+
Returns:
|
|
8
|
+
Various utility functions for concurrent processing.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import multiprocessing
|
|
13
|
+
from collections.abc import Callable, Iterable
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from multiprocessing.pool import Pool
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from winipedia_utils.logging.logger import get_logger
|
|
19
|
+
|
|
20
|
+
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_spwan_pool(*args: Any, **kwargs: Any) -> Pool:
|
|
24
|
+
"""Get a multiprocessing pool with the spawn context.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
*args: Positional arguments to pass to the Pool constructor
|
|
28
|
+
**kwargs: Keyword arguments to pass to the Pool constructor
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
A multiprocessing pool with the spawn context
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
return multiprocessing.get_context("spawn").Pool(*args, **kwargs)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def cancel_on_timeout(seconds: float, message: str) -> Callable[..., Any]:
|
|
38
|
+
"""Cancel a function execution if it exceeds a specified timeout.
|
|
39
|
+
|
|
40
|
+
Creates a wrapper that executes the decorated function in a separate process
|
|
41
|
+
and terminates it if execution time exceeds the specified timeout.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
seconds: Maximum execution time in seconds before timeout
|
|
45
|
+
message: Error message to include in the raised TimeoutError
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
A decorator function that wraps the target function with timeout functionality
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
multiprocessing.TimeoutError: When function execution exceeds the timeout
|
|
52
|
+
|
|
53
|
+
Note:
|
|
54
|
+
Only works with functions that are pickle-able.
|
|
55
|
+
This means it may not work as a decorator.
|
|
56
|
+
Instead you should use it as a wrapper function.
|
|
57
|
+
Like this:
|
|
58
|
+
my_func = cancel_on_timeout(seconds=2, message="Test timeout")(my_func)
|
|
59
|
+
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
63
|
+
@wraps(func)
|
|
64
|
+
def wrapper(*args: object, **kwargs: object) -> object:
|
|
65
|
+
spawn_pool = get_spwan_pool(processes=1)
|
|
66
|
+
with spawn_pool as pool:
|
|
67
|
+
async_result = pool.apply_async(func, args, kwargs)
|
|
68
|
+
try:
|
|
69
|
+
return async_result.get(timeout=seconds)
|
|
70
|
+
except multiprocessing.TimeoutError:
|
|
71
|
+
logger.warning(
|
|
72
|
+
"%s -> Execution exceeded %s seconds: %s",
|
|
73
|
+
func.__name__,
|
|
74
|
+
seconds,
|
|
75
|
+
message,
|
|
76
|
+
)
|
|
77
|
+
raise
|
|
78
|
+
finally:
|
|
79
|
+
pool.terminate() # Ensure the worker process is killed
|
|
80
|
+
pool.join() # Wait for cleanup
|
|
81
|
+
|
|
82
|
+
return wrapper
|
|
83
|
+
|
|
84
|
+
return decorator
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def multiprocess_loop(
|
|
88
|
+
process_function: Callable[..., Any],
|
|
89
|
+
process_args: Iterable[Iterable[Any]],
|
|
90
|
+
process_args_static: Iterable[Any] | None = None,
|
|
91
|
+
deepcopy_static_args: Iterable[Any] | None = None,
|
|
92
|
+
process_args_len: int = 1,
|
|
93
|
+
) -> list[Any]:
|
|
94
|
+
"""Process a loop using multiprocessing Pool for parallel execution.
|
|
95
|
+
|
|
96
|
+
Executes the given process_function with the provided arguments in parallel using
|
|
97
|
+
multiprocessing Pool, which is suitable for CPU-bound tasks.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
process_function: Function that processes the given process_args
|
|
101
|
+
process_args: List of args to be processed by the process_function
|
|
102
|
+
e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
|
|
103
|
+
process_args_static: Optional constant arguments passed to each function call
|
|
104
|
+
deepcopy_static_args: Optional arguments that should be
|
|
105
|
+
deep-copied for each process
|
|
106
|
+
process_args_len: Optional length of process_args
|
|
107
|
+
If not provided, it will not be taken into account
|
|
108
|
+
when calculating the max number of processes.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
List of results from the process_function executions
|
|
112
|
+
|
|
113
|
+
Note:
|
|
114
|
+
Pool is used for CPU-bound tasks as it bypasses
|
|
115
|
+
Python's GIL by creating separate processes.
|
|
116
|
+
Multiprocessing is not safe for mutable objects unlike ThreadPoolExecutor.
|
|
117
|
+
When debugging, if ConnectionErrors occur, set max_processes to 1.
|
|
118
|
+
Also given functions must be pickle-able.
|
|
119
|
+
|
|
120
|
+
"""
|
|
121
|
+
from winipedia_utils.concurrent.concurrent import concurrent_loop
|
|
122
|
+
|
|
123
|
+
return concurrent_loop(
|
|
124
|
+
threading=False,
|
|
125
|
+
process_function=process_function,
|
|
126
|
+
process_args=process_args,
|
|
127
|
+
process_args_static=process_args_static,
|
|
128
|
+
deepcopy_static_args=deepcopy_static_args,
|
|
129
|
+
process_args_len=process_args_len,
|
|
130
|
+
)
|
|
@@ -1,93 +1,93 @@
|
|
|
1
|
-
"""Multithreading utilities for concurrent execution.
|
|
2
|
-
|
|
3
|
-
This module provides functions for parallel processing using thread pools.
|
|
4
|
-
It includes utilities for handling thread pools, managing futures, and organizing
|
|
5
|
-
parallel execution of I/O-bound tasks.
|
|
6
|
-
Base helper functions that serve threading and processing are located in the
|
|
7
|
-
multiprocessing module.
|
|
8
|
-
|
|
9
|
-
Returns:
|
|
10
|
-
Various utility functions for multithreaded processing.
|
|
11
|
-
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from collections.abc import Callable, Generator, Iterable
|
|
15
|
-
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
|
16
|
-
from typing import Any
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def get_future_results_as_completed(
|
|
20
|
-
futures: Iterable[Future[Any]],
|
|
21
|
-
) -> Generator[Any, None, None]:
|
|
22
|
-
"""Get future results as they complete.
|
|
23
|
-
|
|
24
|
-
Yields results from futures in the order they complete,
|
|
25
|
-
not in the order they were submitted.
|
|
26
|
-
|
|
27
|
-
Args:
|
|
28
|
-
futures: List of Future objects to get results from
|
|
29
|
-
|
|
30
|
-
Yields:
|
|
31
|
-
The result of each completed future
|
|
32
|
-
|
|
33
|
-
"""
|
|
34
|
-
for future in as_completed(futures):
|
|
35
|
-
yield future.result()
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def multithread_loop(
|
|
39
|
-
process_function: Callable[..., Any],
|
|
40
|
-
process_args: Iterable[Iterable[Any]],
|
|
41
|
-
process_args_static: Iterable[Any] | None = None,
|
|
42
|
-
process_args_len: int = 1,
|
|
43
|
-
) -> list[Any]:
|
|
44
|
-
"""Process a loop using ThreadPoolExecutor for parallel execution.
|
|
45
|
-
|
|
46
|
-
Executes the given process_function with the provided arguments in parallel using
|
|
47
|
-
ThreadPoolExecutor, which is suitable for I/O-bound tasks.
|
|
48
|
-
|
|
49
|
-
Args:
|
|
50
|
-
process_function: Function that processes the given process_args
|
|
51
|
-
process_args: list of args to be processed by the process_function
|
|
52
|
-
e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
|
|
53
|
-
process_args_static: Optional constant arguments passed to each function call
|
|
54
|
-
process_args_len: Optional length of process_args
|
|
55
|
-
If not provided, it will not be taken into account
|
|
56
|
-
when calculating the max number of workers.
|
|
57
|
-
|
|
58
|
-
Returns:
|
|
59
|
-
List of results from the process_function executions
|
|
60
|
-
|
|
61
|
-
Note:
|
|
62
|
-
ThreadPoolExecutor is used for I/O-bound tasks, not for CPU-bound tasks.
|
|
63
|
-
|
|
64
|
-
"""
|
|
65
|
-
from winipedia_utils.concurrent.concurrent import concurrent_loop
|
|
66
|
-
|
|
67
|
-
return concurrent_loop(
|
|
68
|
-
threading=True,
|
|
69
|
-
process_function=process_function,
|
|
70
|
-
process_args=process_args,
|
|
71
|
-
process_args_static=process_args_static,
|
|
72
|
-
process_args_len=process_args_len,
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def imap_unordered(
|
|
77
|
-
executor: ThreadPoolExecutor,
|
|
78
|
-
func: Callable[..., Any],
|
|
79
|
-
iterable: Iterable[Any],
|
|
80
|
-
) -> Generator[Any, None, None]:
|
|
81
|
-
"""Apply a function to each item in an iterable in parallel.
|
|
82
|
-
|
|
83
|
-
Args:
|
|
84
|
-
executor: ThreadPoolExecutor to use for parallel execution
|
|
85
|
-
func: Function to apply to each item in the iterable
|
|
86
|
-
iterable: Iterable of items to apply the function to
|
|
87
|
-
|
|
88
|
-
Yields:
|
|
89
|
-
Results of applying the function to each item in the iterable
|
|
90
|
-
|
|
91
|
-
"""
|
|
92
|
-
results = [executor.submit(func, item) for item in iterable]
|
|
93
|
-
yield from get_future_results_as_completed(results)
|
|
1
|
+
"""Multithreading utilities for concurrent execution.
|
|
2
|
+
|
|
3
|
+
This module provides functions for parallel processing using thread pools.
|
|
4
|
+
It includes utilities for handling thread pools, managing futures, and organizing
|
|
5
|
+
parallel execution of I/O-bound tasks.
|
|
6
|
+
Base helper functions that serve threading and processing are located in the
|
|
7
|
+
multiprocessing module.
|
|
8
|
+
|
|
9
|
+
Returns:
|
|
10
|
+
Various utility functions for multithreaded processing.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from collections.abc import Callable, Generator, Iterable
|
|
15
|
+
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_future_results_as_completed(
|
|
20
|
+
futures: Iterable[Future[Any]],
|
|
21
|
+
) -> Generator[Any, None, None]:
|
|
22
|
+
"""Get future results as they complete.
|
|
23
|
+
|
|
24
|
+
Yields results from futures in the order they complete,
|
|
25
|
+
not in the order they were submitted.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
futures: List of Future objects to get results from
|
|
29
|
+
|
|
30
|
+
Yields:
|
|
31
|
+
The result of each completed future
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
for future in as_completed(futures):
|
|
35
|
+
yield future.result()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def multithread_loop(
|
|
39
|
+
process_function: Callable[..., Any],
|
|
40
|
+
process_args: Iterable[Iterable[Any]],
|
|
41
|
+
process_args_static: Iterable[Any] | None = None,
|
|
42
|
+
process_args_len: int = 1,
|
|
43
|
+
) -> list[Any]:
|
|
44
|
+
"""Process a loop using ThreadPoolExecutor for parallel execution.
|
|
45
|
+
|
|
46
|
+
Executes the given process_function with the provided arguments in parallel using
|
|
47
|
+
ThreadPoolExecutor, which is suitable for I/O-bound tasks.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
process_function: Function that processes the given process_args
|
|
51
|
+
process_args: list of args to be processed by the process_function
|
|
52
|
+
e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
|
|
53
|
+
process_args_static: Optional constant arguments passed to each function call
|
|
54
|
+
process_args_len: Optional length of process_args
|
|
55
|
+
If not provided, it will not be taken into account
|
|
56
|
+
when calculating the max number of workers.
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
List of results from the process_function executions
|
|
60
|
+
|
|
61
|
+
Note:
|
|
62
|
+
ThreadPoolExecutor is used for I/O-bound tasks, not for CPU-bound tasks.
|
|
63
|
+
|
|
64
|
+
"""
|
|
65
|
+
from winipedia_utils.concurrent.concurrent import concurrent_loop
|
|
66
|
+
|
|
67
|
+
return concurrent_loop(
|
|
68
|
+
threading=True,
|
|
69
|
+
process_function=process_function,
|
|
70
|
+
process_args=process_args,
|
|
71
|
+
process_args_static=process_args_static,
|
|
72
|
+
process_args_len=process_args_len,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def imap_unordered(
|
|
77
|
+
executor: ThreadPoolExecutor,
|
|
78
|
+
func: Callable[..., Any],
|
|
79
|
+
iterable: Iterable[Any],
|
|
80
|
+
) -> Generator[Any, None, None]:
|
|
81
|
+
"""Apply a function to each item in an iterable in parallel.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
executor: ThreadPoolExecutor to use for parallel execution
|
|
85
|
+
func: Function to apply to each item in the iterable
|
|
86
|
+
iterable: Iterable of items to apply the function to
|
|
87
|
+
|
|
88
|
+
Yields:
|
|
89
|
+
Results of applying the function to each item in the iterable
|
|
90
|
+
|
|
91
|
+
"""
|
|
92
|
+
results = [executor.submit(func, item) for item in iterable]
|
|
93
|
+
yield from get_future_results_as_completed(results)
|
winipedia_utils/consts.py
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
"""Constants used throughout the winipedia_utils package.
|
|
2
|
-
|
|
3
|
-
This module contains package-wide constants that are used by various
|
|
4
|
-
modules within the package. These constants define core configuration
|
|
5
|
-
values and identifiers for the package.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
PACKAGE_NAME = "winipedia_utils"
|
|
9
|
-
|
|
10
|
-
_DEV_DEPENDENCIES = [
|
|
11
|
-
"ruff",
|
|
12
|
-
"pre-commit",
|
|
13
|
-
"mypy",
|
|
14
|
-
"pytest",
|
|
15
|
-
"bandit",
|
|
16
|
-
"types-setuptools",
|
|
17
|
-
"types-tqdm",
|
|
18
|
-
"types-defusedxml",
|
|
19
|
-
"types-pyyaml",
|
|
20
|
-
"pytest-mock",
|
|
21
|
-
]
|
|
1
|
+
"""Constants used throughout the winipedia_utils package.
|
|
2
|
+
|
|
3
|
+
This module contains package-wide constants that are used by various
|
|
4
|
+
modules within the package. These constants define core configuration
|
|
5
|
+
values and identifiers for the package.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
PACKAGE_NAME = "winipedia_utils"
|
|
9
|
+
|
|
10
|
+
_DEV_DEPENDENCIES = [
|
|
11
|
+
"ruff",
|
|
12
|
+
"pre-commit",
|
|
13
|
+
"mypy",
|
|
14
|
+
"pytest",
|
|
15
|
+
"bandit",
|
|
16
|
+
"types-setuptools",
|
|
17
|
+
"types-tqdm",
|
|
18
|
+
"types-defusedxml",
|
|
19
|
+
"types-pyyaml",
|
|
20
|
+
"pytest-mock",
|
|
21
|
+
]
|
winipedia_utils/data/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"""__init__ module for winipedia_utils.data."""
|
|
1
|
+
"""__init__ module for winipedia_utils.data."""
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"""__init__ module."""
|
|
1
|
+
"""__init__ module."""
|