winipedia_utils-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- winipedia_utils/__init__.py +1 -0
- winipedia_utils/concurrent/__init__.py +1 -0
- winipedia_utils/concurrent/concurrent.py +242 -0
- winipedia_utils/concurrent/multiprocessing.py +115 -0
- winipedia_utils/concurrent/multithreading.py +93 -0
- winipedia_utils/consts.py +22 -0
- winipedia_utils/data/__init__.py +1 -0
- winipedia_utils/data/dataframe.py +7 -0
- winipedia_utils/django/__init__.py +27 -0
- winipedia_utils/django/bulk.py +536 -0
- winipedia_utils/django/command.py +334 -0
- winipedia_utils/django/database.py +304 -0
- winipedia_utils/git/__init__.py +1 -0
- winipedia_utils/git/gitignore.py +80 -0
- winipedia_utils/git/pre_commit/__init__.py +1 -0
- winipedia_utils/git/pre_commit/config.py +60 -0
- winipedia_utils/git/pre_commit/hooks.py +109 -0
- winipedia_utils/git/pre_commit/run_hooks.py +49 -0
- winipedia_utils/iterating/__init__.py +1 -0
- winipedia_utils/iterating/iterate.py +29 -0
- winipedia_utils/logging/__init__.py +1 -0
- winipedia_utils/logging/ansi.py +6 -0
- winipedia_utils/logging/config.py +64 -0
- winipedia_utils/logging/logger.py +26 -0
- winipedia_utils/modules/__init__.py +1 -0
- winipedia_utils/modules/class_.py +76 -0
- winipedia_utils/modules/function.py +86 -0
- winipedia_utils/modules/module.py +361 -0
- winipedia_utils/modules/package.py +350 -0
- winipedia_utils/oop/__init__.py +1 -0
- winipedia_utils/oop/mixins/__init__.py +1 -0
- winipedia_utils/oop/mixins/meta.py +315 -0
- winipedia_utils/oop/mixins/mixin.py +28 -0
- winipedia_utils/os/__init__.py +1 -0
- winipedia_utils/os/os.py +61 -0
- winipedia_utils/projects/__init__.py +1 -0
- winipedia_utils/projects/poetry/__init__.py +1 -0
- winipedia_utils/projects/poetry/config.py +91 -0
- winipedia_utils/projects/poetry/poetry.py +30 -0
- winipedia_utils/setup.py +36 -0
- winipedia_utils/testing/__init__.py +1 -0
- winipedia_utils/testing/assertions.py +23 -0
- winipedia_utils/testing/convention.py +177 -0
- winipedia_utils/testing/create_tests.py +286 -0
- winipedia_utils/testing/fixtures.py +28 -0
- winipedia_utils/testing/tests/__init__.py +1 -0
- winipedia_utils/testing/tests/base/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/fixture.py +6 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/class_.py +33 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/function.py +7 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/module.py +31 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/package.py +7 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/session.py +224 -0
- winipedia_utils/testing/tests/base/utils/__init__.py +1 -0
- winipedia_utils/testing/tests/base/utils/utils.py +82 -0
- winipedia_utils/testing/tests/conftest.py +26 -0
- winipedia_utils/text/__init__.py +1 -0
- winipedia_utils/text/string.py +126 -0
- winipedia_utils-0.1.0.dist-info/LICENSE +21 -0
- winipedia_utils-0.1.0.dist-info/METADATA +350 -0
- winipedia_utils-0.1.0.dist-info/RECORD +64 -0
- winipedia_utils-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1 @@
+"""__init__ module for winipedia_utils."""
@@ -0,0 +1 @@
+"""__init__ module for winipedia_utils.concurrent."""
@@ -0,0 +1,242 @@
+"""Concurrent processing utilities for parallel execution.
+
+This module provides functions for concurrent processing using both multiprocessing
+and multithreading approaches. It includes utilities for handling timeouts,
+managing process pools, and organizing parallel execution of functions.
+
+Returns:
+    Various utility functions for concurrent processing.
+
+"""
+
+import multiprocessing
+import os
+import threading
+from collections.abc import Callable, Generator, Iterable
+from concurrent.futures import ThreadPoolExecutor
+from copy import deepcopy
+from functools import partial
+from multiprocessing.pool import Pool
+from typing import Any, cast
+
+from tqdm import tqdm
+
+from winipedia_utils.concurrent.multithreading import imap_unordered
+from winipedia_utils.iterating.iterate import get_len_with_default
+from winipedia_utils.logging.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def get_order_and_func_result(
+    func_order_args: tuple[Any, ...],
+) -> tuple[int, Any]:
+    """Process function for imap with argument unpacking.
+
+    Helper function that can be used with imap_unordered to execute
+    a function with its arguments unpacked.
+
+    Args:
+        func_order_args: Tuple containing the function to be executed,
+            the order index, and the arguments for the function
+
+    Returns:
+        A tuple containing the order index and the result of the function execution
+
+    """
+    function, order, *args = func_order_args
+    return order, function(*args)
+
+
+def generate_process_args(
+    *,
+    process_function: Callable[..., Any],
+    process_args: Iterable[Iterable[Any]],
+    process_args_static: Iterable[Any] | None = None,
+    deepcopy_static_args: Iterable[Any] | None = None,
+) -> Generator[tuple[Any, ...], None, None]:
+    """Prepare arguments for multiprocessing or multithreading execution.
+
+    Converts input arguments into a format suitable for parallel processing,
+    organizing them for efficient unpacking during execution. The function:
+    1. Prepends the process function and order indices to the arguments
+    2. Handles static arguments (with optional deep copying)
+    3. Restructures arguments into tuples for unpacking
+
+    Args:
+        process_function: Function to be executed
+        process_args: Iterable of argument lists for each parallel call
+        process_args_static: Optional constant arguments to add to each call
+        deepcopy_static_args: Optional constant arguments that should be deep-copied
+
+    Returns:
+        A Generator that yields one args tuple for each function call.
+        The first item in each tuple is the process function,
+        the second item is the order index,
+        and the remaining items are the arguments for the function call.
+        The generator yields as many tuples as there are items in process_args.
+    """
+    process_args_static = (
+        () if process_args_static is None else tuple(process_args_static)
+    )
+    deepcopy_static_args = (
+        () if deepcopy_static_args is None else tuple(deepcopy_static_args)
+    )
+    for order, process_arg in enumerate(process_args):
+        yield (
+            process_function,
+            order,
+            *process_arg,
+            *process_args_static,
+            *(
+                deepcopy(deepcopy_static_arg)
+                for deepcopy_static_arg in deepcopy_static_args
+            ),
+        )
+
+
+def get_multiprocess_results_with_tqdm(
+    results: Iterable[Any],
+    process_func: Callable[..., Any],
+    process_args_len: int,
+    *,
+    threads: bool,
+) -> list[Any]:
+    """Get multiprocess results with tqdm progress tracking.
+
+    Processes results from parallel execution with a progress bar and ensures
+    they are returned in the original order.
+
+    Args:
+        results: Iterable of results from parallel execution
+        process_func: Function that was executed in parallel
+        process_args_len: Number of items processed in parallel
+        threads: Whether threading (True) or multiprocessing (False) was used
+
+    Returns:
+        list[Any]: Results from parallel execution in original order
+
+    """
+    results = tqdm(
+        results,
+        total=process_args_len,
+        desc=f"Multi{'threading' if threads else 'processing'} {process_func.__name__}",
+        unit=f" {'threads' if threads else 'processes'}",
+    )
+    results_list = list(results)
+    # each item is a tuple of (order, result),
+    # so sort by order to restore the original order
+    results_list = sorted(results_list, key=lambda x: x[0])
+    # now extract the results from the tuples
+    return [result[1] for result in results_list]
+
+
+def find_max_pools(
+    *,
+    threads: bool,
+    process_args_len: int | None = None,
+) -> int:
+    """Find the optimal number of worker processes or threads for parallel execution.
+
+    Determines the maximum number of worker processes or threads based on system
+    resources, active tasks, and the number of items to process.
+
+    Args:
+        threads: Whether to use threading (True) or multiprocessing (False)
+        process_args_len: Number of items to process in parallel
+
+    Returns:
+        int: Maximum number of worker processes or threads to use
+
+    """
+    # determine available workers from the CPU count and currently active tasks
+    cpu_count = os.cpu_count() or 1
+    if threads:
+        active_tasks = threading.active_count()
+        max_tasks = cpu_count * 4
+    else:
+        active_tasks = len(multiprocessing.active_children())
+        max_tasks = cpu_count
+
+    available_tasks = max_tasks - active_tasks
+    max_pools = (
+        min(available_tasks, process_args_len) if process_args_len else available_tasks
+    )
+    max_pools = max(max_pools, 1)
+
+    logger.info(
+        "Multi%s with max_pools: %s",
+        "threading" if threads else "processing",
+        max_pools,
+    )
+
+    return max_pools
+
+
+def concurrent_loop(  # noqa: PLR0913
+    *,
+    threading: bool,
+    process_function: Callable[..., Any],
+    process_args: Iterable[Iterable[Any]],
+    process_args_static: Iterable[Any] | None = None,
+    deepcopy_static_args: Iterable[Any] | None = None,
+    process_args_len: int = 1,
+) -> list[Any]:
+    """Execute a function concurrently with multiple arguments using a pool executor.
+
+    This function is a helper function for multiprocess_loop and multithread_loop.
+    It is not meant to be used directly.
+
+    Args:
+        threading (bool):
+            Whether to use threading (True) or multiprocessing (False)
+        process_function (Callable[..., Any]):
+            Function to be executed concurrently
+        process_args (Iterable[Iterable[Any]]):
+            Arguments for each process
+        process_args_static (Iterable[Any] | None, optional):
+            Static arguments to pass to each process. Defaults to None.
+        deepcopy_static_args (Iterable[Any] | None, optional):
+            Arguments that should be deep-copied for each process. Defaults to None.
+        process_args_len (int, optional):
+            Length of process_args. Defaults to 1.
+
+    Returns:
+        list[Any]: Results from the process_function executions
+    """
+    process_args_len = get_len_with_default(process_args, process_args_len)
+    process_args = generate_process_args(
+        process_function=process_function,
+        process_args=process_args,
+        process_args_static=process_args_static,
+        deepcopy_static_args=deepcopy_static_args,
+    )
+    max_workers = find_max_pools(threads=threading, process_args_len=process_args_len)
+    pool_executor = (
+        ThreadPoolExecutor(max_workers=max_workers)
+        if threading
+        else Pool(processes=max_workers)
+    )
+    with pool_executor as pool:
+        map_func: Callable[[Callable[..., Any], Iterable[Any]], Any]
+
+        if process_args_len == 1:
+            map_func = map
+        elif threading:
+            pool = cast("ThreadPoolExecutor", pool)
+            map_func = partial(imap_unordered, pool)
+        else:
+            pool = cast("Pool", pool)
+            map_func = pool.imap_unordered
+
+        results = map_func(get_order_and_func_result, process_args)
+
+        return get_multiprocess_results_with_tqdm(
+            results=results,
+            process_func=process_function,
+            process_args_len=process_args_len,
+            threads=threading,
+        )
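
For context, a minimal sketch of the ordering round-trip that concurrent_loop relies on: each work item is packed as (function, order, *args), executed out of order, and then sorted back by its order index. It assumes winipedia_utils 0.1.0 is installed; the add function and its inputs are hypothetical, used only for illustration.

    from winipedia_utils.concurrent.concurrent import (
        generate_process_args,
        get_order_and_func_result,
    )

    def add(a: int, b: int) -> int:
        # hypothetical worker function, used only for this illustration
        return a + b

    # pack (function, order, *args) tuples as the pool would receive them
    packed = generate_process_args(
        process_function=add,
        process_args=[(1, 2), (3, 4), (5, 6)],
    )
    # each call returns (order, result); sorting by order restores the input order
    ordered = sorted(map(get_order_and_func_result, packed))
    print([result for _, result in ordered])  # [3, 7, 11]
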
@@ -0,0 +1,115 @@
+"""Multiprocessing utilities for concurrent execution.
+
+This module provides functions for parallel processing using both multiprocessing
+and multithreading approaches. It includes utilities for handling timeouts,
+managing process pools, and organizing parallel execution of functions.
+
+Returns:
+    Various utility functions for concurrent processing.
+
+"""
+
+import multiprocessing
+from collections.abc import Callable, Iterable
+from functools import wraps
+from multiprocessing.pool import Pool
+from typing import Any
+
+from winipedia_utils.logging.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def cancel_on_timeout(seconds: float, message: str) -> Callable[..., Any]:
+    """Cancel a function execution if it exceeds a specified timeout.
+
+    Creates a wrapper that executes the decorated function in a separate process
+    and terminates it if execution time exceeds the specified timeout.
+
+    Args:
+        seconds: Maximum execution time in seconds before timeout
+        message: Error message to include in the raised TimeoutError
+
+    Returns:
+        A decorator function that wraps the target function with timeout functionality
+
+    Raises:
+        multiprocessing.TimeoutError: When function execution exceeds the timeout
+
+    Note:
+        Only works with functions that are pickle-able.
+        This means it may not work as a decorator.
+        Instead, use it as a wrapper function.
+        Like this:
+        my_func = cancel_on_timeout(seconds=2, message="Test timeout")(my_func)
+
+    """
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def wrapper(*args: object, **kwargs: object) -> object:
+            with Pool(processes=1) as pool:
+                async_result = pool.apply_async(func, args, kwargs)
+                try:
+                    return async_result.get(timeout=seconds)
+                except multiprocessing.TimeoutError:
+                    logger.warning(
+                        "%s -> Execution exceeded %s seconds: %s",
+                        func.__name__,
+                        seconds,
+                        message,
+                    )
+                    raise
+                finally:
+                    pool.terminate()  # Ensure the worker process is killed
+                    pool.join()  # Wait for cleanup
+
+        return wrapper
+
+    return decorator
+
+
+def multiprocess_loop(
+    process_function: Callable[..., Any],
+    process_args: Iterable[Iterable[Any]],
+    process_args_static: Iterable[Any] | None = None,
+    deepcopy_static_args: Iterable[Any] | None = None,
+    process_args_len: int = 1,
+) -> list[Any]:
+    """Process a loop using a multiprocessing Pool for parallel execution.
+
+    Executes the given process_function with the provided arguments in parallel using
+    a multiprocessing Pool, which is suitable for CPU-bound tasks.
+
+    Args:
+        process_function: Function that processes the given process_args
+        process_args: List of args to be processed by the process_function,
+            e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
+        process_args_static: Optional constant arguments passed to each function call
+        deepcopy_static_args: Optional arguments that should be
+            deep-copied for each process
+        process_args_len: Optional length of process_args.
+            If not provided, it will not be taken into account
+            when calculating the max number of processes.
+
+    Returns:
+        List of results from the process_function executions
+
+    Note:
+        Pool is used for CPU-bound tasks as it bypasses
+        Python's GIL by creating separate processes.
+        Multiprocessing is not safe for mutable objects, unlike ThreadPoolExecutor.
+        When debugging, if ConnectionErrors occur, set max_processes to 1.
+        Also, given functions must be pickle-able.
+
+    """
+    from winipedia_utils.concurrent.concurrent import concurrent_loop
+
+    return concurrent_loop(
+        threading=False,
+        process_function=process_function,
+        process_args=process_args,
+        process_args_static=process_args_static,
+        deepcopy_static_args=deepcopy_static_args,
+        process_args_len=process_args_len,
+    )
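
A usage sketch for the two helpers above, assuming winipedia_utils 0.1.0 is installed; square is a hypothetical, module-level (and therefore pickle-able) worker function.

    from winipedia_utils.concurrent.multiprocessing import (
        cancel_on_timeout,
        multiprocess_loop,
    )

    def square(x: int) -> int:
        # CPU-bound worker; must be defined at module level to stay pickle-able
        return x * x

    if __name__ == "__main__":
        # run square(1), square(2), ... in separate processes; results keep input order
        results = multiprocess_loop(
            process_function=square,
            process_args=[(1,), (2,), (3,), (4,)],
            process_args_len=4,
        )
        print(results)  # [1, 4, 9, 16]

        # wrap rather than decorate, as the docstring recommends
        guarded = cancel_on_timeout(seconds=2, message="took too long")(square)
        print(guarded(5))  # 25, or multiprocessing.TimeoutError after 2 seconds
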
@@ -0,0 +1,93 @@
+"""Multithreading utilities for concurrent execution.
+
+This module provides functions for parallel processing using thread pools.
+It includes utilities for handling thread pools, managing futures, and organizing
+parallel execution of I/O-bound tasks.
+Base helper functions that serve both threading and processing are located in the
+concurrent module.
+
+Returns:
+    Various utility functions for multithreaded processing.
+
+"""
+
+from collections.abc import Callable, Generator, Iterable
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+from typing import Any
+
+
+def get_future_results_as_completed(
+    futures: Iterable[Future[Any]],
+) -> Generator[Any, None, None]:
+    """Get future results as they complete.
+
+    Yields results from futures in the order they complete,
+    not in the order they were submitted.
+
+    Args:
+        futures: List of Future objects to get results from
+
+    Yields:
+        The result of each completed future
+
+    """
+    for future in as_completed(futures):
+        yield future.result()
+
+
+def multithread_loop(
+    process_function: Callable[..., Any],
+    process_args: Iterable[Iterable[Any]],
+    process_args_static: Iterable[Any] | None = None,
+    process_args_len: int = 1,
+) -> list[Any]:
+    """Process a loop using ThreadPoolExecutor for parallel execution.
+
+    Executes the given process_function with the provided arguments in parallel using
+    ThreadPoolExecutor, which is suitable for I/O-bound tasks.
+
+    Args:
+        process_function: Function that processes the given process_args
+        process_args: List of args to be processed by the process_function,
+            e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
+        process_args_static: Optional constant arguments passed to each function call
+        process_args_len: Optional length of process_args.
+            If not provided, it will not be taken into account
+            when calculating the max number of workers.
+
+    Returns:
+        List of results from the process_function executions
+
+    Note:
+        ThreadPoolExecutor is used for I/O-bound tasks, not for CPU-bound tasks.
+
+    """
+    from winipedia_utils.concurrent.concurrent import concurrent_loop
+
+    return concurrent_loop(
+        threading=True,
+        process_function=process_function,
+        process_args=process_args,
+        process_args_static=process_args_static,
+        process_args_len=process_args_len,
+    )
+
+
+def imap_unordered(
+    executor: ThreadPoolExecutor,
+    func: Callable[..., Any],
+    iterable: Iterable[Any],
+) -> Generator[Any, None, None]:
+    """Apply a function to each item in an iterable in parallel.
+
+    Args:
+        executor: ThreadPoolExecutor to use for parallel execution
+        func: Function to apply to each item in the iterable
+        iterable: Iterable of items to apply the function to
+
+    Yields:
+        Results of applying the function to each item in the iterable
+
+    """
+    results = [executor.submit(func, item) for item in iterable]
+    yield from get_future_results_as_completed(results)
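
A usage sketch, assuming winipedia_utils 0.1.0 is installed; fetch and the URL list are hypothetical stand-ins for an I/O-bound task.

    from concurrent.futures import ThreadPoolExecutor

    from winipedia_utils.concurrent.multithreading import (
        imap_unordered,
        multithread_loop,
    )

    def fetch(url: str, timeout: float) -> tuple[str, float]:
        # stand-in for an I/O-bound call such as an HTTP request
        return url, timeout

    urls = ["https://example.com/a", "https://example.com/b"]

    # results come back in the same order as urls
    results = multithread_loop(
        process_function=fetch,
        process_args=[(url,) for url in urls],
        process_args_static=(5.0,),  # shared timeout appended to every call
        process_args_len=len(urls),
    )

    # lower-level helper: yields results as threads finish, order not guaranteed
    with ThreadPoolExecutor(max_workers=2) as executor:
        for length in imap_unordered(executor, len, urls):
            print(length)
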
@@ -0,0 +1,22 @@
+"""Constants used throughout the winipedia_utils package.
+
+This module contains package-wide constants that are used by various
+modules within the package. These constants define core configuration
+values and identifiers for the package.
+"""
+
+PACKAGE_NAME = "winipedia_utils"
+
+_DEV_DEPENDENCIES = [
+    "ruff",
+    "pre-commit",
+    "mypy",
+    "pytest",
+    "bandit",
+    "types-setuptools",
+    "types-tqdm",
+    "types-defusedxml",
+    "types-pyyaml",
+    "pytest-mock",
+    "django-stubs",
+]
@@ -0,0 +1 @@
+"""__init__ module for winipedia_utils.data."""
@@ -0,0 +1,7 @@
+"""Dataframe utilities for data manipulation and analysis.
+
+This module provides utility functions for working with pandas DataFrames,
+including data cleaning, transformation, and aggregation operations.
+These utilities help with data preprocessing and analysis tasks.
+
+"""
@@ -0,0 +1,27 @@
+"""__init__ module for winipedia_utils.django."""
+
+import django
+import django_stubs_ext
+from django.conf import settings
+
+from winipedia_utils.logging.logger import get_logger
+
+logger = get_logger(__name__)
+
+django_stubs_ext.monkeypatch()
+logger.info("Monkeypatched django-stubs")
+
+if not settings.configured:
+    logger.info("Configuring minimal django settings")
+    settings.configure(
+        INSTALLED_APPS=[
+            "django.contrib.contenttypes",
+        ],
+        DATABASES={
+            "default": {
+                "ENGINE": "django.db.backends.sqlite3",
+                "NAME": ":memory:",
+            }
+        },
+    )
+    django.setup()
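
Importing this subpackage is itself the setup step: when no settings are configured, it applies the minimal in-memory SQLite configuration above and calls django.setup(). A sketch of relying on that side effect, assuming winipedia_utils 0.1.0 and Django are installed:

    # the import configures Django if settings.configured is False
    import winipedia_utils.django  # noqa: F401

    from django.contrib.contenttypes.models import ContentType

    # contenttypes is the only installed app in the minimal configuration
    print(ContentType._meta.app_label)  # "contenttypes"
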