winipedia-utils 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. winipedia_utils/__init__.py +1 -0
  2. winipedia_utils/concurrent/__init__.py +1 -0
  3. winipedia_utils/concurrent/concurrent.py +242 -0
  4. winipedia_utils/concurrent/multiprocessing.py +115 -0
  5. winipedia_utils/concurrent/multithreading.py +93 -0
  6. winipedia_utils/consts.py +22 -0
  7. winipedia_utils/data/__init__.py +1 -0
  8. winipedia_utils/data/dataframe.py +7 -0
  9. winipedia_utils/django/__init__.py +27 -0
  10. winipedia_utils/django/bulk.py +536 -0
  11. winipedia_utils/django/command.py +334 -0
  12. winipedia_utils/django/database.py +304 -0
  13. winipedia_utils/git/__init__.py +1 -0
  14. winipedia_utils/git/gitignore.py +80 -0
  15. winipedia_utils/git/pre_commit/__init__.py +1 -0
  16. winipedia_utils/git/pre_commit/config.py +60 -0
  17. winipedia_utils/git/pre_commit/hooks.py +109 -0
  18. winipedia_utils/git/pre_commit/run_hooks.py +49 -0
  19. winipedia_utils/iterating/__init__.py +1 -0
  20. winipedia_utils/iterating/iterate.py +29 -0
  21. winipedia_utils/logging/__init__.py +1 -0
  22. winipedia_utils/logging/ansi.py +6 -0
  23. winipedia_utils/logging/config.py +64 -0
  24. winipedia_utils/logging/logger.py +26 -0
  25. winipedia_utils/modules/__init__.py +1 -0
  26. winipedia_utils/modules/class_.py +76 -0
  27. winipedia_utils/modules/function.py +86 -0
  28. winipedia_utils/modules/module.py +361 -0
  29. winipedia_utils/modules/package.py +350 -0
  30. winipedia_utils/oop/__init__.py +1 -0
  31. winipedia_utils/oop/mixins/__init__.py +1 -0
  32. winipedia_utils/oop/mixins/meta.py +315 -0
  33. winipedia_utils/oop/mixins/mixin.py +28 -0
  34. winipedia_utils/os/__init__.py +1 -0
  35. winipedia_utils/os/os.py +61 -0
  36. winipedia_utils/projects/__init__.py +1 -0
  37. winipedia_utils/projects/poetry/__init__.py +1 -0
  38. winipedia_utils/projects/poetry/config.py +91 -0
  39. winipedia_utils/projects/poetry/poetry.py +30 -0
  40. winipedia_utils/setup.py +36 -0
  41. winipedia_utils/testing/__init__.py +1 -0
  42. winipedia_utils/testing/assertions.py +23 -0
  43. winipedia_utils/testing/convention.py +177 -0
  44. winipedia_utils/testing/create_tests.py +286 -0
  45. winipedia_utils/testing/fixtures.py +28 -0
  46. winipedia_utils/testing/tests/__init__.py +1 -0
  47. winipedia_utils/testing/tests/base/__init__.py +1 -0
  48. winipedia_utils/testing/tests/base/fixtures/__init__.py +1 -0
  49. winipedia_utils/testing/tests/base/fixtures/fixture.py +6 -0
  50. winipedia_utils/testing/tests/base/fixtures/scopes/__init__.py +1 -0
  51. winipedia_utils/testing/tests/base/fixtures/scopes/class_.py +33 -0
  52. winipedia_utils/testing/tests/base/fixtures/scopes/function.py +7 -0
  53. winipedia_utils/testing/tests/base/fixtures/scopes/module.py +31 -0
  54. winipedia_utils/testing/tests/base/fixtures/scopes/package.py +7 -0
  55. winipedia_utils/testing/tests/base/fixtures/scopes/session.py +224 -0
  56. winipedia_utils/testing/tests/base/utils/__init__.py +1 -0
  57. winipedia_utils/testing/tests/base/utils/utils.py +82 -0
  58. winipedia_utils/testing/tests/conftest.py +26 -0
  59. winipedia_utils/text/__init__.py +1 -0
  60. winipedia_utils/text/string.py +126 -0
  61. winipedia_utils-0.1.0.dist-info/LICENSE +21 -0
  62. winipedia_utils-0.1.0.dist-info/METADATA +350 -0
  63. winipedia_utils-0.1.0.dist-info/RECORD +64 -0
  64. winipedia_utils-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1 @@
1
+ """__init__ module for winipedia_utils."""
@@ -0,0 +1 @@
1
+ """__init__ module for winipedia_utils.concurrent."""
@@ -0,0 +1,242 @@
1
+ """Concurrent processing utilities for parallel execution.
2
+
3
+ This module provides functions for concurrent processing using both multiprocessing
4
+ and multithreading approaches. It includes utilities for handling timeouts,
5
+ managing process pools, and organizing parallel execution of functions.
6
+
7
+ Returns:
8
+ Various utility functions for concurrent processing.
9
+
10
+ """
11
+
12
+ import multiprocessing
13
+ import os
14
+ import threading
15
+ from collections.abc import Callable, Generator, Iterable
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from copy import deepcopy
18
+ from functools import partial
19
+ from multiprocessing.pool import Pool
20
+ from typing import Any, cast
21
+
22
+ from tqdm import tqdm
23
+
24
+ from winipedia_utils.concurrent.multithreading import imap_unordered
25
+ from winipedia_utils.iterating.iterate import get_len_with_default
26
+ from winipedia_utils.logging.logger import get_logger
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
def get_order_and_func_result(
    func_order_args: tuple[Any, ...],
) -> tuple[int, Any]:
    """Run one packed (function, order, *args) work item and tag its result.

    Companion to ``generate_process_args``: each packed tuple carries the
    callable first, its submission index second, and the call arguments
    after that. The index is returned alongside the result so unordered
    pool results can be re-sorted into submission order later.

    Args:
        func_order_args: Tuple of (function, order index, *call arguments)

    Returns:
        A (order index, function result) pair

    """
    worker = func_order_args[0]
    position = func_order_args[1]
    call_args = func_order_args[2:]
    return position, worker(*call_args)
49
+
50
+
51
+ def generate_process_args(
52
+ *,
53
+ process_function: Callable[..., Any],
54
+ process_args: Iterable[Iterable[Any]],
55
+ process_args_static: Iterable[Any] | None = None,
56
+ deepcopy_static_args: Iterable[Any] | None = None,
57
+ ) -> Generator[tuple[Any, ...], None, None]:
58
+ """Prepare arguments for multiprocessing or multithreading execution.
59
+
60
+ Converts input arguments into a format suitable for parallel processing,
61
+ organizing them for efficient unpacking during execution. The function:
62
+ 1. Prepends process func and order indices to arguments
63
+ 2. Handles static arguments (with optional deep copying)
64
+ 3. Restructures arguments into tuples for unpacking
65
+
66
+ Args:
67
+ process_function: Function to be executed
68
+ process_args: Iterable of argument lists for each parallel call
69
+ process_args_static: Optional constant arguments to add to each call
70
+ deepcopy_static_args: Optional constant arguments that should be deep-copied
71
+
72
+ Returns:
73
+ A Genrator that yields one args tuple for each function call
74
+ First is the process function
75
+ Second item in the tuple is the order index
76
+ Second item in the tuple is the function
77
+ Rest of the items are the arguments for the function
78
+ The length of the generator
79
+ """
80
+ process_args_static = (
81
+ () if process_args_static is None else tuple(process_args_static)
82
+ )
83
+ deepcopy_static_args = (
84
+ () if deepcopy_static_args is None else tuple(deepcopy_static_args)
85
+ )
86
+ for order, process_arg in enumerate(process_args):
87
+ yield (
88
+ process_function,
89
+ order,
90
+ *process_arg,
91
+ *process_args_static,
92
+ *(
93
+ deepcopy(deepcopy_static_arg)
94
+ for deepcopy_static_arg in deepcopy_static_args
95
+ ),
96
+ )
97
+
98
+
99
def get_multiprocess_results_with_tqdm(
    results: Iterable[Any],
    process_func: Callable[..., Any],
    process_args_len: int,
    *,
    threads: bool,
) -> list[Any]:
    """Drain (order, result) pairs with a progress bar, restoring order.

    Wraps the result iterator in tqdm for progress display while it is
    consumed, then sorts the collected (order, result) tuples by their
    order index so the returned list matches submission order.

    Args:
        results: Iterable of (order, result) tuples from parallel execution
        process_func: Function that was executed in parallel (named in the bar)
        process_args_len: Expected number of results, used as the bar total
        threads: Whether threading (True) or multiprocessing (False) was used

    Returns:
        list[Any]: Results from parallel execution in original order

    """
    mode = "threading" if threads else "processing"
    task_unit = "threads" if threads else "processes"
    tracked = tqdm(
        results,
        total=process_args_len,
        desc=f"Multi{mode} {process_func.__name__}",
        unit=f" {task_unit}",
    )
    # Each element is (order, result): order restores submission order,
    # then the index is stripped off.
    ordered_pairs = sorted(tracked, key=lambda pair: pair[0])
    return [value for _, value in ordered_pairs]
133
+
134
+
135
def find_max_pools(
    *,
    threads: bool,
    process_args_len: int | None = None,
) -> int:
    """Determine how many workers to use for a parallel run.

    The budget derives from the CPU count, is reduced by tasks already
    running, and is optionally capped by the number of items to process.
    At least one worker is always returned.

    Args:
        threads: Whether to use threading (True) or multiprocessing (False)
        process_args_len: Number of items to process in parallel

    Returns:
        int: Maximum number of worker processes or threads to use

    """
    cores = os.cpu_count() or 1
    if threads:
        # Allow four threads per core — heuristic, presumably because the
        # threads are expected to be I/O-bound.
        busy = threading.active_count()
        budget = cores * 4
    else:
        busy = len(multiprocessing.active_children())
        budget = cores
    free = budget - busy
    # Only cap by item count when a truthy length was supplied.
    if process_args_len:
        free = min(free, process_args_len)
    pools = max(free, 1)

    logger.info(
        "Multi%s with max_pools: %s",
        "threading" if threads else "processing",
        pools,
    )

    return pools
175
+
176
+
177
def concurrent_loop(  # noqa: PLR0913
    *,
    threading: bool,
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Execute a function concurrently with multiple arguments using a pool executor.

    This function is a helper function for multiprocess_loop and multithread_loop.
    It is not meant to be used directly.

    Args:
        threading (bool):
            Whether to use threading (True) or multiprocessing (False)
        process_function (Callable[..., Any]):
            Function to be executed concurrently
        process_args (Iterable[Iterable[Any]]):
            One iterable of positional arguments per call
        process_args_static (Iterable[Any] | None, optional):
            Static arguments appended to every call. Defaults to None.
        deepcopy_static_args (Iterable[Any] | None, optional):
            Arguments that are deep-copied for each call. Defaults to None.
        process_args_len (int, optional):
            Fallback length of process_args, used to size the pool and the
            progress bar when process_args has no len(). Defaults to 1.

    Returns:
        list[Any]: Results of the process_function calls, in the order of
        process_args
    """
    # Use the real length when process_args supports len(); otherwise keep
    # the caller-provided fallback.
    process_args_len = get_len_with_default(process_args, process_args_len)
    # Repack each call as (function, order, *args) so workers can tag
    # results with their submission position.
    process_args = generate_process_args(
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
    )
    max_workers = find_max_pools(threads=threading, process_args_len=process_args_len)
    pool_executor = (
        ThreadPoolExecutor(max_workers=max_workers)
        if threading
        else Pool(processes=max_workers)
    )
    with pool_executor as pool:
        map_func: Callable[[Callable[..., Any], Iterable[Any]], Any]

        # NOTE(review): a length of 1 — which is also the default used when
        # the real length is unknown — falls back to the serial builtin map.
        # Confirm this is intended for length-less iterables.
        if process_args_len == 1:
            map_func = map
        elif threading:
            pool = cast("ThreadPoolExecutor", pool)
            map_func = partial(imap_unordered, pool)
        else:
            pool = cast("Pool", pool)
            map_func = pool.imap_unordered

        results = map_func(get_order_and_func_result, process_args)

        # Consumes the (order, result) pairs under a progress bar and
        # restores submission order before returning.
        return get_multiprocess_results_with_tqdm(
            results=results,
            process_func=process_function,
            process_args_len=process_args_len,
            threads=threading,
        )
@@ -0,0 +1,115 @@
1
+ """Multiprocessing utilities for concurrent execution.
2
+
3
+ This module provides functions for parallel processing using both multiprocessing
4
+ and multithreading approaches. It includes utilities for handling timeouts,
5
+ managing process pools, and organizing parallel execution of functions.
6
+
7
+ Returns:
8
+ Various utility functions for concurrent processing.
9
+
10
+ """
11
+
12
+ import multiprocessing
13
+ from collections.abc import Callable, Iterable
14
+ from functools import wraps
15
+ from multiprocessing.pool import Pool
16
+ from typing import Any
17
+
18
+ from winipedia_utils.logging.logger import get_logger
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
def cancel_on_timeout(seconds: float, message: str) -> Callable[..., Any]:
    """Cancel a function execution if it exceeds a specified timeout.

    Creates a wrapper that executes the decorated function in a separate process
    and terminates it if execution time exceeds the specified timeout.

    Args:
        seconds: Maximum execution time in seconds before timeout
        message: Error message included in the logged warning on timeout
            (it is not attached to the raised exception)

    Returns:
        A decorator function that wraps the target function with timeout functionality

    Raises:
        multiprocessing.TimeoutError: When function execution exceeds the timeout

    Note:
        Only works with functions that are pickle-able.
        This means it may not work as a decorator.
        Instead you should use it as a wrapper function.
        Like this:
        my_func = cancel_on_timeout(seconds=2, message="Test timeout")(my_func)

    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: object, **kwargs: object) -> object:
            # Run the call in a single-worker process so it can be killed
            # if it outlives the timeout.
            with Pool(processes=1) as pool:
                async_result = pool.apply_async(func, args, kwargs)
                try:
                    # Blocks for up to `seconds`; raises
                    # multiprocessing.TimeoutError if not done in time.
                    return async_result.get(timeout=seconds)
                except multiprocessing.TimeoutError:
                    logger.warning(
                        "%s -> Execution exceeded %s seconds: %s",
                        func.__name__,
                        seconds,
                        message,
                    )
                    raise
                finally:
                    pool.terminate()  # Ensure the worker process is killed
                    pool.join()  # Wait for cleanup

        return wrapper

    return decorator
70
+
71
+
72
def multiprocess_loop(
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Run ``process_function`` over ``process_args`` with a process pool.

    Thin wrapper around ``concurrent_loop`` that selects multiprocessing,
    which suits CPU-bound work because each call runs in a separate process
    and therefore bypasses the GIL.

    Args:
        process_function: Function that processes the given process_args
        process_args: List of args to be processed by the process_function,
            e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
        process_args_static: Optional constant arguments passed to each call
        deepcopy_static_args: Optional constant arguments that are
            deep-copied for each process
        process_args_len: Optional length of process_args. If not provided,
            it will not be taken into account when calculating the max
            number of processes.

    Returns:
        List of results from the process_function executions

    Note:
        Multiprocessing is not safe for shared mutable objects, unlike
        ThreadPoolExecutor. The function and its arguments must be
        pickle-able. When debugging, if ConnectionErrors occur, set
        max_processes to 1.

    """
    # Imported locally — presumably to avoid a circular import, since
    # winipedia_utils.concurrent.concurrent imports from this package.
    from winipedia_utils.concurrent.concurrent import concurrent_loop

    return concurrent_loop(
        threading=False,
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
        process_args_len=process_args_len,
    )
@@ -0,0 +1,93 @@
1
+ """Multithreading utilities for concurrent execution.
2
+
3
+ This module provides functions for parallel processing using thread pools.
4
+ It includes utilities for handling thread pools, managing futures, and organizing
5
+ parallel execution of I/O-bound tasks.
6
+ Base helper functions that serve threading and processing are located in the
7
+ multiprocessing module.
8
+
9
+ Returns:
10
+ Various utility functions for multithreaded processing.
11
+
12
+ """
13
+
14
+ from collections.abc import Callable, Generator, Iterable
15
+ from concurrent.futures import Future, ThreadPoolExecutor, as_completed
16
+ from typing import Any
17
+
18
+
19
def get_future_results_as_completed(
    futures: Iterable[Future[Any]],
) -> Generator[Any, None, None]:
    """Yield each future's result as soon as that future finishes.

    Completion order — not submission order — determines the order of the
    yielded values.

    Args:
        futures: Future objects to collect results from

    Yields:
        The result of each completed future

    """
    yield from (done.result() for done in as_completed(futures))
36
+
37
+
38
def multithread_loop(
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Process a loop using ThreadPoolExecutor for parallel execution.

    Executes the given process_function with the provided arguments in parallel
    using ThreadPoolExecutor, which is suitable for I/O-bound tasks.

    Args:
        process_function: Function that processes the given process_args
        process_args: list of args to be processed by the process_function,
            e.g. [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
        process_args_static: Optional constant arguments passed to each call
        deepcopy_static_args: Optional constant arguments that are
            deep-copied for each call so threads do not share the objects.
            Defaults to None (matches multiprocess_loop's signature).
        process_args_len: Optional length of process_args. If not provided,
            it will not be taken into account when calculating the max
            number of workers.

    Returns:
        List of results from the process_function executions

    Note:
        ThreadPoolExecutor is used for I/O-bound tasks, not for CPU-bound tasks.

    """
    # Imported locally — presumably to avoid a circular import, since
    # winipedia_utils.concurrent.concurrent imports imap_unordered from
    # this module.
    from winipedia_utils.concurrent.concurrent import concurrent_loop

    return concurrent_loop(
        threading=True,
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        # Previously not forwarded here although concurrent_loop and
        # multiprocess_loop both support it; added for consistency.
        deepcopy_static_args=deepcopy_static_args,
        process_args_len=process_args_len,
    )
74
+
75
+
76
def imap_unordered(
    executor: ThreadPoolExecutor,
    func: Callable[..., Any],
    iterable: Iterable[Any],
) -> Generator[Any, None, None]:
    """Apply ``func`` to every item of ``iterable`` on the given executor.

    All items are submitted up front; results are yielded in completion
    order rather than submission order, mirroring ``Pool.imap_unordered``.

    Args:
        executor: ThreadPoolExecutor to use for parallel execution
        func: Function to apply to each item in the iterable
        iterable: Iterable of items to apply the function to

    Yields:
        The result of ``func`` for each item, as each call finishes

    """
    pending = [executor.submit(func, item) for item in iterable]
    for finished in as_completed(pending):
        yield finished.result()
@@ -0,0 +1,22 @@
1
"""Constants used throughout the winipedia_utils package.

This module contains package-wide constants that are used by various
modules within the package. These constants define core configuration
values and identifiers for the package.
"""

# Canonical import name of this package.
PACKAGE_NAME = "winipedia_utils"

# Development-only tooling (linting, typing stubs, testing); the leading
# underscore marks the list as non-public. Presumably consumed by the
# project setup/bootstrap code rather than at runtime — confirm against
# winipedia_utils.setup.
_DEV_DEPENDENCIES = [
    "ruff",
    "pre-commit",
    "mypy",
    "pytest",
    "bandit",
    "types-setuptools",
    "types-tqdm",
    "types-defusedxml",
    "types-pyyaml",
    "pytest-mock",
    "django-stubs",
]
@@ -0,0 +1 @@
1
+ """__init__ module for winipedia_utils.data."""
@@ -0,0 +1,7 @@
1
+ """Dataframe utilities for data manipulation and analysis.
2
+
3
+ This module provides utility functions for working with pandas DataFrames,
4
+ including data cleaning, transformation, and aggregation operations.
5
+ These utilities help with data preprocessing and analysis tasks.
6
+
7
+ """
@@ -0,0 +1,27 @@
1
"""__init__ module for winipedia_utils.django."""

import django
import django_stubs_ext
from django.conf import settings

from winipedia_utils.logging.logger import get_logger

logger = get_logger(__name__)

# Monkeypatch django-stubs at import time — presumably so Django classes
# can be subscripted with generics at runtime as django-stubs annotations
# expect; confirm against django_stubs_ext docs.
django_stubs_ext.monkeypatch()
logger.info("Monkeypatched django-stubs")

# NOTE(review): module-level side effect — merely importing
# winipedia_utils.django configures Django (minimal in-memory SQLite,
# contenttypes app only) and calls django.setup() whenever no settings
# have been configured yet.
if not settings.configured:
    logger.info("Configuring minimal django settings")
    settings.configure(
        INSTALLED_APPS=[
            "django.contrib.contenttypes",
        ],
        DATABASES={
            "default": {
                "ENGINE": "django.db.backends.sqlite3",
                "NAME": ":memory:",
            }
        },
    )
    django.setup()