winiutils 2.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. winiutils/__init__.py +1 -0
  2. winiutils/dev/__init__.py +1 -0
  3. winiutils/dev/builders/__init__.py +1 -0
  4. winiutils/dev/cli/__init__.py +1 -0
  5. winiutils/dev/cli/subcommands.py +6 -0
  6. winiutils/dev/configs/__init__.py +1 -0
  7. winiutils/dev/tests/__init__.py +1 -0
  8. winiutils/dev/tests/fixtures/__init__.py +1 -0
  9. winiutils/dev/tests/fixtures/fixtures.py +32 -0
  10. winiutils/main.py +9 -0
  11. winiutils/py.typed +0 -0
  12. winiutils/resources/__init__.py +1 -0
  13. winiutils/src/__init__.py +4 -0
  14. winiutils/src/data/__init__.py +8 -0
  15. winiutils/src/data/dataframe/__init__.py +7 -0
  16. winiutils/src/data/dataframe/cleaning.py +734 -0
  17. winiutils/src/data/structures/__init__.py +8 -0
  18. winiutils/src/data/structures/dicts.py +40 -0
  19. winiutils/src/data/structures/text/__init__.py +7 -0
  20. winiutils/src/data/structures/text/string.py +157 -0
  21. winiutils/src/iterating/__init__.py +8 -0
  22. winiutils/src/iterating/concurrent/__init__.py +9 -0
  23. winiutils/src/iterating/concurrent/concurrent.py +301 -0
  24. winiutils/src/iterating/concurrent/multiprocessing.py +186 -0
  25. winiutils/src/iterating/concurrent/multithreading.py +132 -0
  26. winiutils/src/iterating/iterate.py +45 -0
  27. winiutils/src/oop/__init__.py +7 -0
  28. winiutils/src/oop/mixins/__init__.py +8 -0
  29. winiutils/src/oop/mixins/meta.py +217 -0
  30. winiutils/src/oop/mixins/mixin.py +58 -0
  31. winiutils/src/security/__init__.py +8 -0
  32. winiutils/src/security/cryptography.py +100 -0
  33. winiutils/src/security/keyring.py +167 -0
  34. winiutils-2.3.12.dist-info/METADATA +283 -0
  35. winiutils-2.3.12.dist-info/RECORD +38 -0
  36. winiutils-2.3.12.dist-info/WHEEL +4 -0
  37. winiutils-2.3.12.dist-info/entry_points.txt +4 -0
  38. winiutils-2.3.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,8 @@
1
+ """Data structures utilities package.
2
+
3
+ This package provides utilities for working with common data structures:
4
+
5
+ Modules:
6
+ dicts: Dictionary manipulation utilities.
7
+ text: Text and string processing utilities.
8
+ """
@@ -0,0 +1,40 @@
1
+ """Dictionary manipulation utilities.
2
+
3
+ This module provides utility functions for common dictionary operations
4
+ such as reversing key-value pairs.
5
+
6
+ Example:
7
+ >>> from winiutils.src.data.structures.dicts import reverse_dict
8
+ >>> original = {"a": 1, "b": 2}
9
+ >>> reverse_dict(original)
10
+ {1: 'a', 2: 'b'}
11
+ """
12
+
13
+ from typing import Any
14
+
15
+
16
def reverse_dict(d: dict[Any, Any]) -> dict[Any, Any]:
    """Swap the keys and values of a dictionary.

    Builds a new dictionary in which every value of ``d`` becomes a key
    mapped to its original key.

    Args:
        d: The dictionary to reverse. Every value must be hashable so it
            can serve as a key in the result.

    Returns:
        A new dictionary with keys and values exchanged.

    Raises:
        TypeError: If any value of ``d`` is not hashable.

    Warning:
        When ``d`` contains duplicate values, later key-value pairs
        overwrite earlier ones, so only the last key per value survives.

    Example:
        >>> reverse_dict({"name": "alice", "role": "admin"})
        {'alice': 'name', 'admin': 'role'}
    """
    swapped: dict[Any, Any] = {}
    for key, value in d.items():
        swapped[value] = key
    return swapped
@@ -0,0 +1,7 @@
1
+ """Text processing utilities package.
2
+
3
+ This package provides utilities for text and string manipulation:
4
+
5
+ Modules:
6
+ string: String manipulation, hashing, XML parsing, and input utilities.
7
+ """
@@ -0,0 +1,157 @@
1
+ """String manipulation utilities for text processing.
2
+
3
+ This module provides utility functions for common string operations including:
4
+ - User input with timeout constraints
5
+ - XML namespace extraction
6
+ - String truncation for logging
7
+ - Deterministic hash generation
8
+
9
+ Example:
10
+ >>> from winiutils.src.data.structures.text.string import (
11
+ ... value_to_truncated_string,
12
+ ... get_reusable_hash,
13
+ ... )
14
+ >>> value_to_truncated_string("Hello, World!", max_length=10)
15
+ 'Hello,...'
16
+ >>> get_reusable_hash("test") # doctest: +ELLIPSIS
17
+ '9f86d08...'
18
+ """
19
+
20
+ import hashlib
21
+ import logging
22
+ import textwrap
23
+ from io import StringIO
24
+
25
+ from defusedxml import ElementTree as DefusedElementTree
26
+
27
+ from winiutils.src.iterating.concurrent.multiprocessing import (
28
+ cancel_on_timeout,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
def ask_for_input_with_timeout(prompt: str, timeout: int) -> str:
    """Prompt the user for input, aborting after ``timeout`` seconds.

    Shows ``prompt`` and blocks for user input. The wait is bounded by
    wrapping the ``input()`` call in the ``cancel_on_timeout`` decorator,
    which runs it in a separate process, so a timeout cancels the read.

    Args:
        prompt: Text shown to the user before waiting for input.
        timeout: Maximum number of seconds to wait for the input.

    Raises:
        multiprocessing.TimeoutError: If no input arrives within
            ``timeout`` seconds.

    Returns:
        The text the user entered.

    Example:
        >>> # Blocks waiting for keyboard input:
        >>> # name = ask_for_input_with_timeout("Enter name: ", timeout=30)
    """

    @cancel_on_timeout(timeout, "Input not given within the timeout")
    def _read_user_input() -> str:
        return input(prompt)

    response: str = _read_user_input()
    return response
66
+
67
+
68
def find_xml_namespaces(xml: str | StringIO) -> dict[str, str]:
    """Collect namespace prefix-to-URI mappings declared in XML content.

    Streams the document with defusedxml's ``iterparse`` (safe against
    XML-based attacks) listening only for ``start-ns`` events, then drops
    the default namespace (empty prefix) from the collected mapping.

    Args:
        xml: The XML document as a string or ``StringIO``. A plain string
            is wrapped in a ``StringIO`` before parsing.

    Returns:
        A dictionary of namespace prefixes to URIs, without the default
        (empty-prefix) namespace.

    Example:
        >>> xml_content = '''<?xml version="1.0"?>
        ... <root xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
        ... </root>'''
        >>> find_xml_namespaces(xml_content)
        {'soap': 'http://schemas.xmlsoap.org/soap/envelope/'}
    """
    source = xml if isinstance(xml, StringIO) else StringIO(xml)
    # Each "start-ns" event yields a (prefix, uri) pair.
    namespaces: dict[str, str] = {
        str(prefix): str(uri)
        for _, (prefix, uri) in DefusedElementTree.iterparse(
            source, events=["start-ns"]
        )
    }
    # The default namespace has an empty prefix; callers only want
    # explicitly prefixed namespaces.
    namespaces.pop("", None)
    return namespaces
102
+
103
+
104
def value_to_truncated_string(value: object, max_length: int) -> str:
    """Render a value as a string no longer than ``max_length``.

    Handy for logs and displays with limited space. Truncation happens at
    word boundaries when possible and is signalled with a trailing "...".

    Args:
        value: Object whose ``str()`` representation should be shortened.
        max_length: Upper bound on the result length, including the
            ellipsis placeholder when truncation occurs.

    Returns:
        The (possibly shortened) string form of ``value``.

    Example:
        >>> value_to_truncated_string("Hello, World!", max_length=10)
        'Hello,...'
        >>> value_to_truncated_string([1, 2, 3], max_length=20)
        '[1, 2, 3]'
    """
    rendered = str(value)
    # textwrap.shorten collapses whitespace and cuts on word boundaries,
    # appending the placeholder only when truncation actually happens.
    return textwrap.shorten(rendered, width=max_length, placeholder="...")
128
+
129
+
130
def get_reusable_hash(value: object) -> str:
    """Compute a deterministic SHA-256 hash of an object's string form.

    In contrast to the builtin ``hash()``, the result is stable across
    Python sessions and machines, which makes it usable for caching,
    deduplication, or identification.

    Args:
        value: Any object; its ``__str__`` output is what gets hashed.

    Returns:
        The SHA-256 digest as a 64-character hexadecimal string.

    Note:
        Objects whose string representations are equal hash identically,
        regardless of their types or internal state.

    Example:
        >>> get_reusable_hash("test")
        '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'
        >>> get_reusable_hash({"key": "value"})  # doctest: +ELLIPSIS
        '...'
    """
    digest = hashlib.sha256()
    digest.update(str(value).encode("utf-8"))
    return digest.hexdigest()
@@ -0,0 +1,8 @@
1
+ """Iteration utilities package.
2
+
3
+ This package provides utilities for iteration and parallel processing:
4
+
5
+ Modules:
6
+ iterate: Basic iteration utilities for working with iterables.
7
+ concurrent: Concurrent processing utilities for multiprocessing and multithreading.
8
+ """
@@ -0,0 +1,9 @@
1
+ """Concurrent processing utilities package.
2
+
3
+ This package provides utilities for parallel execution using processes and threads:
4
+
5
+ Modules:
6
+ concurrent: Core concurrent processing infrastructure and shared utilities.
7
+ multiprocessing: CPU-bound parallel processing using multiprocessing pools.
8
+ multithreading: I/O-bound parallel processing using thread pools.
9
+ """
@@ -0,0 +1,301 @@
1
+ """Concurrent processing utilities for parallel execution.
2
+
3
+ This module provides core functions for concurrent processing using both
4
+ multiprocessing and multithreading approaches. It includes utilities for
5
+ handling timeouts, managing process pools, and organizing parallel execution
6
+ of functions.
7
+
8
+ The main entry point is ``concurrent_loop()``, which provides a unified
9
+ interface for both threading and multiprocessing execution.
10
+
11
+ Example:
12
+ >>> from winiutils.src.iterating.concurrent.concurrent import concurrent_loop
13
+ >>> def square(x):
14
+ ... return x * x
15
+ >>> results = concurrent_loop(
16
+ ... threading=True,
17
+ ... process_function=square,
18
+ ... process_args=[[1], [2], [3]],
19
+ ... process_args_len=3,
20
+ ... )
21
+ >>> results
22
+ [1, 4, 9]
23
+ """
24
+
25
+ import multiprocessing
26
+ import os
27
+ import threading
28
+ from collections.abc import Callable, Generator, Iterable
29
+ from concurrent.futures import ThreadPoolExecutor
30
+ from copy import deepcopy
31
+ from functools import partial
32
+ from typing import TYPE_CHECKING, Any, cast
33
+
34
+ from tqdm import tqdm
35
+
36
+ from winiutils.src.iterating.iterate import get_len_with_default
37
+
38
+ if TYPE_CHECKING:
39
+ from multiprocessing.pool import Pool
40
+
41
+ import logging
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
def get_order_and_func_result(
    func_order_args: tuple[Any, ...],
) -> tuple[int, Any]:
    """Run a packed (function, order, *args) tuple and tag the result.

    Companion to ``imap_unordered``-style pools: the call site packs the
    target function, its submission index, and its arguments into one
    tuple; this helper unpacks and executes it so results can later be
    re-sorted into submission order.

    Args:
        func_order_args: Tuple laid out as (callable, order index,
            argument 1, argument 2, ...).

    Returns:
        A ``(order_index, result)`` pair, where ``order_index`` is the
        original submission position and ``result`` the call's return
        value.
    """
    target = func_order_args[0]
    position = func_order_args[1]
    call_args = func_order_args[2:]
    return position, target(*call_args)
66
+
67
+
68
+ def generate_process_args(
69
+ *,
70
+ process_function: Callable[..., Any],
71
+ process_args: Iterable[Iterable[Any]],
72
+ process_args_static: Iterable[Any] | None = None,
73
+ deepcopy_static_args: Iterable[Any] | None = None,
74
+ ) -> Generator[tuple[Any, ...], None, None]:
75
+ """Prepare arguments for multiprocessing or multithreading execution.
76
+
77
+ Converts input arguments into a format suitable for parallel processing,
78
+ organizing them for efficient unpacking during execution.
79
+
80
+ The function performs the following transformations:
81
+ 1. Prepends the process function and order index to each argument tuple
82
+ 2. Appends static arguments to each call
83
+ 3. Deep-copies specified arguments for each call (for mutable objects)
84
+
85
+ Args:
86
+ process_function: The function to be executed in parallel.
87
+ process_args: Iterable of argument lists for each parallel call.
88
+ Each inner iterable contains the arguments for one function call.
89
+ process_args_static: Optional constant arguments to append to each
90
+ call. These are shared across all calls without copying.
91
+ deepcopy_static_args: Optional arguments that should be deep-copied
92
+ for each process. Use this for mutable objects that should not
93
+ be shared between processes.
94
+
95
+ Yields:
96
+ Tuples formatted as: (process_function, order_index, *args,
97
+ *static_args, *deepcopied_args)
98
+
99
+ Example:
100
+ >>> def add(a, b, c):
101
+ ... return a + b + c
102
+ >>> args = generate_process_args(
103
+ ... process_function=add,
104
+ ... process_args=[[1], [2]],
105
+ ... process_args_static=[10],
106
+ ... )
107
+ >>> list(args)
108
+ [(add, 0, 1, 10), (add, 1, 2, 10)]
109
+ """
110
+ process_args_static = (
111
+ () if process_args_static is None else tuple(process_args_static)
112
+ )
113
+ deepcopy_static_args = (
114
+ () if deepcopy_static_args is None else tuple(deepcopy_static_args)
115
+ )
116
+ for order, process_arg in enumerate(process_args):
117
+ yield (
118
+ process_function,
119
+ order,
120
+ *process_arg,
121
+ *process_args_static,
122
+ *(
123
+ deepcopy(deepcopy_static_arg)
124
+ for deepcopy_static_arg in deepcopy_static_args
125
+ ),
126
+ )
127
+
128
+
129
def get_multiprocess_results_with_tqdm(
    results: Iterable[Any],
    process_func: Callable[..., Any],
    process_args_len: int,
    *,
    threads: bool,
) -> list[Any]:
    """Drain parallel results behind a progress bar, restoring order.

    Wraps the (order, result) stream coming out of a pool in a tqdm
    progress bar, then sorts the collected pairs by their submission
    index and strips the index off.

    Args:
        results: Iterable of ``(order_index, result)`` tuples produced by
            the workers, in completion order.
        process_func: The function that was executed; shown in the
            progress-bar description.
        process_args_len: Total number of submitted items; used as the
            progress-bar total.
        threads: True when threading was used, False for multiprocessing;
            only affects the progress-bar labels.

    Returns:
        The bare results, ordered by original submission index.
    """
    progress = tqdm(
        results,
        total=process_args_len,
        desc=f"Multi{'threading' if threads else 'processing'} {process_func}",
        unit=f" {'threads' if threads else 'processes'}",
    )
    # Workers finish in arbitrary order; sort by the order index that
    # was packed alongside each result, then drop the index.
    ordered_pairs = sorted(progress, key=lambda pair: pair[0])
    return [payload for _, payload in ordered_pairs]
167
+
168
+
169
def find_max_pools(
    *,
    threads: bool,
    process_args_len: int | None = None,
) -> int:
    """Size the worker pool from CPU count, current load, and workload.

    Threading gets a budget of ``cpu_count * 4`` minus already-active
    threads; multiprocessing gets ``cpu_count`` minus active child
    processes. The result is capped at the number of items to process
    (when given) and floored at 1.

    Args:
        threads: True to size a thread pool, False for a process pool.
        process_args_len: Optional number of items to process; when
            truthy the pool never exceeds this.

    Returns:
        The number of workers to use; always at least 1.
    """
    cpu_count = os.cpu_count() or 1
    if threads:
        busy = threading.active_count()
        capacity = cpu_count * 4
    else:
        busy = len(multiprocessing.active_children())
        capacity = cpu_count
    headroom = capacity - busy
    # Never spin up more workers than there are items to process.
    if process_args_len:
        headroom = min(headroom, process_args_len)
    max_pools = max(headroom, 1)
    logger.info(
        "Multi%s with max_pools: %s",
        "threading" if threads else "processing",
        max_pools,
    )
    return max_pools
218
+
219
+
220
def concurrent_loop(  # noqa: PLR0913
    *,
    threading: bool,
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Run ``process_function`` concurrently over many argument sets.

    Unified driver behind ``multiprocess_loop()`` and
    ``multithread_loop()``: it packs per-call argument tuples, sizes the
    worker pool, dispatches through either a thread pool or a spawn-based
    process pool, and returns the results in submission order.

    Args:
        threading: Use threads (True; I/O-bound work) or processes
            (False; CPU-bound work — ``process_function`` must then be
            pickle-able).
        process_function: The callable executed once per argument set.
        process_args: Iterable of argument iterables, one per call.
        process_args_static: Optional constant arguments appended to
            every call without copying. Defaults to None.
        deepcopy_static_args: Optional arguments deep-copied per call so
            mutable state is not shared between workers. Defaults to
            None.
        process_args_len: Length of ``process_args``; drives the progress
            bar and pool sizing. Defaults to 1.

    Returns:
        Results of all calls, in original submission order.

    Note:
        Not meant to be called directly — use ``multiprocess_loop()`` or
        ``multithread_loop()`` instead.
    """
    from winiutils.src.iterating.concurrent.multiprocessing import (  # noqa: PLC0415 # avoid circular import
        get_spwan_pool,
    )
    from winiutils.src.iterating.concurrent.multithreading import (  # noqa: PLC0415 # avoid circular import
        imap_unordered,
    )

    process_args_len = get_len_with_default(process_args, process_args_len)
    packed_args = generate_process_args(
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
    )
    max_workers = find_max_pools(threads=threading, process_args_len=process_args_len)

    if threading:
        pool_executor = ThreadPoolExecutor(max_workers=max_workers)
    else:
        pool_executor = get_spwan_pool(processes=max_workers)

    with pool_executor as pool:
        # A single task needs no pool dispatch; otherwise use the
        # unordered-imap flavour matching the pool type.
        if process_args_len == 1:
            pending_results: Iterable[Any] = map(
                get_order_and_func_result, packed_args
            )
        elif threading:
            thread_pool = cast("ThreadPoolExecutor", pool)
            pending_results = imap_unordered(
                thread_pool, get_order_and_func_result, packed_args
            )
        else:
            process_pool = cast("Pool", pool)
            pending_results = process_pool.imap_unordered(
                get_order_and_func_result, packed_args
            )

        # Consume inside the `with` block: the lazy imap iterators need
        # the pool to stay alive until every result has been collected.
        return get_multiprocess_results_with_tqdm(
            results=pending_results,
            process_func=process_function,
            process_args_len=process_args_len,
            threads=threading,
        )