winiutils 2.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. winiutils/__init__.py +1 -0
  2. winiutils/dev/__init__.py +1 -0
  3. winiutils/dev/builders/__init__.py +1 -0
  4. winiutils/dev/cli/__init__.py +1 -0
  5. winiutils/dev/cli/subcommands.py +6 -0
  6. winiutils/dev/configs/__init__.py +1 -0
  7. winiutils/dev/tests/__init__.py +1 -0
  8. winiutils/dev/tests/fixtures/__init__.py +1 -0
  9. winiutils/dev/tests/fixtures/fixtures.py +32 -0
  10. winiutils/main.py +9 -0
  11. winiutils/py.typed +0 -0
  12. winiutils/resources/__init__.py +1 -0
  13. winiutils/src/__init__.py +4 -0
  14. winiutils/src/data/__init__.py +8 -0
  15. winiutils/src/data/dataframe/__init__.py +7 -0
  16. winiutils/src/data/dataframe/cleaning.py +734 -0
  17. winiutils/src/data/structures/__init__.py +8 -0
  18. winiutils/src/data/structures/dicts.py +40 -0
  19. winiutils/src/data/structures/text/__init__.py +7 -0
  20. winiutils/src/data/structures/text/string.py +157 -0
  21. winiutils/src/iterating/__init__.py +8 -0
  22. winiutils/src/iterating/concurrent/__init__.py +9 -0
  23. winiutils/src/iterating/concurrent/concurrent.py +301 -0
  24. winiutils/src/iterating/concurrent/multiprocessing.py +186 -0
  25. winiutils/src/iterating/concurrent/multithreading.py +132 -0
  26. winiutils/src/iterating/iterate.py +45 -0
  27. winiutils/src/oop/__init__.py +7 -0
  28. winiutils/src/oop/mixins/__init__.py +8 -0
  29. winiutils/src/oop/mixins/meta.py +217 -0
  30. winiutils/src/oop/mixins/mixin.py +58 -0
  31. winiutils/src/security/__init__.py +8 -0
  32. winiutils/src/security/cryptography.py +100 -0
  33. winiutils/src/security/keyring.py +167 -0
  34. winiutils-2.3.12.dist-info/METADATA +283 -0
  35. winiutils-2.3.12.dist-info/RECORD +38 -0
  36. winiutils-2.3.12.dist-info/WHEEL +4 -0
  37. winiutils-2.3.12.dist-info/entry_points.txt +4 -0
  38. winiutils-2.3.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,8 @@
1
+ """Data structures utilities package.
2
+
3
+ This package provides utilities for working with common data structures:
4
+
5
+ Modules:
6
+ dicts: Dictionary manipulation utilities.
7
+ text: Text and string processing utilities.
8
+ """
@@ -0,0 +1,40 @@
1
+ """Dictionary manipulation utilities.
2
+
3
+ This module provides utility functions for common dictionary operations
4
+ such as reversing key-value pairs.
5
+
6
+ Example:
7
+ >>> from winiutils.src.data.structures.dicts import reverse_dict
8
+ >>> original = {"a": 1, "b": 2}
9
+ >>> reverse_dict(original)
10
+ {1: 'a', 2: 'b'}
11
+ """
12
+
13
+ from typing import Any
14
+
15
+
16
def reverse_dict(d: dict[Any, Any]) -> dict[Any, Any]:
    """Swap the keys and values of a dictionary.

    Builds a new dictionary in which every value of ``d`` becomes a key
    mapped to its original key.

    Args:
        d: The dictionary to reverse. Every value must be hashable so it
            can serve as a key in the result.

    Returns:
        A new dictionary with keys and values exchanged.

    Raises:
        TypeError: If any value of ``d`` is not hashable.

    Warning:
        When ``d`` contains duplicate values, later key-value pairs
        overwrite earlier ones, so only the last key per value survives.

    Example:
        >>> reverse_dict({"name": "alice", "role": "admin"})
        {'alice': 'name', 'admin': 'role'}
    """
    swapped: dict[Any, Any] = {}
    for key, value in d.items():
        swapped[value] = key
    return swapped
@@ -0,0 +1,7 @@
1
+ """Text processing utilities package.
2
+
3
+ This package provides utilities for text and string manipulation:
4
+
5
+ Modules:
6
+ string: String manipulation, hashing, XML parsing, and input utilities.
7
+ """
@@ -0,0 +1,157 @@
1
+ """String manipulation utilities for text processing.
2
+
3
+ This module provides utility functions for common string operations including:
4
+ - User input with timeout constraints
5
+ - XML namespace extraction
6
+ - String truncation for logging
7
+ - Deterministic hash generation
8
+
9
+ Example:
10
+ >>> from winiutils.src.data.structures.text.string import (
11
+ ... value_to_truncated_string,
12
+ ... get_reusable_hash,
13
+ ... )
14
+ >>> value_to_truncated_string("Hello, World!", max_length=10)
15
+ 'Hello,...'
16
+ >>> get_reusable_hash("test") # doctest: +ELLIPSIS
17
+ '9f86d08...'
18
+ """
19
+
20
+ import hashlib
21
+ import logging
22
+ import textwrap
23
+ from io import StringIO
24
+
25
+ from defusedxml import ElementTree as DefusedElementTree
26
+
27
+ from winiutils.src.iterating.concurrent.multiprocessing import (
28
+ cancel_on_timeout,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
def ask_for_input_with_timeout(prompt: str, timeout: int) -> str:
    """Prompt the user for input, aborting after ``timeout`` seconds.

    Shows ``prompt`` and blocks for user input. The wait is bounded by
    wrapping the ``input()`` call in the ``cancel_on_timeout`` decorator,
    which runs it in a separate process, so a timeout cancels the read.

    Args:
        prompt: Text shown to the user before waiting for input.
        timeout: Maximum number of seconds to wait for the input.

    Raises:
        multiprocessing.TimeoutError: If no input arrives within
            ``timeout`` seconds.

    Returns:
        The text the user entered.

    Example:
        >>> # Blocks waiting for keyboard input:
        >>> # name = ask_for_input_with_timeout("Enter name: ", timeout=30)
    """

    @cancel_on_timeout(timeout, "Input not given within the timeout")
    def _read_user_input() -> str:
        return input(prompt)

    response: str = _read_user_input()
    return response
66
+
67
+
68
def find_xml_namespaces(xml: str | StringIO) -> dict[str, str]:
    """Collect namespace prefix-to-URI mappings declared in XML content.

    Streams the document with defusedxml's ``iterparse`` (safe against
    XML-based attacks) listening only for ``start-ns`` events, then drops
    the default namespace (empty prefix) from the collected mapping.

    Args:
        xml: The XML document as a string or ``StringIO``. A plain string
            is wrapped in a ``StringIO`` before parsing.

    Returns:
        A dictionary of namespace prefixes to URIs, without the default
        (empty-prefix) namespace.

    Example:
        >>> xml_content = '''<?xml version="1.0"?>
        ... <root xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
        ... </root>'''
        >>> find_xml_namespaces(xml_content)
        {'soap': 'http://schemas.xmlsoap.org/soap/envelope/'}
    """
    source = xml if isinstance(xml, StringIO) else StringIO(xml)
    # Each "start-ns" event yields a (prefix, uri) pair.
    namespaces: dict[str, str] = {
        str(prefix): str(uri)
        for _, (prefix, uri) in DefusedElementTree.iterparse(
            source, events=["start-ns"]
        )
    }
    # The default namespace has an empty prefix; callers only want
    # explicitly prefixed namespaces.
    namespaces.pop("", None)
    return namespaces
102
+
103
+
104
def value_to_truncated_string(value: object, max_length: int) -> str:
    """Render a value as a string no longer than ``max_length``.

    Handy for logs and displays with limited space. Truncation happens at
    word boundaries when possible and is signalled with a trailing "...".

    Args:
        value: Object whose ``str()`` representation should be shortened.
        max_length: Upper bound on the result length, including the
            ellipsis placeholder when truncation occurs.

    Returns:
        The (possibly shortened) string form of ``value``.

    Example:
        >>> value_to_truncated_string("Hello, World!", max_length=10)
        'Hello,...'
        >>> value_to_truncated_string([1, 2, 3], max_length=20)
        '[1, 2, 3]'
    """
    rendered = str(value)
    # textwrap.shorten collapses whitespace and cuts on word boundaries,
    # appending the placeholder only when truncation actually happens.
    return textwrap.shorten(rendered, width=max_length, placeholder="...")
128
+
129
+
130
def get_reusable_hash(value: object) -> str:
    """Compute a deterministic SHA-256 hash of an object's string form.

    In contrast to the builtin ``hash()``, the result is stable across
    Python sessions and machines, which makes it usable for caching,
    deduplication, or identification.

    Args:
        value: Any object; its ``__str__`` output is what gets hashed.

    Returns:
        The SHA-256 digest as a 64-character hexadecimal string.

    Note:
        Objects whose string representations are equal hash identically,
        regardless of their types or internal state.

    Example:
        >>> get_reusable_hash("test")
        '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'
        >>> get_reusable_hash({"key": "value"})  # doctest: +ELLIPSIS
        '...'
    """
    digest = hashlib.sha256()
    digest.update(str(value).encode("utf-8"))
    return digest.hexdigest()
@@ -0,0 +1,8 @@
1
+ """Iteration utilities package.
2
+
3
+ This package provides utilities for iteration and parallel processing:
4
+
5
+ Modules:
6
+ iterate: Basic iteration utilities for working with iterables.
7
+ concurrent: Concurrent processing utilities for multiprocessing and multithreading.
8
+ """
@@ -0,0 +1,9 @@
1
+ """Concurrent processing utilities package.
2
+
3
+ This package provides utilities for parallel execution using processes and threads:
4
+
5
+ Modules:
6
+ concurrent: Core concurrent processing infrastructure and shared utilities.
7
+ multiprocessing: CPU-bound parallel processing using multiprocessing pools.
8
+ multithreading: I/O-bound parallel processing using thread pools.
9
+ """
@@ -0,0 +1,301 @@
1
+ """Concurrent processing utilities for parallel execution.
2
+
3
+ This module provides core functions for concurrent processing using both
4
+ multiprocessing and multithreading approaches. It includes utilities for
5
+ handling timeouts, managing process pools, and organizing parallel execution
6
+ of functions.
7
+
8
+ The main entry point is ``concurrent_loop()``, which provides a unified
9
+ interface for both threading and multiprocessing execution.
10
+
11
+ Example:
12
+ >>> from winiutils.src.iterating.concurrent.concurrent import concurrent_loop
13
+ >>> def square(x):
14
+ ... return x * x
15
+ >>> results = concurrent_loop(
16
+ ... threading=True,
17
+ ... process_function=square,
18
+ ... process_args=[[1], [2], [3]],
19
+ ... process_args_len=3,
20
+ ... )
21
+ >>> results
22
+ [1, 4, 9]
23
+ """
24
+
25
+ import multiprocessing
26
+ import os
27
+ import threading
28
+ from collections.abc import Callable, Generator, Iterable
29
+ from concurrent.futures import ThreadPoolExecutor
30
+ from copy import deepcopy
31
+ from functools import partial
32
+ from typing import TYPE_CHECKING, Any, cast
33
+
34
+ from tqdm import tqdm
35
+
36
+ from winiutils.src.iterating.iterate import get_len_with_default
37
+
38
+ if TYPE_CHECKING:
39
+ from multiprocessing.pool import Pool
40
+
41
+ import logging
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
def get_order_and_func_result(
    func_order_args: tuple[Any, ...],
) -> tuple[int, Any]:
    """Run a packed (function, order, *args) tuple and tag the result.

    Companion to ``imap_unordered``-style pools: the call site packs the
    target function, its submission index, and its arguments into one
    tuple; this helper unpacks and executes it so results can later be
    re-sorted into submission order.

    Args:
        func_order_args: Tuple laid out as (callable, order index,
            argument 1, argument 2, ...).

    Returns:
        A ``(order_index, result)`` pair, where ``order_index`` is the
        original submission position and ``result`` the call's return
        value.
    """
    target = func_order_args[0]
    position = func_order_args[1]
    call_args = func_order_args[2:]
    return position, target(*call_args)
66
+
67
+
68
+ def generate_process_args(
69
+ *,
70
+ process_function: Callable[..., Any],
71
+ process_args: Iterable[Iterable[Any]],
72
+ process_args_static: Iterable[Any] | None = None,
73
+ deepcopy_static_args: Iterable[Any] | None = None,
74
+ ) -> Generator[tuple[Any, ...], None, None]:
75
+ """Prepare arguments for multiprocessing or multithreading execution.
76
+
77
+ Converts input arguments into a format suitable for parallel processing,
78
+ organizing them for efficient unpacking during execution.
79
+
80
+ The function performs the following transformations:
81
+ 1. Prepends the process function and order index to each argument tuple
82
+ 2. Appends static arguments to each call
83
+ 3. Deep-copies specified arguments for each call (for mutable objects)
84
+
85
+ Args:
86
+ process_function: The function to be executed in parallel.
87
+ process_args: Iterable of argument lists for each parallel call.
88
+ Each inner iterable contains the arguments for one function call.
89
+ process_args_static: Optional constant arguments to append to each
90
+ call. These are shared across all calls without copying.
91
+ deepcopy_static_args: Optional arguments that should be deep-copied
92
+ for each process. Use this for mutable objects that should not
93
+ be shared between processes.
94
+
95
+ Yields:
96
+ Tuples formatted as: (process_function, order_index, *args,
97
+ *static_args, *deepcopied_args)
98
+
99
+ Example:
100
+ >>> def add(a, b, c):
101
+ ... return a + b + c
102
+ >>> args = generate_process_args(
103
+ ... process_function=add,
104
+ ... process_args=[[1], [2]],
105
+ ... process_args_static=[10],
106
+ ... )
107
+ >>> list(args)
108
+ [(add, 0, 1, 10), (add, 1, 2, 10)]
109
+ """
110
+ process_args_static = (
111
+ () if process_args_static is None else tuple(process_args_static)
112
+ )
113
+ deepcopy_static_args = (
114
+ () if deepcopy_static_args is None else tuple(deepcopy_static_args)
115
+ )
116
+ for order, process_arg in enumerate(process_args):
117
+ yield (
118
+ process_function,
119
+ order,
120
+ *process_arg,
121
+ *process_args_static,
122
+ *(
123
+ deepcopy(deepcopy_static_arg)
124
+ for deepcopy_static_arg in deepcopy_static_args
125
+ ),
126
+ )
127
+
128
+
129
def get_multiprocess_results_with_tqdm(
    results: Iterable[Any],
    process_func: Callable[..., Any],
    process_args_len: int,
    *,
    threads: bool,
) -> list[Any]:
    """Drain parallel results behind a progress bar, restoring order.

    Wraps the (order, result) stream coming out of a pool in a tqdm
    progress bar, then sorts the collected pairs by their submission
    index and strips the index off.

    Args:
        results: Iterable of ``(order_index, result)`` tuples produced by
            the workers, in completion order.
        process_func: The function that was executed; shown in the
            progress-bar description.
        process_args_len: Total number of submitted items; used as the
            progress-bar total.
        threads: True when threading was used, False for multiprocessing;
            only affects the progress-bar labels.

    Returns:
        The bare results, ordered by original submission index.
    """
    progress = tqdm(
        results,
        total=process_args_len,
        desc=f"Multi{'threading' if threads else 'processing'} {process_func}",
        unit=f" {'threads' if threads else 'processes'}",
    )
    # Workers finish in arbitrary order; sort by the order index that
    # was packed alongside each result, then drop the index.
    ordered_pairs = sorted(progress, key=lambda pair: pair[0])
    return [payload for _, payload in ordered_pairs]
167
+
168
+
169
def find_max_pools(
    *,
    threads: bool,
    process_args_len: int | None = None,
) -> int:
    """Size the worker pool from CPU count, current load, and workload.

    Threading gets a budget of ``cpu_count * 4`` minus already-active
    threads; multiprocessing gets ``cpu_count`` minus active child
    processes. The result is capped at the number of items to process
    (when given) and floored at 1.

    Args:
        threads: True to size a thread pool, False for a process pool.
        process_args_len: Optional number of items to process; when
            truthy the pool never exceeds this.

    Returns:
        The number of workers to use; always at least 1.
    """
    cpu_count = os.cpu_count() or 1
    if threads:
        busy = threading.active_count()
        capacity = cpu_count * 4
    else:
        busy = len(multiprocessing.active_children())
        capacity = cpu_count
    headroom = capacity - busy
    # Never spin up more workers than there are items to process.
    if process_args_len:
        headroom = min(headroom, process_args_len)
    max_pools = max(headroom, 1)
    logger.info(
        "Multi%s with max_pools: %s",
        "threading" if threads else "processing",
        max_pools,
    )
    return max_pools
218
+
219
+
220
def concurrent_loop(  # noqa: PLR0913
    *,
    threading: bool,
    process_function: Callable[..., Any],
    process_args: Iterable[Iterable[Any]],
    process_args_static: Iterable[Any] | None = None,
    deepcopy_static_args: Iterable[Any] | None = None,
    process_args_len: int = 1,
) -> list[Any]:
    """Run ``process_function`` concurrently over many argument sets.

    Unified driver behind ``multiprocess_loop()`` and
    ``multithread_loop()``: it packs per-call argument tuples, sizes the
    worker pool, dispatches through either a thread pool or a spawn-based
    process pool, and returns the results in submission order.

    Args:
        threading: Use threads (True; I/O-bound work) or processes
            (False; CPU-bound work — ``process_function`` must then be
            pickle-able).
        process_function: The callable executed once per argument set.
        process_args: Iterable of argument iterables, one per call.
        process_args_static: Optional constant arguments appended to
            every call without copying. Defaults to None.
        deepcopy_static_args: Optional arguments deep-copied per call so
            mutable state is not shared between workers. Defaults to
            None.
        process_args_len: Length of ``process_args``; drives the progress
            bar and pool sizing. Defaults to 1.

    Returns:
        Results of all calls, in original submission order.

    Note:
        Not meant to be called directly — use ``multiprocess_loop()`` or
        ``multithread_loop()`` instead.
    """
    from winiutils.src.iterating.concurrent.multiprocessing import (  # noqa: PLC0415 # avoid circular import
        get_spwan_pool,
    )
    from winiutils.src.iterating.concurrent.multithreading import (  # noqa: PLC0415 # avoid circular import
        imap_unordered,
    )

    process_args_len = get_len_with_default(process_args, process_args_len)
    packed_args = generate_process_args(
        process_function=process_function,
        process_args=process_args,
        process_args_static=process_args_static,
        deepcopy_static_args=deepcopy_static_args,
    )
    max_workers = find_max_pools(threads=threading, process_args_len=process_args_len)

    if threading:
        pool_executor = ThreadPoolExecutor(max_workers=max_workers)
    else:
        pool_executor = get_spwan_pool(processes=max_workers)

    with pool_executor as pool:
        # A single task needs no pool dispatch; otherwise use the
        # unordered-imap flavour matching the pool type.
        if process_args_len == 1:
            pending_results: Iterable[Any] = map(
                get_order_and_func_result, packed_args
            )
        elif threading:
            thread_pool = cast("ThreadPoolExecutor", pool)
            pending_results = imap_unordered(
                thread_pool, get_order_and_func_result, packed_args
            )
        else:
            process_pool = cast("Pool", pool)
            pending_results = process_pool.imap_unordered(
                get_order_and_func_result, packed_args
            )

        # Consume inside the `with` block: the lazy imap iterators need
        # the pool to stay alive until every result has been collected.
        return get_multiprocess_results_with_tqdm(
            results=pending_results,
            process_func=process_function,
            process_args_len=process_args_len,
            threads=threading,
        )