stouputils 1.17.0__py3-none-any.whl → 1.18.1__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- stouputils/collections.py +37 -7
- stouputils/collections.pyi +35 -8
- stouputils/continuous_delivery/stubs.py +1 -1
- stouputils/ctx.py +1 -3
- stouputils/ctx.pyi +1 -3
- stouputils/image.py +7 -9
- stouputils/image.pyi +3 -5
- stouputils/io.py +21 -0
- stouputils/io.pyi +6 -0
- stouputils/parallel/__init__.py +29 -0
- stouputils/parallel/__init__.pyi +4 -0
- stouputils/parallel/capturer.py +133 -0
- stouputils/parallel/capturer.pyi +38 -0
- stouputils/parallel/common.py +134 -0
- stouputils/parallel/common.pyi +53 -0
- stouputils/parallel/multi.py +309 -0
- stouputils/{parallel.pyi → parallel/multi.pyi} +14 -112
- stouputils/parallel/subprocess.py +163 -0
- stouputils/parallel/subprocess.pyi +64 -0
- stouputils/print.py +2 -3
- stouputils/print.pyi +1 -2
- {stouputils-1.17.0.dist-info → stouputils-1.18.1.dist-info}/METADATA +2 -1
- {stouputils-1.17.0.dist-info → stouputils-1.18.1.dist-info}/RECORD +25 -17
- stouputils/parallel.py +0 -556
- {stouputils-1.17.0.dist-info → stouputils-1.18.1.dist-info}/WHEEL +0 -0
- {stouputils-1.17.0.dist-info → stouputils-1.18.1.dist-info}/entry_points.txt +0 -0
stouputils/collections.py
CHANGED
@@ -17,7 +17,7 @@ import os
 import shutil
 import tempfile
 from collections.abc import Callable, Iterable
-from typing import TYPE_CHECKING, Any, Literal, TypeVar
+from typing import TYPE_CHECKING, Any, Literal
 
 # Lazy imports for typing
 if TYPE_CHECKING:
@@ -26,9 +26,6 @@ if TYPE_CHECKING:
 	import zarr  # pyright: ignore[reportMissingTypeStubs]
 	from numpy.typing import NDArray
 
-# Typing
-T = TypeVar("T")
-
 # Functions
 def unique_list[T](list_to_clean: Iterable[T], method: Literal["id", "hash", "str"] = "str") -> list[T]:
 	""" Remove duplicates from the list while keeping the order using ids, hash, or str
@@ -79,7 +76,7 @@ def unique_list[T](list_to_clean: Iterable[T], method: Literal["id", "hash", "st
 	return result
 
 
-def at_least_n(iterable: Iterable[T], predicate: Callable[[T], bool], n: int) -> bool:
+def at_least_n[T](iterable: Iterable[T], predicate: Callable[[T], bool], n: int) -> bool:
 	""" Return True if at least n elements in iterable satisfy predicate.
 	It's like the built-in any() but for at least n matches.
 
@@ -135,16 +132,45 @@ def sort_dict_keys[T](dictionary: dict[T, Any], order: list[T], reverse: bool =
 def upsert_in_dataframe(
 	df: "pl.DataFrame",
 	new_entry: dict[str, Any],
-	primary_keys: dict[str, Any] | None = None
+	primary_keys: list[str] | dict[str, Any] | None = None
 ) -> "pl.DataFrame":
 	""" Insert or update a row in the Polars DataFrame based on primary keys.
 
 	Args:
 		df (pl.DataFrame): The Polars DataFrame to update.
 		new_entry (dict[str, Any]): The new entry to insert or update.
-		primary_keys (dict[str, Any]): The primary keys to identify the row (
+		primary_keys (list[str] | dict[str, Any] | None): The primary keys to identify the row (for updates).
 	Returns:
 		pl.DataFrame: The updated Polars DataFrame.
+	Examples:
+		>>> import polars as pl
+		>>> df = pl.DataFrame({"id": [1, 2], "value": ["a", "b"]})
+		>>> new_entry = {"id": 2, "value": "updated"}
+		>>> updated_df = upsert_in_dataframe(df, new_entry, primary_keys=["id"])
+		>>> print(updated_df)
+		shape: (2, 2)
+		┌─────┬─────────┐
+		│ id  ┆ value   │
+		│ --- ┆ ---     │
+		│ i64 ┆ str     │
+		╞═════╪═════════╡
+		│ 1   ┆ a       │
+		│ 2   ┆ updated │
+		└─────┴─────────┘
+
+		>>> new_entry = {"id": 3, "value": "new"}
+		>>> updated_df = upsert_in_dataframe(updated_df, new_entry, primary_keys=["id"])
+		>>> print(updated_df)
+		shape: (3, 2)
+		┌─────┬─────────┐
+		│ id  ┆ value   │
+		│ --- ┆ ---     │
+		│ i64 ┆ str     │
+		╞═════╪═════════╡
+		│ 1   ┆ a       │
+		│ 2   ┆ updated │
+		│ 3   ┆ new     │
+		└─────┴─────────┘
 	"""
 	# Imports
 	import polars as pl
@@ -158,6 +184,10 @@ def upsert_in_dataframe(
 		new_row_df = pl.DataFrame([new_entry])
 		return pl.concat([df, new_row_df], how="diagonal_relaxed")
 
+	# If primary keys are provided as a list, convert to dict with values from new_entry
+	if isinstance(primary_keys, list):
+		primary_keys = {key: new_entry[key] for key in primary_keys if key in new_entry}
+
 	# Build mask based on primary keys
 	mask: pl.Expr = pl.lit(True)
 	for key, value in primary_keys.items():
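The at_least_n change above is part of a release-wide migration from module-level TypeVar declarations to PEP 695 inline type parameters (Python 3.12+). A minimal standalone sketch of the new signature; the function body here is illustrative, not the package's implementation:

from collections.abc import Callable, Iterable

# PEP 695: the type parameter T is scoped to the function itself,
# so the module no longer needs a shared `T = TypeVar("T")`.
def at_least_n[T](iterable: Iterable[T], predicate: Callable[[T], bool], n: int) -> bool:
	count = 0
	for item in iterable:
		if predicate(item):
			count += 1
			if count >= n:
				return True
	return False

print(at_least_n([1, 2, 3, 4], lambda x: x % 2 == 0, 2))  # True: 2 and 4 match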
stouputils/collections.pyi
CHANGED
@@ -2,9 +2,7 @@ import polars as pl
 import zarr
 from collections.abc import Callable as Callable, Iterable
 from numpy.typing import NDArray as NDArray
-from typing import Any, Literal, TypeVar
-
-T = TypeVar('T')
+from typing import Any, Literal
 
 def unique_list[T](list_to_clean: Iterable[T], method: Literal['id', 'hash', 'str'] = 'str') -> list[T]:
     ''' Remove duplicates from the list while keeping the order using ids, hash, or str
@@ -31,7 +29,7 @@ def unique_list[T](list_to_clean: Iterable[T], method: Literal['id', 'hash', 'st
     \t\t>>> unique_list([s1, s2, s1, s1, s3, s2, s3], method="str")
     \t\t[{1, 2, 3}, {2, 3, 4}]
     \t'''
-def at_least_n(iterable: Iterable[T], predicate: Callable[[T], bool], n: int) -> bool:
+def at_least_n[T](iterable: Iterable[T], predicate: Callable[[T], bool], n: int) -> bool:
     """ Return True if at least n elements in iterable satisfy predicate.
     \tIt's like the built-in any() but for at least n matches.
 
@@ -71,16 +69,45 @@ def sort_dict_keys[T](dictionary: dict[T, Any], order: list[T], reverse: bool =
     \t\t>>> sort_dict_keys({\'b\': 2, \'a\': 1, \'c\': 3, \'d\': 4}, order=["c", "b"])
     \t\t{\'c\': 3, \'b\': 2, \'a\': 1, \'d\': 4}
     \t'''
-def upsert_in_dataframe(df: pl.DataFrame, new_entry: dict[str, Any], primary_keys: dict[str, Any] | None = None) -> pl.DataFrame:
-
+def upsert_in_dataframe(df: pl.DataFrame, new_entry: dict[str, Any], primary_keys: list[str] | dict[str, Any] | None = None) -> pl.DataFrame:
+    ''' Insert or update a row in the Polars DataFrame based on primary keys.
 
     \tArgs:
     \t\tdf\t\t\t\t(pl.DataFrame):\t\tThe Polars DataFrame to update.
     \t\tnew_entry\t\t(dict[str, Any]):\tThe new entry to insert or update.
-    \t\tprimary_keys\t(dict[str, Any]):\tThe primary keys to identify the row (
+    \t\tprimary_keys\t(list[str] | dict[str, Any] | None):\tThe primary keys to identify the row (for updates).
     \tReturns:
     \t\tpl.DataFrame: The updated Polars DataFrame.
-    \t'''
+    \tExamples:
+    \t\t>>> import polars as pl
+    \t\t>>> df = pl.DataFrame({"id": [1, 2], "value": ["a", "b"]})
+    \t\t>>> new_entry = {"id": 2, "value": "updated"}
+    \t\t>>> updated_df = upsert_in_dataframe(df, new_entry, primary_keys=["id"])
+    \t\t>>> print(updated_df)
+    \t\tshape: (2, 2)
+    \t\t┌─────┬─────────┐
+    \t\t│ id  ┆ value   │
+    \t\t│ --- ┆ ---     │
+    \t\t│ i64 ┆ str     │
+    \t\t╞═════╪═════════╡
+    \t\t│ 1   ┆ a       │
+    \t\t│ 2   ┆ updated │
+    \t\t└─────┴─────────┘
+
+    \t\t>>> new_entry = {"id": 3, "value": "new"}
+    \t\t>>> updated_df = upsert_in_dataframe(updated_df, new_entry, primary_keys=["id"])
+    \t\t>>> print(updated_df)
+    \t\tshape: (3, 2)
+    \t\t┌─────┬─────────┐
+    \t\t│ id  ┆ value   │
+    \t\t│ --- ┆ ---     │
+    \t\t│ i64 ┆ str     │
+    \t\t╞═════╪═════════╡
+    \t\t│ 1   ┆ a       │
+    \t\t│ 2   ┆ updated │
+    \t\t│ 3   ┆ new     │
+    \t\t└─────┴─────────┘
+    \t'''
 def array_to_disk(data: NDArray[Any] | zarr.Array, delete_input: bool = True, more_data: NDArray[Any] | zarr.Array | None = None) -> tuple['zarr.Array', str, int]:
     """ Easily handle large numpy arrays on disk using zarr for efficient storage and access.
 
stouputils/continuous_delivery/stubs.py
CHANGED
@@ -29,7 +29,7 @@ def generate_stubs(
 	try:
 		from mypy.stubgen import main as stubgen_main
 	except ImportError as e:
-		raise ImportError("mypy is required for
+		raise ImportError("mypy is required for generate_stubs function. Please install it via 'pip install mypy'.") from e
 	try:
 		stubgen_main(["-p", package_name, *extra_args.split()])
 		return 0
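For reference, the call being wrapped here is mypy's stub generator; a hedged standalone sketch of the equivalent direct invocation (mypy must be installed, and stubgen may exit via SystemExit):

# Equivalent to running: python -m mypy.stubgen -p stouputils
from mypy.stubgen import main as stubgen_main

try:
	stubgen_main(["-p", "stouputils"])  # writes .pyi stubs (to ./out by default)
except SystemExit:
	pass  # stubgen signals some failures by exiting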
stouputils/ctx.py
CHANGED
@@ -20,13 +20,11 @@ import sys
 import time
 from collections.abc import Callable
 from contextlib import AbstractAsyncContextManager, AbstractContextManager
-from typing import IO, Any, TextIO, TypeVar
+from typing import IO, Any, TextIO
 
 from .io import super_open
 from .print import TeeMultiOutput, debug
 
-# Type variable for context managers
-T = TypeVar("T")
 
 # Abstract base class for context managers supporting both sync and async usage
 class AbstractBothContextManager[T](AbstractContextManager[T], AbstractAsyncContextManager[T]):
stouputils/ctx.pyi
CHANGED
@@ -3,9 +3,7 @@ from .io import super_open as super_open
 from .print import TeeMultiOutput as TeeMultiOutput, debug as debug
 from collections.abc import Callable as Callable
 from contextlib import AbstractAsyncContextManager, AbstractContextManager
-from typing import Any, IO, TextIO, TypeVar
-
-T = TypeVar('T')
+from typing import Any, IO, TextIO
 
 class AbstractBothContextManager[T](AbstractContextManager[T], AbstractAsyncContextManager[T], metaclass=abc.ABCMeta):
    """ Abstract base class for context managers that support both synchronous and asynchronous usage. """
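AbstractBothContextManager[T] simply combines the two stdlib ABCs so one object works under both `with` and `async with`. A self-contained sketch of that combination (hypothetical Timer class, not from the package), where the async protocol reuses the sync logic:

import time
from contextlib import AbstractAsyncContextManager, AbstractContextManager
from types import TracebackType

class Timer(AbstractContextManager["Timer"], AbstractAsyncContextManager["Timer"]):
	def __enter__(self) -> "Timer":
		self.start = time.perf_counter()
		return self

	def __exit__(self, exc_type: type[BaseException] | None,
			exc: BaseException | None, tb: TracebackType | None) -> None:
		print(f"elapsed: {time.perf_counter() - self.start:.3f}s")

	# Async protocol delegates to the sync implementation
	async def __aenter__(self) -> "Timer":
		return self.__enter__()

	async def __aexit__(self, exc_type: type[BaseException] | None,
			exc: BaseException | None, tb: TracebackType | None) -> None:
		self.__exit__(exc_type, exc, tb)

with Timer():
	time.sleep(0.1)  # prints roughly "elapsed: 0.100s"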
stouputils/image.py
CHANGED
@@ -12,7 +12,7 @@ See stouputils.data_science.data_processing for lots more image processing utili
 # Imports
 import os
 from collections.abc import Callable
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any, cast
 
 from .io import super_open
 from .print import debug, info
@@ -22,15 +22,13 @@ if TYPE_CHECKING:
 	from numpy.typing import NDArray
 	from PIL import Image
 
-	PIL_Image_or_NDArray = TypeVar("PIL_Image_or_NDArray", bound="Image.Image | NDArray[np.number]")
-
 # Functions
-def image_resize[PIL_Image_or_NDArray](
-	image: PIL_Image_or_NDArray,
+def image_resize[T: "Image.Image | NDArray[np.number]"](
+	image: T,
 	max_result_size: int,
 	resampling: "Image.Resampling | None" = None,
 	min_or_max: Callable[[int, int], int] = max,
-	return_type: type[PIL_Image_or_NDArray] | str = "same",
+	return_type: type[T] | str = "same",
 	keep_aspect_ratio: bool = True,
 ) -> Any:
 	""" Resize an image while preserving its aspect ratio by default.
@@ -121,11 +119,11 @@ def image_resize[PIL_Image_or_NDArray](
 	return new_image
 
 
-def auto_crop[PIL_Image_or_NDArray](
-	image: PIL_Image_or_NDArray,
+def auto_crop[T: "Image.Image | NDArray[np.number]"](
+	image: T,
 	mask: "NDArray[np.bool_] | None" = None,
 	threshold: int | float | Callable[["NDArray[np.number]"], int | float] | None = None,
-	return_type: type[PIL_Image_or_NDArray] | str = "same",
+	return_type: type[T] | str = "same",
 	contiguous: bool = True,
 ) -> Any:
 	""" Automatically crop an image to remove zero or uniform regions.
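Both signatures trade the old bound TypeVar for a PEP 695 inline bound. A standalone sketch of the syntax difference (illustrative passthrough function, not the package's code):

import numpy as np
from numpy.typing import NDArray

# Before: PIL_Image_or_NDArray = TypeVar("PIL_Image_or_NDArray", bound="Image.Image | NDArray[np.number]")
# After: the bound is declared inline on the function's own type parameter.
def passthrough[T: NDArray[np.number]](image: T) -> T:
	return image

arr: NDArray[np.float64] = np.zeros((4, 4))
same = passthrough(arr)  # checkers infer `same` as NDArray[np.float64]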
stouputils/image.pyi
CHANGED
@@ -4,11 +4,9 @@ from .print import debug as debug, info as info
 from PIL import Image
 from collections.abc import Callable
 from numpy.typing import NDArray as NDArray
-from typing import Any
+from typing import Any
 
-
-
-def image_resize[PIL_Image_or_NDArray](image: PIL_Image_or_NDArray, max_result_size: int, resampling: Image.Resampling | None = None, min_or_max: Callable[[int, int], int] = ..., return_type: type[PIL_Image_or_NDArray] | str = 'same', keep_aspect_ratio: bool = True) -> Any:
+def image_resize[T: Image.Image | NDArray[np.number]](image: T, max_result_size: int, resampling: Image.Resampling | None = None, min_or_max: Callable[[int, int], int] = ..., return_type: type[T] | str = 'same', keep_aspect_ratio: bool = True) -> Any:
    ''' Resize an image while preserving its aspect ratio by default.
    \tScales the image so that its largest dimension equals max_result_size.
 
@@ -47,7 +45,7 @@ def image_resize[PIL_Image_or_NDArray](image: PIL_Image_or_NDArray, max_result_s
    \t\t>>> image_resize(pil_image, 50, resampling=Image.Resampling.NEAREST).size
    \t\t(50, 25)
    \t'''
-def auto_crop[PIL_Image_or_NDArray](image: PIL_Image_or_NDArray, mask: NDArray[np.bool_] | None = None, threshold: int | float | Callable[[NDArray[np.number]], int | float] | None = None, return_type: type[PIL_Image_or_NDArray] | str = 'same', contiguous: bool = True) -> Any:
+def auto_crop[T: Image.Image | NDArray[np.number]](image: T, mask: NDArray[np.bool_] | None = None, threshold: int | float | Callable[[NDArray[np.number]], int | float] | None = None, return_type: type[T] | str = 'same', contiguous: bool = True) -> Any:
    ''' Automatically crop an image to remove zero or uniform regions.
 
    \tThis function crops the image to keep only the region where pixels are non-zero
stouputils/io.py
CHANGED
@@ -11,6 +11,7 @@ This module provides utilities for file management.
 - super_open: Open a file with the given mode, creating the directory if it doesn't exist (only if writing)
 - replace_tilde: Replace the "~" by the user's home directory
 - clean_path: Clean the path by replacing backslashes with forward slashes and simplifying the path
+- safe_close: Safely close a file descriptor or file object after flushing, ignoring any exceptions
 
 .. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/io_module.gif
 	:alt: stouputils io examples
@@ -491,3 +492,23 @@ def clean_path(file_path: str, trailing_slash: bool = True) -> str:
 	# Return the cleaned path
 	return file_path if file_path != "." else ""
 
+def safe_close(file: IO[Any] | int | None) -> None:
+	""" Safely close a file object (or file descriptor) after flushing, ignoring any exceptions.
+
+	Args:
+		file (IO[Any] | int | None): The file object or file descriptor to close
+	"""
+	if isinstance(file, int):
+		if file != -1:
+			for func in (os.fsync, os.close):
+				try:
+					func(file)
+				except Exception:
+					pass
+	elif file:
+		for func in (file.flush, file.close):
+			try:
+				func()
+			except Exception:
+				pass
+
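safe_close replaces the usual try/finally close boilerplate. A short usage sketch grounded in the implementation above (the temp-file setup is only illustrative):

import os
import tempfile
from stouputils.io import safe_close

# Works on raw file descriptors...
fd, path = tempfile.mkstemp()
os.write(fd, b"data")
safe_close(fd)        # fsync + close, any exception swallowed

# ...and on file objects; double-closing is harmless.
f = open(path, "rb")
safe_close(f)         # flush + close
safe_close(f)         # swallowed ValueError instead of a crash
safe_close(None)      # no-op
os.remove(path)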
stouputils/io.pyi
CHANGED
@@ -211,3 +211,9 @@ def clean_path(file_path: str, trailing_slash: bool = True) -> str:
    \t\t>>> clean_path("C:/folder1\\\\folder2")
    \t\t\'C:/folder1/folder2\'
    \t'''
+def safe_close(file: IO[Any] | int | None) -> None:
+    """ Safely close a file object (or file descriptor) after flushing, ignoring any exceptions.
+
+    \tArgs:
+    \t\tfile (IO[Any] | int | None): The file object or file descriptor to close
+    \t"""
stouputils/parallel/__init__.py
ADDED
@@ -0,0 +1,29 @@
+"""
+This module provides utility functions for parallel processing, such as:
+
+- multiprocessing(): Execute a function in parallel using multiprocessing
+- multithreading(): Execute a function in parallel using multithreading
+- run_in_subprocess(): Execute a function in a subprocess with args and kwargs
+
+I highly encourage you to read the function docstrings to understand when to use each method.
+
+Priority (nice) mapping for multiprocessing():
+
+- Unix-style values from -20 (highest priority) to 19 (lowest priority)
+- Windows automatic mapping:
+	* -20 to -10: HIGH_PRIORITY_CLASS
+	* -9 to -1: ABOVE_NORMAL_PRIORITY_CLASS
+	* 0: NORMAL_PRIORITY_CLASS
+	* 1 to 9: BELOW_NORMAL_PRIORITY_CLASS
+	* 10 to 19: IDLE_PRIORITY_CLASS
+
+.. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/parallel_module.gif
+	:alt: stouputils parallel examples
+"""
+
+# Imports
+from .capturer import *
+from .common import *
+from .multi import *
+from .subprocess import *
+
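The nice-to-Windows mapping documented in this docstring can be expressed as a small pure function; the helper below is hypothetical (the package's actual implementation is set_process_priority in parallel/common.py, shown later in this diff):

def windows_priority_class(nice_value: int) -> str:
	# Mirrors the mapping table in the module docstring above
	if nice_value <= -10:
		return "HIGH_PRIORITY_CLASS"
	if nice_value < 0:
		return "ABOVE_NORMAL_PRIORITY_CLASS"
	if nice_value == 0:
		return "NORMAL_PRIORITY_CLASS"
	if nice_value < 10:
		return "BELOW_NORMAL_PRIORITY_CLASS"
	return "IDLE_PRIORITY_CLASS"

assert windows_priority_class(-20) == "HIGH_PRIORITY_CLASS"
assert windows_priority_class(5) == "BELOW_NORMAL_PRIORITY_CLASS"
assert windows_priority_class(19) == "IDLE_PRIORITY_CLASS"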
stouputils/parallel/capturer.py
ADDED
@@ -0,0 +1,133 @@
+
+# Imports
+import os
+from typing import IO, Any
+
+from ..io import safe_close
+
+
+class PipeWriter:
+	""" A writer that sends data to a multiprocessing Connection. """
+	def __init__(self, conn: Any, encoding: str, errors: str):
+		self.conn: Any = conn
+		self.encoding: str = encoding
+		self.errors: str = errors
+
+	def write(self, data: str) -> int:
+		self.conn.send_bytes(data.encode(self.encoding, errors=self.errors))
+		return len(data)
+
+	def flush(self) -> None:
+		pass
+
+
+class CaptureOutput:
+	""" Utility to capture stdout/stderr from a subprocess and relay it to the parent's stdout.
+
+	The class creates an os.pipe(), marks fds as inheritable (for spawn method),
+	provides methods to start a listener thread that reads from the pipe and writes
+	to the main process's sys.stdout/sys.stderr, and to close/join the listener.
+	"""
+	def __init__(self, encoding: str = "utf-8", errors: str = "replace", chunk_size: int = 1024):
+		import multiprocessing as mp
+		import threading
+		self.encoding: str = encoding
+		self.errors: str = errors
+		self.chunk_size: int = chunk_size
+		self.read_conn, self.write_conn = mp.Pipe(duplex=False)
+		self.read_fd = self.read_conn.fileno()
+		self.write_fd = self.write_conn.fileno()
+		# Internal state for the listener thread and reader handle
+		self._thread: threading.Thread | None = None
+		self._reader_file: IO[Any] | None = None
+		# Sentinel string that will terminate the listener when seen in the stream
+		try:
+			os.set_inheritable(self.read_fd, True)
+			os.set_inheritable(self.write_fd, True)
+		except Exception:
+			pass
+
+	def __repr__(self) -> str:
+		return f"<CaptureOutput read_fd={self.read_fd} write_fd={self.write_fd}>"
+
+	# Pickle support: exclude unpicklable attributes
+	def __getstate__(self) -> dict[str, Any]:
+		state = self.__dict__.copy()
+		state["_thread"] = None
+		return state
+
+	def redirect(self) -> None:
+		""" Redirect sys.stdout and sys.stderr to the pipe's write end. """
+		import sys
+		writer = PipeWriter(self.write_conn, self.encoding, self.errors)
+		sys.stdout = writer
+		sys.stderr = writer
+
+	def parent_close_write(self) -> None:
+		""" Close the parent's copy of the write end; the child's copy remains. """
+		safe_close(self.write_fd)
+		self.write_conn.close()
+		self.write_fd = -1  # Prevent accidental reuse
+
+	def start_listener(self) -> None:
+		""" Start a daemon thread that forwards data from the pipe to sys.stdout/sys.stderr. """
+		import sys
+		if self._thread is not None:
+			return
+
+		# Handler function for reading from the pipe
+		buffer: str = ""
+		def _handle_buffer() -> None:
+			nonlocal buffer
+			if buffer:
+				try:
+					sys.stdout.write(buffer)
+					sys.stdout.flush()
+				except Exception:
+					pass
+				buffer = ""
+
+		# Thread target function
+		def _reader() -> None:
+			nonlocal buffer
+			try:
+				while True:
+					# Read a chunk from the pipe, stop loop on error
+					try:
+						data: bytes = self.read_conn.recv_bytes(self.chunk_size)
+					except EOFError:
+						_handle_buffer()
+						break
+
+					# Decode bytes to text & append to buffer
+					try:
+						chunk: str = data.decode(self.encoding, errors=self.errors)
+					except Exception:
+						chunk = data.decode(self.encoding, errors="replace")
+					buffer += chunk
+
+					# Periodically flush large buffers to avoid holding too much memory
+					if len(buffer) > self.chunk_size * 4:
+						_handle_buffer()
+			finally:
+				safe_close(self.read_fd)
+				self.read_conn.close()
+				self.read_fd = -1
+				self._thread = None  # Mark thread as stopped so callers don't block unnecessarily
+
+		# Start the listener thread
+		import threading
+		self._thread = threading.Thread(target=_reader, daemon=True)
+		self._thread.start()
+
+	def join_listener(self, timeout: float | None = None) -> None:
+		""" Wait for the listener thread to finish (until EOF). """
+		if self._thread is None:
+			safe_close(self.read_fd)
+			return self.read_conn.close()
+		self._thread.join(timeout)
+
+		# If thread finished, ensure read fd is closed and clear thread
+		if self._thread and not self._thread.is_alive():
+			self._thread = None
+
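A minimal usage sketch based on the methods shown above; the orchestration (which side calls start_listener, parent_close_write, and join_listener, and passing the capturer through Process args) is an assumption about intended use, not a documented protocol:

import multiprocessing as mp
from stouputils.parallel.capturer import CaptureOutput

def child(cap: CaptureOutput) -> None:
	cap.redirect()  # child's stdout/stderr now flow through the pipe
	print("hello from the child process")

if __name__ == "__main__":
	cap = CaptureOutput()
	cap.start_listener()              # parent thread forwards pipe -> sys.stdout
	p = mp.Process(target=child, args=(cap,))
	p.start()
	p.join()
	cap.parent_close_write()          # close parent's write end so the reader sees EOF
	cap.join_listener(timeout=5)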
stouputils/parallel/capturer.pyi
ADDED
@@ -0,0 +1,38 @@
+from ..io import safe_close as safe_close
+from _typeshed import Incomplete
+from typing import Any, IO
+
+class PipeWriter:
+    """ A writer that sends data to a multiprocessing Connection. """
+    conn: Any
+    encoding: str
+    errors: str
+    def __init__(self, conn: Any, encoding: str, errors: str) -> None: ...
+    def write(self, data: str) -> int: ...
+    def flush(self) -> None: ...
+
+class CaptureOutput:
+    """ Utility to capture stdout/stderr from a subprocess and relay it to the parent's stdout.
+
+    \tThe class creates an os.pipe(), marks fds as inheritable (for spawn method),
+    \tprovides methods to start a listener thread that reads from the pipe and writes
+    \tto the main process's sys.stdout/sys.stderr, and to close/join the listener.
+    \t"""
+    encoding: str
+    errors: str
+    chunk_size: int
+    read_fd: Incomplete
+    write_fd: Incomplete
+    _thread: threading.Thread | None
+    _reader_file: IO[Any] | None
+    def __init__(self, encoding: str = 'utf-8', errors: str = 'replace', chunk_size: int = 1024) -> None: ...
+    def __repr__(self) -> str: ...
+    def __getstate__(self) -> dict[str, Any]: ...
+    def redirect(self) -> None:
+        """ Redirect sys.stdout and sys.stderr to the pipe's write end. """
+    def parent_close_write(self) -> None:
+        """ Close the parent's copy of the write end; the child's copy remains. """
+    def start_listener(self) -> None:
+        """ Start a daemon thread that forwards data from the pipe to sys.stdout/sys.stderr. """
+    def join_listener(self, timeout: float | None = None) -> None:
+        """ Wait for the listener thread to finish (until EOF). """
stouputils/parallel/common.py
ADDED
@@ -0,0 +1,134 @@
+
+# Imports
+import os
+import time
+from collections.abc import Callable
+from typing import cast
+
+# Constants
+CPU_COUNT: int = cast(int, os.cpu_count())
+
+
+# "Private" function to wrap function execution with nice priority (must be at module level for pickling)
+def nice_wrapper[T, R](args: tuple[int, Callable[[T], R], T]) -> R:
+	""" Wrapper that applies nice priority then executes the function.
+
+	Args:
+		args (tuple): Tuple containing (nice_value, func, arg)
+
+	Returns:
+		R: Result of the function execution
+	"""
+	nice_value, func, arg = args
+	set_process_priority(nice_value)
+	return func(arg)
+
+# "Private" function to set process priority (must be at module level for pickling on Windows)
+def set_process_priority(nice_value: int) -> None:
+	""" Set the priority of the current process.
+
+	Args:
+		nice_value (int): Unix-style priority value (-20 to 19)
+	"""
+	try:
+		import sys
+		if sys.platform == "win32":
+			# Map Unix nice values to Windows priority classes
+			# -20 to -10: HIGH, -9 to -1: ABOVE_NORMAL, 0: NORMAL, 1-9: BELOW_NORMAL, 10-19: IDLE
+			import ctypes
+			# Windows priority class constants
+			if nice_value <= -10:
+				priority = 0x00000080  # HIGH_PRIORITY_CLASS
+			elif nice_value < 0:
+				priority = 0x00008000  # ABOVE_NORMAL_PRIORITY_CLASS
+			elif nice_value == 0:
+				priority = 0x00000020  # NORMAL_PRIORITY_CLASS
+			elif nice_value < 10:
+				priority = 0x00004000  # BELOW_NORMAL_PRIORITY_CLASS
+			else:
+				priority = 0x00000040  # IDLE_PRIORITY_CLASS
+			kernel32 = ctypes.windll.kernel32
+			handle = kernel32.GetCurrentProcess()
+			kernel32.SetPriorityClass(handle, priority)
+		else:
+			# Unix-like systems
+			os.nice(nice_value)
+	except Exception:
+		pass  # Silently ignore if we can't set priority
+
+# "Private" function to use starmap using args[0](*args[1])
+def starmap[T, R](args: tuple[Callable[[T], R], list[T]]) -> R:
+	r""" Private function to use starmap using args[0](\*args[1])
+
+	Args:
+		args (tuple): Tuple containing the function and the arguments list to pass to the function
+	Returns:
+		object: Result of the function execution
+	"""
+	func, arguments = args
+	return func(*arguments)
+
+# "Private" function to apply delay before calling the target function
+def delayed_call[T, R](args: tuple[Callable[[T], R], float, T]) -> R:
+	""" Private function to apply delay before calling the target function
+
+	Args:
+		args (tuple): Tuple containing the function, delay in seconds, and the argument to pass to the function
+	Returns:
+		object: Result of the function execution
+	"""
+	func, delay, arg = args
+	time.sleep(delay)
+	return func(arg)
+
+# "Private" function to handle parameters for multiprocessing or multithreading functions
+def handle_parameters[T, R](
+	func: Callable[[T], R] | list[Callable[[T], R]],
+	args: list[T],
+	use_starmap: bool,
+	delay_first_calls: float,
+	max_workers: int,
+	desc: str,
+	color: str
+) -> tuple[str, Callable[[T], R], list[T]]:
+	r""" Private function to handle the parameters for multiprocessing or multithreading functions
+
+	Args:
+		func (Callable | list[Callable]): Function to execute, or list of functions (one per argument)
+		args (list): List of arguments to pass to the function(s)
+		use_starmap (bool): Whether to use starmap or not (Defaults to False):
+			True means the function will be called like func(\*args[i]) instead of func(args[i])
+		delay_first_calls (int): Apply i*delay_first_calls seconds delay to the first "max_workers" calls.
+			For instance, the first process will be delayed by 0 seconds, the second by 1 second, etc. (Defaults to 0):
+			This can be useful to avoid functions being called in the same second.
+		max_workers (int): Number of workers to use
+		desc (str): Description of the function execution displayed in the progress bar
+		color (str): Color of the progress bar
+
+	Returns:
+		tuple[str, Callable[[T], R], list[T]]: Tuple containing the description, function, and arguments
+	"""
+	desc = color + desc
+
+	# Handle list of functions: validate and convert to starmap format
+	if isinstance(func, list):
+		func = cast(list[Callable[[T], R]], func)
+		assert len(func) == len(args), f"Length mismatch: {len(func)} functions but {len(args)} arguments"
+		args = [(f, arg if use_starmap else (arg,)) for f, arg in zip(func, args, strict=False)]  # type: ignore
+		func = starmap  # type: ignore
+
+	# If use_starmap is True, we use the _starmap function
+	elif use_starmap:
+		args = [(func, arg) for arg in args]  # type: ignore
+		func = starmap  # type: ignore
+
+	# Prepare delayed function calls if delay_first_calls is set
+	if delay_first_calls > 0:
+		args = [
+			(func, i * delay_first_calls if i < max_workers else 0, arg)  # type: ignore
+			for i, arg in enumerate(args)
+		]
+		func = delayed_call  # type: ignore
+
+	return desc, func, args  # type: ignore
+
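A small sketch of the argument transformation handle_parameters performs, assuming the module is importable as shown in this diff; `add` is a hypothetical worker function:

from stouputils.parallel.common import handle_parameters

def add(a: int, b: int) -> int:
	return a + b

desc, func, args = handle_parameters(
	add, [(1, 2), (3, 4)], use_starmap=True,
	delay_first_calls=0, max_workers=2, desc="adding", color="",
)
# With use_starmap=True, each argument becomes (add, (a, b)) and func becomes
# the starmap helper, so func(args[i]) expands to add(a, b).
print([func(a) for a in args])  # [3, 7]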