stouputils 1.12.2__py3-none-any.whl → 1.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stouputils/__main__.py +11 -6
- stouputils/continuous_delivery/pypi.py +39 -1
- stouputils/continuous_delivery/pypi.pyi +9 -0
- stouputils/ctx.py +408 -408
- stouputils/data_science/config/set.py +125 -125
- stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -31
- stouputils/data_science/utils.py +285 -285
- stouputils/installer/__init__.py +18 -18
- stouputils/installer/linux.py +144 -144
- stouputils/installer/main.py +223 -223
- stouputils/installer/windows.py +136 -136
- stouputils/py.typed +1 -1
- stouputils/stouputils/__init__.pyi +15 -0
- stouputils/stouputils/_deprecated.pyi +12 -0
- stouputils/stouputils/all_doctests.pyi +46 -0
- stouputils/stouputils/applications/__init__.pyi +2 -0
- stouputils/stouputils/applications/automatic_docs.pyi +106 -0
- stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
- stouputils/stouputils/applications/upscaler/config.pyi +18 -0
- stouputils/stouputils/applications/upscaler/image.pyi +109 -0
- stouputils/stouputils/applications/upscaler/video.pyi +60 -0
- stouputils/stouputils/archive.pyi +67 -0
- stouputils/stouputils/backup.pyi +109 -0
- stouputils/stouputils/collections.pyi +86 -0
- stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
- stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
- stouputils/stouputils/continuous_delivery/github.pyi +162 -0
- stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
- stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
- stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
- stouputils/stouputils/ctx.pyi +211 -0
- stouputils/stouputils/decorators.pyi +242 -0
- stouputils/stouputils/image.pyi +172 -0
- stouputils/stouputils/installer/__init__.pyi +5 -0
- stouputils/stouputils/installer/common.pyi +39 -0
- stouputils/stouputils/installer/downloader.pyi +24 -0
- stouputils/stouputils/installer/linux.pyi +39 -0
- stouputils/stouputils/installer/main.pyi +57 -0
- stouputils/stouputils/installer/windows.pyi +31 -0
- stouputils/stouputils/io.pyi +213 -0
- stouputils/stouputils/parallel.pyi +211 -0
- stouputils/stouputils/print.pyi +136 -0
- stouputils/stouputils/version_pkg.pyi +15 -0
- {stouputils-1.12.2.dist-info → stouputils-1.13.1.dist-info}/METADATA +2 -2
- {stouputils-1.12.2.dist-info → stouputils-1.13.1.dist-info}/RECORD +47 -16
- {stouputils-1.12.2.dist-info → stouputils-1.13.1.dist-info}/WHEEL +0 -0
- {stouputils-1.12.2.dist-info → stouputils-1.13.1.dist-info}/entry_points.txt +0 -0
stouputils/data_science/utils.py
CHANGED
|
@@ -1,285 +1,285 @@
|
|
|
1
|
-
"""
|
|
2
|
-
This module contains the Utils class, which provides static methods for common operations.
|
|
3
|
-
|
|
4
|
-
This class contains static methods for:
|
|
5
|
-
|
|
6
|
-
- Safe division (with 0 as denominator or None)
|
|
7
|
-
- Safe multiplication (with None)
|
|
8
|
-
- Converting between one-hot encoding and class indices
|
|
9
|
-
- Calculating ROC curves and AUC scores
|
|
10
|
-
"""
|
|
11
|
-
# pyright: reportUnknownMemberType=false
|
|
12
|
-
# pyright: reportUnknownVariableType=false
|
|
13
|
-
|
|
14
|
-
# Imports
|
|
15
|
-
from typing import Any
|
|
16
|
-
|
|
17
|
-
import numpy as np
|
|
18
|
-
from numpy.typing import NDArray
|
|
19
|
-
|
|
20
|
-
from ..ctx import Muffle
|
|
21
|
-
from ..decorators import handle_error
|
|
22
|
-
from .config.get import DataScienceConfig
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# Class
|
|
26
|
-
class Utils:
|
|
27
|
-
""" Utility class providing common operations. """
|
|
28
|
-
|
|
29
|
-
@staticmethod
|
|
30
|
-
def safe_divide_float(a: float, b: float) -> float:
|
|
31
|
-
""" Safe division of two numbers, return 0 if denominator is 0.
|
|
32
|
-
|
|
33
|
-
Args:
|
|
34
|
-
a (float): First number
|
|
35
|
-
b (float): Second number
|
|
36
|
-
Returns:
|
|
37
|
-
float: Result of the division
|
|
38
|
-
|
|
39
|
-
Examples:
|
|
40
|
-
>>> Utils.safe_divide_float(10, 2)
|
|
41
|
-
5.0
|
|
42
|
-
>>> Utils.safe_divide_float(0, 5)
|
|
43
|
-
0.0
|
|
44
|
-
>>> Utils.safe_divide_float(10, 0)
|
|
45
|
-
0
|
|
46
|
-
>>> Utils.safe_divide_float(-10, 2)
|
|
47
|
-
-5.0
|
|
48
|
-
"""
|
|
49
|
-
return a / b if b > 0 else 0
|
|
50
|
-
|
|
51
|
-
@staticmethod
|
|
52
|
-
def safe_divide_none(a: float | None, b: float | None) -> float | None:
|
|
53
|
-
""" Safe division of two numbers, return None if either number is None or denominator is 0.
|
|
54
|
-
|
|
55
|
-
Args:
|
|
56
|
-
a (float | None): First number
|
|
57
|
-
b (float | None): Second number
|
|
58
|
-
Returns:
|
|
59
|
-
float | None: Result of the division or None if denominator is None
|
|
60
|
-
|
|
61
|
-
Examples:
|
|
62
|
-
>>> None == Utils.safe_divide_none(None, 2)
|
|
63
|
-
True
|
|
64
|
-
>>> None == Utils.safe_divide_none(10, None)
|
|
65
|
-
True
|
|
66
|
-
>>> None == Utils.safe_divide_none(10, 0)
|
|
67
|
-
True
|
|
68
|
-
>>> Utils.safe_divide_none(10, 2)
|
|
69
|
-
5.0
|
|
70
|
-
"""
|
|
71
|
-
return a / b if a is not None and b is not None and b > 0 else None
|
|
72
|
-
|
|
73
|
-
@staticmethod
|
|
74
|
-
def safe_multiply_none(a: float | None, b: float | None) -> float | None:
|
|
75
|
-
""" Safe multiplication of two numbers, return None if either number is None.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
a (float | None): First number
|
|
79
|
-
b (float | None): Second number
|
|
80
|
-
Returns:
|
|
81
|
-
float | None: Result of the multiplication or None if either number is None
|
|
82
|
-
|
|
83
|
-
Examples:
|
|
84
|
-
>>> None == Utils.safe_multiply_none(None, 2)
|
|
85
|
-
True
|
|
86
|
-
>>> None == Utils.safe_multiply_none(10, None)
|
|
87
|
-
True
|
|
88
|
-
>>> Utils.safe_multiply_none(10, 2)
|
|
89
|
-
20
|
|
90
|
-
>>> Utils.safe_multiply_none(-10, 2)
|
|
91
|
-
-20
|
|
92
|
-
"""
|
|
93
|
-
return a * b if a is not None and b is not None else None
|
|
94
|
-
|
|
95
|
-
@staticmethod
|
|
96
|
-
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
|
|
97
|
-
def convert_to_class_indices(y: NDArray[np.intc | np.single] | list[NDArray[np.intc | np.single]]) -> NDArray[Any]:
|
|
98
|
-
""" Convert array from one-hot encoded format to class indices.
|
|
99
|
-
If the input is already class indices, it returns the same array.
|
|
100
|
-
|
|
101
|
-
Args:
|
|
102
|
-
y (NDArray[intc | single] | list[NDArray[intc | single]]): Input array (either one-hot encoded or class indices)
|
|
103
|
-
Returns:
|
|
104
|
-
NDArray[Any]: Array of class indices: [[0, 0, 1, 0], [1, 0, 0, 0]] -> [2, 0]
|
|
105
|
-
|
|
106
|
-
Examples:
|
|
107
|
-
>>> Utils.convert_to_class_indices(np.array([[0, 0, 1, 0], [1, 0, 0, 0]])).tolist()
|
|
108
|
-
[2, 0]
|
|
109
|
-
>>> Utils.convert_to_class_indices(np.array([2, 0, 1])).tolist()
|
|
110
|
-
[2, 0, 1]
|
|
111
|
-
>>> Utils.convert_to_class_indices(np.array([[1], [0]])).tolist()
|
|
112
|
-
[[1], [0]]
|
|
113
|
-
>>> Utils.convert_to_class_indices(np.array([])).tolist()
|
|
114
|
-
[]
|
|
115
|
-
"""
|
|
116
|
-
y = np.array(y)
|
|
117
|
-
if y.ndim > 1 and y.shape[1] > 1:
|
|
118
|
-
return np.argmax(y, axis=1)
|
|
119
|
-
return y
|
|
120
|
-
|
|
121
|
-
@staticmethod
|
|
122
|
-
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
|
|
123
|
-
def convert_to_one_hot(
|
|
124
|
-
y: NDArray[np.intc | np.single] | list[NDArray[np.intc | np.single]], num_classes: int
|
|
125
|
-
) -> NDArray[Any]:
|
|
126
|
-
""" Convert array from class indices to one-hot encoded format.
|
|
127
|
-
If the input is already one-hot encoded, it returns the same array.
|
|
128
|
-
|
|
129
|
-
Args:
|
|
130
|
-
y (NDArray[intc|single] | list[NDArray[intc|single]]): Input array (either class indices or one-hot encoded)
|
|
131
|
-
num_classes (int): Total number of classes
|
|
132
|
-
Returns:
|
|
133
|
-
NDArray[Any]: One-hot encoded array: [2, 0] -> [[0, 0, 1, 0], [1, 0, 0, 0]]
|
|
134
|
-
|
|
135
|
-
Examples:
|
|
136
|
-
>>> Utils.convert_to_one_hot(np.array([2, 0]), 4).tolist()
|
|
137
|
-
[[0.0, 0.0, 1.0, 0.0], [1.0, 0.0, 0.0, 0.0]]
|
|
138
|
-
>>> Utils.convert_to_one_hot(np.array([[0, 0, 1, 0], [1, 0, 0, 0]]), 4).tolist()
|
|
139
|
-
[[0, 0, 1, 0], [1, 0, 0, 0]]
|
|
140
|
-
>>> Utils.convert_to_one_hot(np.array([0, 1, 2]), 3).shape
|
|
141
|
-
(3, 3)
|
|
142
|
-
>>> Utils.convert_to_one_hot(np.array([]), 3)
|
|
143
|
-
array([], shape=(0, 3), dtype=float32)
|
|
144
|
-
|
|
145
|
-
>>> array = np.array([[0.1, 0.9], [0.2, 0.8]])
|
|
146
|
-
>>> array = Utils.convert_to_class_indices(array)
|
|
147
|
-
>>> array = Utils.convert_to_one_hot(array, 2)
|
|
148
|
-
>>> array.tolist()
|
|
149
|
-
[[0.0, 1.0], [0.0, 1.0]]
|
|
150
|
-
"""
|
|
151
|
-
y = np.array(y)
|
|
152
|
-
if y.ndim == 1 or y.shape[1] != num_classes:
|
|
153
|
-
|
|
154
|
-
# Get the number of samples and create a one-hot encoded array
|
|
155
|
-
n_samples: int = len(y)
|
|
156
|
-
one_hot: NDArray[np.float32] = np.zeros((n_samples, num_classes), dtype=np.float32)
|
|
157
|
-
if n_samples > 0:
|
|
158
|
-
# Create a one-hot encoding by setting specific positions to 1.0:
|
|
159
|
-
# - np.arange(n_samples) creates an array [0, 1, 2, ..., n_samples-1] for row indices
|
|
160
|
-
# - y.astype(int) contains the class indices that determine which column gets the 1.0
|
|
161
|
-
# - Together they form coordinate pairs (row_idx, class_idx) where we set values to 1.0
|
|
162
|
-
row_indices: NDArray[np.intc] = np.arange(n_samples)
|
|
163
|
-
one_hot[row_indices, y.astype(int)] = 1.0
|
|
164
|
-
return one_hot
|
|
165
|
-
return y
|
|
166
|
-
|
|
167
|
-
@staticmethod
|
|
168
|
-
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
|
|
169
|
-
def get_roc_curve_and_auc(
|
|
170
|
-
y_true: NDArray[np.intc | np.single],
|
|
171
|
-
y_pred: NDArray[np.single]
|
|
172
|
-
) -> tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
|
|
173
|
-
""" Calculate ROC curve and AUC score.
|
|
174
|
-
|
|
175
|
-
Args:
|
|
176
|
-
y_true (NDArray[intc | single]): True class labels (either one-hot encoded or class indices)
|
|
177
|
-
y_pred (NDArray[single]): Predicted probabilities (must be probability scores, not class indices)
|
|
178
|
-
Returns:
|
|
179
|
-
tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
|
|
180
|
-
Tuple containing AUC score, False Positive Rate, True Positive Rate, and Thresholds
|
|
181
|
-
|
|
182
|
-
Examples:
|
|
183
|
-
>>> # Binary classification example
|
|
184
|
-
>>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
|
|
185
|
-
>>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
|
|
186
|
-
>>> auc_value, fpr, tpr, thresholds = Utils.get_roc_curve_and_auc(y_true, y_pred)
|
|
187
|
-
>>> round(auc_value, 2)
|
|
188
|
-
0.92
|
|
189
|
-
>>> [round(x, 2) for x in fpr.tolist()]
|
|
190
|
-
[0.0, 0.0, 0.33, 0.67, 1.0]
|
|
191
|
-
>>> [round(x, 2) for x in tpr.tolist()]
|
|
192
|
-
[0.0, 0.5, 1.0, 1.0, 1.0]
|
|
193
|
-
>>> [round(x, 2) for x in thresholds.tolist()]
|
|
194
|
-
[inf, 0.9, 0.8, 0.3, 0.2]
|
|
195
|
-
"""
|
|
196
|
-
# For predictions, assert they are probabilities (one-hot encoded)
|
|
197
|
-
assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"
|
|
198
|
-
pred_probs: NDArray[np.single] = y_pred[:, 1] # Take probability of positive class only
|
|
199
|
-
|
|
200
|
-
# Calculate ROC curve and AUC score using probabilities
|
|
201
|
-
with Muffle(mute_stderr=True): # Suppress "UndefinedMetricWarning: No positive samples in y_true [...]"
|
|
202
|
-
|
|
203
|
-
# Import functions
|
|
204
|
-
try:
|
|
205
|
-
from sklearn.metrics import roc_auc_score, roc_curve
|
|
206
|
-
except ImportError as e:
|
|
207
|
-
raise ImportError("scikit-learn is required for ROC curve calculation. Install with 'pip install scikit-learn'") from e
|
|
208
|
-
|
|
209
|
-
# Convert y_true to class indices for both functions
|
|
210
|
-
y_true_indices: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)
|
|
211
|
-
|
|
212
|
-
# Calculate AUC score directly using roc_auc_score
|
|
213
|
-
auc_value: float = float(roc_auc_score(y_true_indices, pred_probs))
|
|
214
|
-
|
|
215
|
-
# Calculate ROC curve points
|
|
216
|
-
results: tuple[Any, Any, Any] = roc_curve(y_true_indices, pred_probs, drop_intermediate=False)
|
|
217
|
-
fpr: NDArray[np.single] = results[0]
|
|
218
|
-
tpr: NDArray[np.single] = results[1]
|
|
219
|
-
thresholds: NDArray[np.single] = results[2]
|
|
220
|
-
|
|
221
|
-
return auc_value, fpr, tpr, thresholds
|
|
222
|
-
|
|
223
|
-
@staticmethod
|
|
224
|
-
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
|
|
225
|
-
def get_pr_curve_and_auc(
|
|
226
|
-
y_true: NDArray[np.intc | np.single],
|
|
227
|
-
y_pred: NDArray[np.single],
|
|
228
|
-
negative: bool = False
|
|
229
|
-
) -> tuple[float, float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
|
|
230
|
-
""" Calculate Precision-Recall Curve (or Negative Precision-Recall Curve) and AUC score.
|
|
231
|
-
|
|
232
|
-
Args:
|
|
233
|
-
y_true (NDArray[intc | single]): True class labels (either one-hot encoded or class indices)
|
|
234
|
-
y_pred (NDArray[single]): Predicted probabilities (must be probability scores, not class indices)
|
|
235
|
-
negative (bool): Whether to calculate the negative Precision-Recall Curve
|
|
236
|
-
Returns:
|
|
237
|
-
tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
|
|
238
|
-
Tuple containing either:
|
|
239
|
-
- AUC score, Average Precision, Precision, Recall, and Thresholds
|
|
240
|
-
- AUC score, Average Precision, Negative Predictive Value, Specificity, and Thresholds for the negative class
|
|
241
|
-
|
|
242
|
-
Examples:
|
|
243
|
-
>>> # Binary classification example
|
|
244
|
-
>>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
|
|
245
|
-
>>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
|
|
246
|
-
>>> auc_value, average_precision, precision, recall, thresholds = Utils.get_pr_curve_and_auc(y_true, y_pred)
|
|
247
|
-
>>> round(auc_value, 2)
|
|
248
|
-
0.92
|
|
249
|
-
>>> round(average_precision, 2)
|
|
250
|
-
0.83
|
|
251
|
-
>>> [round(x, 2) for x in precision.tolist()]
|
|
252
|
-
[0.4, 0.5, 0.67, 1.0, 1.0]
|
|
253
|
-
>>> [round(x, 2) for x in recall.tolist()]
|
|
254
|
-
[1.0, 1.0, 1.0, 0.5, 0.0]
|
|
255
|
-
>>> [round(x, 2) for x in thresholds.tolist()]
|
|
256
|
-
[0.2, 0.3, 0.8, 0.9]
|
|
257
|
-
"""
|
|
258
|
-
# For predictions, assert they are probabilities (one-hot encoded)
|
|
259
|
-
assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"
|
|
260
|
-
pred_probs: NDArray[np.single] = y_pred[:, 1] if not negative else y_pred[:, 0]
|
|
261
|
-
|
|
262
|
-
# Calculate Precision-Recall Curve and AUC score using probabilities
|
|
263
|
-
with Muffle(mute_stderr=True): # Suppress "UndefinedMetricWarning: No positive samples in y_true [...]"
|
|
264
|
-
|
|
265
|
-
# Import functions
|
|
266
|
-
try:
|
|
267
|
-
from sklearn.metrics import auc, average_precision_score, precision_recall_curve
|
|
268
|
-
except ImportError as e:
|
|
269
|
-
raise ImportError("scikit-learn is required for PR Curve calculation. Install with 'pip install scikit-learn'") from e
|
|
270
|
-
|
|
271
|
-
# Convert y_true to class indices for both functions
|
|
272
|
-
y_true_indices: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)
|
|
273
|
-
|
|
274
|
-
results: tuple[Any, Any, Any] = precision_recall_curve(
|
|
275
|
-
y_true_indices,
|
|
276
|
-
pred_probs,
|
|
277
|
-
pos_label=1 if not negative else 0
|
|
278
|
-
)
|
|
279
|
-
precision: NDArray[np.single] = results[0]
|
|
280
|
-
recall: NDArray[np.single] = results[1]
|
|
281
|
-
thresholds: NDArray[np.single] = results[2]
|
|
282
|
-
auc_value: float = float(auc(recall, precision))
|
|
283
|
-
average_precision: float = float(average_precision_score(y_true_indices, pred_probs))
|
|
284
|
-
return auc_value, average_precision, precision, recall, thresholds
|
|
285
|
-
|
|
1
|
+
"""
|
|
2
|
+
This module contains the Utils class, which provides static methods for common operations.
|
|
3
|
+
|
|
4
|
+
This class contains static methods for:
|
|
5
|
+
|
|
6
|
+
- Safe division (with 0 as denominator or None)
|
|
7
|
+
- Safe multiplication (with None)
|
|
8
|
+
- Converting between one-hot encoding and class indices
|
|
9
|
+
- Calculating ROC and Precision-Recall curves and their AUC scores
|
|
10
|
+
"""
|
|
11
|
+
# pyright: reportUnknownMemberType=false
|
|
12
|
+
# pyright: reportUnknownVariableType=false
|
|
13
|
+
|
|
14
|
+
# Imports
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
from numpy.typing import NDArray
|
|
19
|
+
|
|
20
|
+
from ..ctx import Muffle
|
|
21
|
+
from ..decorators import handle_error
|
|
22
|
+
from .config.get import DataScienceConfig
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Class
|
|
26
|
+
class Utils:
|
|
27
|
+
""" Utility class providing common operations. """
|
|
28
|
+
|
|
29
|
+
@staticmethod
|
|
30
|
+
def safe_divide_float(a: float, b: float) -> float:
|
|
31
|
+
""" Safe division of two numbers, return 0 if denominator is 0.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
a (float): First number
|
|
35
|
+
b (float): Second number
|
|
36
|
+
Returns:
|
|
37
|
+
float: Result of the division
|
|
38
|
+
|
|
39
|
+
Examples:
|
|
40
|
+
>>> Utils.safe_divide_float(10, 2)
|
|
41
|
+
5.0
|
|
42
|
+
>>> Utils.safe_divide_float(0, 5)
|
|
43
|
+
0.0
|
|
44
|
+
>>> Utils.safe_divide_float(10, 0)
|
|
45
|
+
0
|
|
46
|
+
>>> Utils.safe_divide_float(-10, 2)
|
|
47
|
+
-5.0
|
|
48
|
+
"""
|
|
49
|
+
return a / b if b > 0 else 0
|
|
50
|
+
|
|
51
|
+
@staticmethod
|
|
52
|
+
def safe_divide_none(a: float | None, b: float | None) -> float | None:
|
|
53
|
+
""" Safe division of two numbers, return None if either number is None or denominator is 0.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
a (float | None): First number
|
|
57
|
+
b (float | None): Second number
|
|
58
|
+
Returns:
|
|
59
|
+
float | None: Result of the division or None if denominator is None
|
|
60
|
+
|
|
61
|
+
Examples:
|
|
62
|
+
>>> None == Utils.safe_divide_none(None, 2)
|
|
63
|
+
True
|
|
64
|
+
>>> None == Utils.safe_divide_none(10, None)
|
|
65
|
+
True
|
|
66
|
+
>>> None == Utils.safe_divide_none(10, 0)
|
|
67
|
+
True
|
|
68
|
+
>>> Utils.safe_divide_none(10, 2)
|
|
69
|
+
5.0
|
|
70
|
+
"""
|
|
71
|
+
return a / b if a is not None and b is not None and b > 0 else None
|
|
72
|
+
|
|
73
|
+
@staticmethod
|
|
74
|
+
def safe_multiply_none(a: float | None, b: float | None) -> float | None:
|
|
75
|
+
""" Safe multiplication of two numbers, return None if either number is None.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
a (float | None): First number
|
|
79
|
+
b (float | None): Second number
|
|
80
|
+
Returns:
|
|
81
|
+
float | None: Result of the multiplication or None if either number is None
|
|
82
|
+
|
|
83
|
+
Examples:
|
|
84
|
+
>>> None == Utils.safe_multiply_none(None, 2)
|
|
85
|
+
True
|
|
86
|
+
>>> None == Utils.safe_multiply_none(10, None)
|
|
87
|
+
True
|
|
88
|
+
>>> Utils.safe_multiply_none(10, 2)
|
|
89
|
+
20
|
|
90
|
+
>>> Utils.safe_multiply_none(-10, 2)
|
|
91
|
+
-20
|
|
92
|
+
"""
|
|
93
|
+
return a * b if a is not None and b is not None else None
|
|
94
|
+
|
|
95
|
+
@staticmethod
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
def convert_to_class_indices(y: NDArray[np.intc | np.single] | list[NDArray[np.intc | np.single]]) -> NDArray[Any]:
    """ Turn a one-hot encoded array into an array of class indices.

    Input that is not 2D with more than one column (1D arrays, single-column
    arrays, empty arrays) is returned as a new array with unchanged values.

    Args:
        y (NDArray[intc | single] | list[NDArray[intc | single]]): Input array (either one-hot encoded or class indices)
    Returns:
        NDArray[Any]: Array of class indices: [[0, 0, 1, 0], [1, 0, 0, 0]] -> [2, 0]

    Examples:
        >>> Utils.convert_to_class_indices(np.array([[0, 0, 1, 0], [1, 0, 0, 0]])).tolist()
        [2, 0]
        >>> Utils.convert_to_class_indices(np.array([2, 0, 1])).tolist()
        [2, 0, 1]
        >>> Utils.convert_to_class_indices(np.array([[1], [0]])).tolist()
        [[1], [0]]
        >>> Utils.convert_to_class_indices(np.array([])).tolist()
        []
    """
    labels = np.array(y)

    # Only a 2D array with several columns is treated as one-hot encoded
    looks_one_hot: bool = labels.ndim > 1 and labels.shape[1] > 1
    if not looks_one_hot:
        return labels

    # The index of the largest value in each row is the class index
    return np.argmax(labels, axis=1)
|
|
120
|
+
|
|
121
|
+
@staticmethod
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
def convert_to_one_hot(
    y: NDArray[np.intc | np.single] | list[NDArray[np.intc | np.single]], num_classes: int
) -> NDArray[Any]:
    """ Convert array from class indices to one-hot encoded format.
    If the input is already one-hot encoded, it returns the same array.

    Class indices may be given as a 1D array of shape (n,) or as a column
    vector of shape (n, 1). An array whose second dimension equals
    ``num_classes`` is considered already one-hot encoded.

    Args:
        y (NDArray[intc|single] | list[NDArray[intc|single]]): Input array (either class indices or one-hot encoded)
        num_classes (int): Total number of classes
    Returns:
        NDArray[Any]: One-hot encoded array: [2, 0] -> [[0, 0, 1, 0], [1, 0, 0, 0]]
    Raises:
        ValueError: If the input is neither class indices of shape (n,) / (n, 1)
            nor an array with ``num_classes`` columns

    Examples:
        >>> Utils.convert_to_one_hot(np.array([2, 0]), 4).tolist()
        [[0.0, 0.0, 1.0, 0.0], [1.0, 0.0, 0.0, 0.0]]
        >>> Utils.convert_to_one_hot(np.array([[0, 0, 1, 0], [1, 0, 0, 0]]), 4).tolist()
        [[0, 0, 1, 0], [1, 0, 0, 0]]
        >>> Utils.convert_to_one_hot(np.array([0, 1, 2]), 3).shape
        (3, 3)
        >>> Utils.convert_to_one_hot(np.array([]), 3)
        array([], shape=(0, 3), dtype=float32)
        >>> Utils.convert_to_one_hot(np.array([[1], [0]]), 2).tolist()
        [[0.0, 1.0], [1.0, 0.0]]

        >>> array = np.array([[0.1, 0.9], [0.2, 0.8]])
        >>> array = Utils.convert_to_class_indices(array)
        >>> array = Utils.convert_to_one_hot(array, 2)
        >>> array.tolist()
        [[0.0, 1.0], [0.0, 1.0]]
    """
    y = np.array(y)

    # Already one-hot encoded (one column per class): return it unchanged
    if y.ndim > 1 and y.shape[1] == num_classes:
        return y

    # A column vector of indices must be flattened first: indexing with an (n, 1) array
    # would broadcast against the (n,) row indices and set an n x n grid of cells
    original_shape: tuple[int, ...] = y.shape
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.reshape(-1)
    if y.ndim != 1:
        raise ValueError(
            f"Expected class indices of shape (n,) or (n, 1), or a one-hot array with {num_classes} columns, "
            f"got shape {original_shape}"
        )

    # Get the number of samples and create a one-hot encoded array
    n_samples: int = len(y)
    one_hot: NDArray[np.float32] = np.zeros((n_samples, num_classes), dtype=np.float32)
    if n_samples > 0:
        # Coordinate pairs (row_idx, class_idx): row i gets a 1.0 in the column of its class index
        row_indices: NDArray[np.intc] = np.arange(n_samples)
        one_hot[row_indices, y.astype(int)] = 1.0
    return one_hot
|
|
166
|
+
|
|
167
|
+
@staticmethod
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
def get_roc_curve_and_auc(
    y_true: NDArray[np.intc | np.single],
    y_pred: NDArray[np.single]
) -> tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
    """ Compute the ROC curve points together with the ROC AUC score.

    The second column of ``y_pred`` (index 1) is used as the score of the positive class.

    Args:
        y_true (NDArray[intc | single]): True class labels (either one-hot encoded or class indices)
        y_pred (NDArray[single]): Predicted probabilities, 2D with at least two columns (not class indices)
    Returns:
        tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
            Tuple containing AUC score, False Positive Rate, True Positive Rate, and Thresholds

    Examples:
        >>> # Binary classification example
        >>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
        >>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
        >>> auc_value, fpr, tpr, thresholds = Utils.get_roc_curve_and_auc(y_true, y_pred)
        >>> round(auc_value, 2)
        0.92
        >>> [round(x, 2) for x in fpr.tolist()]
        [0.0, 0.0, 0.33, 0.67, 1.0]
        >>> [round(x, 2) for x in tpr.tolist()]
        [0.0, 0.5, 1.0, 1.0, 1.0]
        >>> [round(x, 2) for x in thresholds.tolist()]
        [inf, 0.9, 0.8, 0.3, 0.2]
    """
    # Predictions must hold one score per class, i.e. be 2D with several columns
    assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"
    positive_scores: NDArray[np.single] = y_pred[:, 1]

    # Mute stderr so "UndefinedMetricWarning: No positive samples in y_true [...]" is not printed
    with Muffle(mute_stderr=True):

        # scikit-learn is imported lazily, only when this method is called
        try:
            from sklearn.metrics import roc_auc_score, roc_curve
        except ImportError as e:
            raise ImportError("scikit-learn is required for ROC curve calculation. Install with 'pip install scikit-learn'") from e

        # Both sklearn functions receive the labels as class indices
        labels: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)

        # AUC score first, then every point of the curve (no intermediate point dropped)
        auc_value: float = float(roc_auc_score(labels, positive_scores))
        fpr, tpr, thresholds = roc_curve(labels, positive_scores, drop_intermediate=False)

        return auc_value, fpr, tpr, thresholds
|
|
222
|
+
|
|
223
|
+
@staticmethod
@handle_error(error_log=DataScienceConfig.ERROR_LOG)
def get_pr_curve_and_auc(
    y_true: NDArray[np.intc | np.single],
    y_pred: NDArray[np.single],
    negative: bool = False
) -> tuple[float, float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
    """ Calculate Precision-Recall Curve (or Negative Precision-Recall Curve) and AUC score.

    With ``negative=False`` class 1 is the positive class and column 1 of ``y_pred`` is its score.
    With ``negative=True`` class 0 is treated as the positive class and column 0 of ``y_pred``
    is its score, for the curve and for the average precision alike.

    Args:
        y_true (NDArray[intc | single]): True class labels (either one-hot encoded or class indices)
        y_pred (NDArray[single]): Predicted probabilities (must be probability scores, not class indices)
        negative (bool): Whether to calculate the negative Precision-Recall Curve
    Returns:
        tuple[float, float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
            Tuple containing either:
                - AUC score, Average Precision, Precision, Recall, and Thresholds
                - AUC score, Average Precision, Negative Predictive Value, Specificity, and Thresholds for the negative class

    Examples:
        >>> # Binary classification example
        >>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
        >>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
        >>> auc_value, average_precision, precision, recall, thresholds = Utils.get_pr_curve_and_auc(y_true, y_pred)
        >>> round(auc_value, 2)
        0.92
        >>> round(average_precision, 2)
        0.83
        >>> [round(x, 2) for x in precision.tolist()]
        [0.4, 0.5, 0.67, 1.0, 1.0]
        >>> [round(x, 2) for x in recall.tolist()]
        [1.0, 1.0, 1.0, 0.5, 0.0]
        >>> [round(x, 2) for x in thresholds.tolist()]
        [0.2, 0.3, 0.8, 0.9]
    """
    # For predictions, assert they are probabilities (one column per class)
    assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"

    # The class treated as "positive" selects both the score column and the label passed to sklearn
    pos_label: int = 0 if negative else 1
    pred_probs: NDArray[np.single] = y_pred[:, pos_label]

    # Calculate Precision-Recall Curve and AUC score using probabilities
    with Muffle(mute_stderr=True):	# Suppress "UndefinedMetricWarning: No positive samples in y_true [...]"

        # Import functions
        try:
            from sklearn.metrics import auc, average_precision_score, precision_recall_curve
        except ImportError as e:
            raise ImportError("scikit-learn is required for PR Curve calculation. Install with 'pip install scikit-learn'") from e

        # Convert y_true to class indices for both functions
        y_true_indices: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)

        results: tuple[Any, Any, Any] = precision_recall_curve(
            y_true_indices,
            pred_probs,
            pos_label=pos_label
        )
        precision: NDArray[np.single] = results[0]
        recall: NDArray[np.single] = results[1]
        thresholds: NDArray[np.single] = results[2]
        auc_value: float = float(auc(recall, precision))

        # Pass the same pos_label as the curve: the default (1) would score class-0
        # probabilities against class-1 positives when negative=True
        average_precision: float = float(
            average_precision_score(y_true_indices, pred_probs, pos_label=pos_label)
        )
        return auc_value, average_precision, precision, recall, thresholds
|
|
285
|
+
|
stouputils/installer/__init__.py
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
""" Installer module for stouputils.
|
|
2
|
-
|
|
3
|
-
Provides functions for platform-agnostic installation tasks by dispatching
|
|
4
|
-
to platform-specific implementations (Windows, Linux/macOS).
|
|
5
|
-
|
|
6
|
-
It handles getting installation paths, adding programs to the PATH environment variable,
|
|
7
|
-
and installing programs from local zip files or URLs.
|
|
8
|
-
"""
|
|
9
|
-
# ruff: noqa: F403
|
|
10
|
-
# ruff: noqa: F405
|
|
11
|
-
|
|
12
|
-
# Imports
|
|
13
|
-
from .common import *
|
|
14
|
-
from .downloader import *
|
|
15
|
-
from .linux import *
|
|
16
|
-
from .main import *
|
|
17
|
-
from .windows import *
|
|
18
|
-
|
|
1
|
+
""" Installer module for stouputils.
|
|
2
|
+
|
|
3
|
+
Provides functions for platform-agnostic installation tasks by dispatching
|
|
4
|
+
to platform-specific implementations (Windows, Linux/macOS).
|
|
5
|
+
|
|
6
|
+
It handles getting installation paths, adding programs to the PATH environment variable,
|
|
7
|
+
and installing programs from local zip files or URLs.
|
|
8
|
+
"""
|
|
9
|
+
# ruff: noqa: F403
|
|
10
|
+
# ruff: noqa: F405
|
|
11
|
+
|
|
12
|
+
# Imports
|
|
13
|
+
from .common import *
|
|
14
|
+
from .downloader import *
|
|
15
|
+
from .linux import *
|
|
16
|
+
from .main import *
|
|
17
|
+
from .windows import *
|
|
18
|
+
|