stouputils 1.3.5.tar.gz → 1.3.7.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (107)
  1. {stouputils-1.3.5 → stouputils-1.3.7}/PKG-INFO +1 -1
  2. {stouputils-1.3.5 → stouputils-1.3.7}/pyproject.toml +1 -1
  3. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/metric_dictionnary.py +13 -1
  4. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/metric_utils.py +159 -7
  5. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/model_interface.py +5 -14
  6. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/utils.py +73 -6
  7. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/decorators.py +17 -0
  8. {stouputils-1.3.5 → stouputils-1.3.7}/.gitignore +0 -0
  9. {stouputils-1.3.5 → stouputils-1.3.7}/LICENSE +0 -0
  10. {stouputils-1.3.5 → stouputils-1.3.7}/README.md +0 -0
  11. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/__init__.py +0 -0
  12. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/all_doctests.py +0 -0
  13. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/__init__.py +0 -0
  14. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/automatic_docs.py +0 -0
  15. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/upscaler/__init__.py +0 -0
  16. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/upscaler/config.py +0 -0
  17. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/upscaler/image.py +0 -0
  18. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/applications/upscaler/video.py +0 -0
  19. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/archive.py +0 -0
  20. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/backup.py +0 -0
  21. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/collections.py +0 -0
  22. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/continuous_delivery/__init__.py +0 -0
  23. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/continuous_delivery/cd_utils.py +0 -0
  24. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/continuous_delivery/github.py +0 -0
  25. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/continuous_delivery/pypi.py +0 -0
  26. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/continuous_delivery/pyproject.py +0 -0
  27. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/ctx.py +0 -0
  28. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/config/get.py +0 -0
  29. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/config/set.py +0 -0
  30. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/__init__.py +0 -0
  31. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/auto_contrast.py +0 -0
  32. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/axis_flip.py +0 -0
  33. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/bias_field_correction.py +0 -0
  34. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/binary_threshold.py +0 -0
  35. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/blur.py +0 -0
  36. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/brightness.py +0 -0
  37. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/canny.py +0 -0
  38. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/clahe.py +0 -0
  39. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/common.py +0 -0
  40. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/contrast.py +0 -0
  41. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/curvature_flow_filter.py +0 -0
  42. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/denoise.py +0 -0
  43. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/histogram_equalization.py +0 -0
  44. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/invert.py +0 -0
  45. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/laplacian.py +0 -0
  46. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/median_blur.py +0 -0
  47. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/noise.py +0 -0
  48. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/normalize.py +0 -0
  49. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/random_erase.py +0 -0
  50. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/resize.py +0 -0
  51. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/rotation.py +0 -0
  52. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/salt_pepper.py +0 -0
  53. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/sharpening.py +0 -0
  54. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/shearing.py +0 -0
  55. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/threshold.py +0 -0
  56. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/translation.py +0 -0
  57. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image/zoom.py +0 -0
  58. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image_augmentation.py +0 -0
  59. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/image_preprocess.py +0 -0
  60. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/prosthesis_detection.py +0 -0
  61. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/data_processing/technique.py +0 -0
  62. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/__init__.py +0 -0
  63. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/dataset.py +0 -0
  64. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/dataset_loader.py +0 -0
  65. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/grouping_strategy.py +0 -0
  66. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/image_loader.py +0 -0
  67. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/dataset/xy_tuple.py +0 -0
  68. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/mlflow_utils.py +0 -0
  69. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/abstract_model.py +0 -0
  70. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/all.py +0 -0
  71. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/base_keras.py +0 -0
  72. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/all.py +0 -0
  73. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/convnext.py +0 -0
  74. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/densenet.py +0 -0
  75. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/efficientnet.py +0 -0
  76. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/mobilenet.py +0 -0
  77. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/resnet.py +0 -0
  78. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/squeezenet.py +0 -0
  79. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/vgg.py +0 -0
  80. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras/xception.py +0 -0
  81. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/__init__.py +0 -0
  82. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +0 -0
  83. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +0 -0
  84. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +0 -0
  85. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +0 -0
  86. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +0 -0
  87. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/losses/__init__.py +0 -0
  88. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +0 -0
  89. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/keras_utils/visualizations.py +0 -0
  90. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/sandbox.py +0 -0
  91. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/range_tuple.py +0 -0
  92. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/scripts/augment_dataset.py +0 -0
  93. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/scripts/exhaustive_process.py +0 -0
  94. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/scripts/preprocess_dataset.py +0 -0
  95. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/scripts/routine.py +0 -0
  96. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/dont_look/zip_file_override.py +0 -0
  97. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/image.py +0 -0
  98. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/__init__.py +0 -0
  99. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/common.py +0 -0
  100. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/downloader.py +0 -0
  101. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/linux.py +0 -0
  102. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/main.py +0 -0
  103. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/installer/windows.py +0 -0
  104. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/io.py +0 -0
  105. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/parallel.py +0 -0
  106. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/print.py +0 -0
  107. {stouputils-1.3.5 → stouputils-1.3.7}/stouputils/py.typed +0 -0
{stouputils-1.3.5 → stouputils-1.3.7}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: stouputils
-Version: 1.3.5
+Version: 1.3.7
 Summary: Stouputils is a collection of utility modules designed to simplify and enhance the development process. It includes a range of tools for tasks such as execution of doctests, display utilities, decorators, as well as context managers, and many more.
 Project-URL: Homepage, https://github.com/Stoupy51/stouputils
 Project-URL: Issues, https://github.com/Stoupy51/stouputils/issues
{stouputils-1.3.5 → stouputils-1.3.7}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"

 [project]
 name = "stouputils"
-version = "1.3.5"
+version = "1.3.7"
 description = "Stouputils is a collection of utility modules designed to simplify and enhance the development process. It includes a range of tools for tasks such as execution of doctests, display utilities, decorators, as well as context managers, and many more."
 readme = "README.md"
 requires-python = ">=3.10"
{stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/metric_dictionnary.py
@@ -8,12 +8,16 @@ This class contains the following metrics:
 1. Main metrics:

 - Area Under the Curve (AUC)
+- Area Under the Precision-Recall Curve (AUPRC)
+- Area Under the NPV-Specificity Curve (NEGATIVE_AUPRC)
 - Specificity (True Negative Rate)
 - Recall/Sensitivity (True Positive Rate)
 - Precision (Positive Predictive Value)
 - Negative Predictive Value (NPV)
 - Accuracy
 - F1 Score
+- Precision-Recall Average
+- Precision-Recall Average for Negative Class

 2. Confusion matrix metrics:

@@ -39,6 +43,8 @@ This class contains the following metrics:

 - Youden's J statistic
 - Cost-based threshold
+- F1 Score threshold
+- F1 Score threshold for the negative class

 6. Average metrics across folds:

@@ -52,7 +58,9 @@ This class contains the following metrics:
 class MetricDictionnary:

 	# Main metrics (starting with '1:')
-	AUC: str = "1: Area Under the Curve: AUC"
+	AUC: str = "1: Area Under the ROC Curve: AUC / AUROC"
+	AUPRC: str = "1: Area Under the Precision-Recall Curve: AUPRC / PR AUC"
+	NEGATIVE_AUPRC: str = "1: Area Under the NPV-Specificity Curve: AUNPRC / NPR AUC"
 	SPECIFICITY: str = "1: Specificity: True Negative Rate"
 	RECALL: str = "1: Recall/Sensitivity: True Positive Rate"
 	PRECISION: str = "1: Precision: Positive Predictive Value"
@@ -61,6 +69,8 @@ class MetricDictionnary:
 	BALANCED_ACCURACY: str = "1: Balanced Accuracy"
 	F1_SCORE: str = "1: F1 Score"
 	F1_SCORE_NEGATIVE: str = "1: F1 Score for Negative Class"
+	PR_AVERAGE: str = "1: Precision-Recall Average"
+	PR_AVERAGE_NEGATIVE: str = "1: Precision-Recall Average for Negative Class"

 	# Confusion matrix metrics (starting with '2:')
 	CONFUSION_MATRIX_TN: str = "2: Confusion Matrix: TN"
@@ -82,6 +92,8 @@ class MetricDictionnary:
 	# Optimal thresholds (starting with '5:')
 	OPTIMAL_THRESHOLD_YOUDEN: str = "5: Optimal Threshold: Youden"
 	OPTIMAL_THRESHOLD_COST: str = "5: Optimal Threshold: Cost"
+	OPTIMAL_THRESHOLD_F1: str = "5: Optimal Threshold: F1"
+	OPTIMAL_THRESHOLD_F1_NEGATIVE: str = "5: Optimal Threshold: F1 for Negative Class"

 	# Average metrics across folds (starting with '6:')
 	AVERAGE_METRIC: str = "6: Average METRIC_NAME across folds"
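The MetricDictionnary constants are plain strings used as keys into the metric dictionaries the library logs. Note that the value of the AUC key itself changed in this release ("Area Under the Curve: AUC" became "Area Under the ROC Curve: AUC / AUROC"), so code that looks metrics up by the literal 1.3.5 string rather than through the constant will stop matching. A minimal sketch of the safe pattern (import path taken from the file list above; the metric values are made up):

    from stouputils.data_science.metric_dictionnary import MetricDictionnary

    # A metrics dict as produced by MetricUtils (values here are invented for illustration)
    metrics: dict[str, float] = {MetricDictionnary.AUC: 0.91, MetricDictionnary.AUPRC: 0.88}

    # Key through the constants, not through literal strings, so renames stay transparent
    print(metrics[MetricDictionnary.AUC])    # 0.91
    print(metrics[MetricDictionnary.AUPRC])  # 0.88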
{stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/metric_utils.py
@@ -117,10 +117,10 @@ class MetricUtils:
 			if mcc_metric:
 				metrics.update(mcc_metric)

-			# Calculate and plot ROC/AUC
-			roc_metrics: dict[str, float] = MetricUtils.roc_and_auc(true_classes, y_pred, fold_number=-1, run_name=run_name)
-			if roc_metrics:
-				metrics.update(roc_metrics)
+			# Calculate and plot (ROC Curve / AUC) and (PR Curve / AUC, and negative one)
+			curves_metrics: dict[str, float] = MetricUtils.all_curves(true_classes, y_pred, fold_number=-1, run_name=run_name)
+			if curves_metrics:
+				metrics.update(curves_metrics)

 		# Multiclass classification
 		elif mode == "multiclass":
@@ -287,7 +287,7 @@ class MetricUtils:

 	@staticmethod
 	@handle_error(error_log=DataScienceConfig.ERROR_LOG)
-	def roc_and_auc(
+	def roc_curve_and_auc(
 		true_classes: NDArray[np.intc] | NDArray[np.single],
 		pred_probs: NDArray[np.single],
 		fold_number: int = -1,
@@ -311,7 +311,7 @@ class MetricUtils:
 		>>> pred_probs = np.array([[0.9, 0.1], [0.1, 0.9], [0.1, 0.9]])
 		>>> from stouputils.ctx import Muffle
 		>>> with Muffle():
-		... 	metrics = MetricUtils.roc_and_auc(true_classes, pred_probs, run_name="")
+		... 	metrics = MetricUtils.roc_curve_and_auc(true_classes, pred_probs, run_name="")

 		>>> # Check metrics
 		>>> round(float(metrics[MetricDictionnary.AUC]), 2)
@@ -377,6 +377,158 @@ class MetricUtils:

 		return metrics

+	@staticmethod
+	@handle_error(error_log=DataScienceConfig.ERROR_LOG)
+	def pr_curve_and_auc(
+		true_classes: NDArray[np.intc] | NDArray[np.single],
+		pred_probs: NDArray[np.single],
+		fold_number: int = -1,
+		run_name: str = ""
+	) -> dict[str, float]:
+		""" Calculate Precision-Recall curve and AUC score. (and NPV-Specificity curve and AUC)
+
+		Args:
+			true_classes (NDArray[np.intc | np.single]): True class labels (one-hot encoded or class indices)
+			pred_probs (NDArray[np.single]): Predicted probabilities (must be probability scores, not class indices)
+			fold_number (int): Fold number, used for naming the plot file, usually
+				-1 for final model with test set,
+				0 for final model with validation set,
+				>0 for other folds with their validation set
+			run_name (str): Name for saving the plot
+		Returns:
+			dict[str, float]: Dictionary containing AUC score and optimal thresholds
+
+		Examples:
+			>>> true_classes = np.array([0, 1, 0])
+			>>> pred_probs = np.array([[0.9, 0.1], [0.1, 0.9], [0.1, 0.9]])
+			>>> from stouputils.ctx import Muffle
+			>>> with Muffle():
+			... 	metrics = MetricUtils.pr_curve_and_auc(true_classes, pred_probs, run_name="")

+			>>> # Check metrics
+			>>> round(float(metrics[MetricDictionnary.AUPRC]), 2)
+			0.75
+			>>> round(float(metrics[MetricDictionnary.NEGATIVE_AUPRC]), 2)
+			0.92
+			>>> round(float(metrics[MetricDictionnary.PR_AVERAGE]), 2)
+			0.5
+			>>> round(float(metrics[MetricDictionnary.PR_AVERAGE_NEGATIVE]), 2)
+			0.33
+			>>> round(float(metrics[MetricDictionnary.OPTIMAL_THRESHOLD_F1]), 2)
+			0.9
+			>>> round(float(metrics[MetricDictionnary.OPTIMAL_THRESHOLD_F1_NEGATIVE]), 2)
+			0.1
+		"""
+		auc_value, average_precision, precision, recall, thresholds = Utils.get_pr_curve_and_auc(true_classes, pred_probs)
+		neg_auc_value, average_precision_neg, npv, specificity, neg_thresholds = (
+			Utils.get_pr_curve_and_auc(true_classes, pred_probs, negative=True)
+		)

+		# Calculate metrics
+		metrics: dict[str, float] = {
+			MetricDictionnary.AUPRC: auc_value,
+			MetricDictionnary.NEGATIVE_AUPRC: neg_auc_value,
+			MetricDictionnary.PR_AVERAGE: average_precision,
+			MetricDictionnary.PR_AVERAGE_NEGATIVE: average_precision_neg
+		}

+		# Calculate optimal thresholds for both PR curves
+		for is_negative in (False, True):

+			# Get the right values based on positive/negative case
+			if not is_negative:
+				curr_precision = precision
+				curr_recall = recall
+				curr_thresholds = thresholds
+				curr_auc = auc_value
+				curr_ap = average_precision
+			else:
+				curr_precision = npv
+				curr_recall = specificity
+				curr_thresholds = neg_thresholds
+				curr_auc = neg_auc_value
+				curr_ap = average_precision_neg

+			# Calculate F-score for each threshold
+			fscore: NDArray[np.single] = (2 * curr_precision * curr_recall) / (curr_precision + curr_recall)
+			fscore = fscore[~np.isnan(fscore)]

+			# Get optimal threshold (maximum F-score)
+			if len(fscore) > 0:
+				optimal_idx: int = int(np.argmax(fscore))
+				optimal_threshold: float = curr_thresholds[optimal_idx]
+			else:
+				optimal_idx: int = 0
+				optimal_threshold = float('inf')

+			# Store in metrics dictionary
+			if not is_negative:
+				metrics[MetricDictionnary.OPTIMAL_THRESHOLD_F1] = optimal_threshold
+			else:
+				metrics[MetricDictionnary.OPTIMAL_THRESHOLD_F1_NEGATIVE] = optimal_threshold

+			# Plot ROC curve if not nan
+			if run_name:
+				label: str = "Precision - Recall" if not is_negative else "Negative Predictive Value - Specificity"
+				plt.figure(figsize=(12, 6))
+				plt.plot(curr_recall, curr_precision, "b", label=f"{label} curve (AUC = {curr_auc:.2f}, AP = {curr_ap:.2f})")

+				# Prepare the path
+				fold_name: str = ""
+				if fold_number > 0:
+					fold_name = f"_fold_{fold_number}_val"
+				elif fold_number == 0:
+					fold_name = "_val"
+				elif fold_number == -1:
+					fold_name = "_test"
+				elif fold_number == -2:
+					fold_name = "_train"
+				pr: str = "pr" if not is_negative else "negative_pr"
+				curve_path: str = f"{DataScienceConfig.TEMP_FOLDER}/{run_name}_{pr}_curve{fold_name}.png"

+				plt.plot(
+					curr_recall[optimal_idx], curr_precision[optimal_idx], 'go', label=f"Optimal threshold (t={optimal_threshold:.2f})"
+				)

+				plt.xlim([-0.01, 1.01])
+				plt.ylim([-0.01, 1.01])
+				plt.xlabel("Recall" if not is_negative else "Specificity")
+				plt.ylabel("Precision" if not is_negative else "Negative Predictive Value")
+				plt.title(f"{label} Curve")
+				plt.legend(loc="lower right")
+				plt.savefig(curve_path)
+				mlflow.log_artifact(curve_path)
+				os.remove(curve_path)
+				plt.close()

+		return metrics

+	@staticmethod
+	@handle_error(error_log=DataScienceConfig.ERROR_LOG)
+	def all_curves(
+		true_classes: NDArray[np.intc] | NDArray[np.single],
+		pred_probs: NDArray[np.single],
+		fold_number: int = -1,
+		run_name: str = ""
+	) -> dict[str, float]:
+		""" Run all X_curve_and_auc functions and return a dictionary of metrics.

+		Args:
+			true_classes (NDArray[np.intc | np.single]): True class labels (one-hot encoded or class indices)
+			pred_probs (NDArray[np.single]): Predicted probabilities (must be probability scores, not class indices)
+			fold_number (int): Fold number, used for naming the plot file, usually
+				-1 for final model with test set,
+				0 for final model with validation set,
+				>0 for other folds with their validation set
+			run_name (str): Name for saving the plot
+		Returns:
+			dict[str, float]: Dictionary containing AUC score and optimal thresholds for ROC and PR curves
+		"""
+		metrics: dict[str, float] = {}
+		metrics.update(MetricUtils.roc_curve_and_auc(true_classes, pred_probs, fold_number, run_name))
+		metrics.update(MetricUtils.pr_curve_and_auc(true_classes, pred_probs, fold_number, run_name))
+		return metrics
+

 	@staticmethod
 	@handle_error(error_log=DataScienceConfig.ERROR_LOG)
@@ -609,7 +761,7 @@ class MetricUtils:

 			# Find all local minima
 			from scipy.signal import argrelextrema
-			local_minima_idx: NDArray[np.intp] = argrelextrema(y_array, np.less)[0]
+			local_minima_idx: NDArray[np.intp] = np.array(argrelextrema(y_array, np.less)[0], dtype=np.intp)
 			distinct_candidates = np.unique(np.append(local_minima_idx, best_idx))
 		else:
 			assert 0 <= best_idx < len(x_array), "Best x index is out of bounds"
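Taken together, pr_curve_and_auc mirrors the existing ROC helper (compute the curve, optionally plot and log it to MLflow, return a metric dict), and all_curves becomes the single entry point that runs both. A minimal usage sketch built from the doctests above (module paths inferred from the file list; an empty run_name skips the plotting branch):

    import numpy as np
    from stouputils.ctx import Muffle
    from stouputils.data_science.metric_dictionnary import MetricDictionnary
    from stouputils.data_science.metric_utils import MetricUtils

    true_classes = np.array([0, 1, 0])                           # class indices
    pred_probs = np.array([[0.9, 0.1], [0.1, 0.9], [0.1, 0.9]])  # one-hot probabilities

    with Muffle():  # silence sklearn warnings, as the doctests do
        metrics = MetricUtils.all_curves(true_classes, pred_probs, fold_number=-1, run_name="")

    print(round(float(metrics[MetricDictionnary.AUC]), 2))    # ROC AUC
    print(round(float(metrics[MetricDictionnary.AUPRC]), 2))  # PR AUC (0.75 for this data, per the doctest)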
{stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/models/model_interface.py
@@ -851,22 +851,13 @@ class ModelInterface(AbstractModel):
 		self.history.append(history)
 		self.evaluation_results.append(eval_results)

-		# Generate and save ROC curve for this fold
-		MetricUtils.roc_and_auc(
-			true_classes=true_classes,
-			pred_probs=predictions,
-			fold_number=fold_number,
-			run_name=self.run_name
-		)
+		# Generate and save ROC Curve and PR Curve for this fold
+		MetricUtils.all_curves(true_classes, predictions, fold_number, run_name=self.run_name)

-		# If final model, also log the ROC curve for the train set
+		# If final model, also log the ROC curve and PR curve for the train set
 		if fold_number == 0:
-			MetricUtils.roc_and_auc(
-				true_classes=training_true_classes,
-				pred_probs=training_predictions,
-				fold_number=-2,
-				run_name=self.run_name
-			)
+			fold_number = -2 # -2 is the train set
+			MetricUtils.all_curves(training_true_classes, training_predictions, fold_number, run_name=self.run_name)

 		# Log visualization artifacts if they were generated
 		if temp_dir is not None:
{stouputils-1.3.5 → stouputils-1.3.7}/stouputils/data_science/utils.py
@@ -197,22 +197,89 @@ class Utils:
 		assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"
 		pred_probs: NDArray[np.single] = y_pred[:, 1] # Take probability of positive class only

-		# Convert true labels to class indices if they're one-hot encoded
-		true_classes: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)
-
 		# Calculate ROC curve and AUC score using probabilities
 		with Muffle(mute_stderr=True): # Suppress "UndefinedMetricWarning: No positive samples in y_true [...]"

 			# Import functions
 			try:
-				from sklearn.metrics import auc, roc_curve
+				from sklearn.metrics import roc_auc_score, roc_curve
 			except ImportError as e:
 				raise ImportError("scikit-learn is required for ROC curve calculation. Install with 'pip install scikit-learn'") from e

-			results: tuple[Any, Any, Any] = roc_curve(true_classes, pred_probs, drop_intermediate=False)
+			# Convert y_true to class indices for both functions
+			y_true_indices: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)
+
+			# Calculate AUC score directly using roc_auc_score
+			auc_value: float = float(roc_auc_score(y_true_indices, pred_probs))
+
+			# Calculate ROC curve points
+			results: tuple[Any, Any, Any] = roc_curve(y_true_indices, pred_probs, drop_intermediate=False)
 			fpr: NDArray[np.single] = results[0]
 			tpr: NDArray[np.single] = results[1]
 			thresholds: NDArray[np.single] = results[2]
-			auc_value: float = float(auc(fpr, tpr))
+
 		return auc_value, fpr, tpr, thresholds

+	@staticmethod
+	@handle_error(error_log=DataScienceConfig.ERROR_LOG)
+	def get_pr_curve_and_auc(
+		y_true: NDArray[np.intc | np.single],
+		y_pred: NDArray[np.single],
+		negative: bool = False
+	) -> tuple[float, float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
+		""" Calculate Precision-Recall Curve (or Negative Precision-Recall Curve) and AUC score.

+		Args:
+			y_true (NDArray[intc | single]): True class labels (either one-hot encoded or class indices)
+			y_pred (NDArray[single]): Predicted probabilities (must be probability scores, not class indices)
+			negative (bool): Whether to calculate the negative Precision-Recall Curve
+		Returns:
+			tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]:
+				Tuple containing either:
+				- AUC score, Average Precision, Precision, Recall, and Thresholds
+				- AUC score, Average Precision, Negative Predictive Value, Specificity, and Thresholds for the negative class

+		Examples:
+			>>> # Binary classification example
+			>>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
+			>>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
+			>>> auc_value, average_precision, precision, recall, thresholds = Utils.get_pr_curve_and_auc(y_true, y_pred)
+			>>> round(auc_value, 2)
+			0.92
+			>>> round(average_precision, 2)
+			0.83
+			>>> [round(x, 2) for x in precision.tolist()]
+			[0.4, 0.5, 0.67, 1.0, 1.0]
+			>>> [round(x, 2) for x in recall.tolist()]
+			[1.0, 1.0, 1.0, 0.5, 0.0]
+			>>> [round(x, 2) for x in thresholds.tolist()]
+			[0.2, 0.3, 0.8, 0.9]
+		"""
+		# For predictions, assert they are probabilities (one-hot encoded)
+		assert y_pred.ndim > 1 and y_pred.shape[1] > 1, "Predictions must be probability scores in one-hot format"
+		pred_probs: NDArray[np.single] = y_pred[:, 1] if not negative else y_pred[:, 0]

+		# Calculate Precision-Recall Curve and AUC score using probabilities
+		with Muffle(mute_stderr=True): # Suppress "UndefinedMetricWarning: No positive samples in y_true [...]"

+			# Import functions
+			try:
+				from sklearn.metrics import auc, average_precision_score, precision_recall_curve
+			except ImportError as e:
+				raise ImportError("scikit-learn is required for PR Curve calculation. Install with 'pip install scikit-learn'") from e

+			# Convert y_true to class indices for both functions
+			y_true_indices: NDArray[np.intc] = Utils.convert_to_class_indices(y_true)

+			results: tuple[Any, Any, Any] = precision_recall_curve(
+				y_true=y_true_indices,
+				probas_pred=pred_probs,
+				pos_label=1 if not negative else 0
+			)
+			precision: NDArray[np.single] = results[0]
+			recall: NDArray[np.single] = results[1]
+			thresholds: NDArray[np.single] = results[2]
+			auc_value: float = float(auc(recall, precision))
+			average_precision: float = float(average_precision_score(y_true_indices, pred_probs))
+		return auc_value, average_precision, precision, recall, thresholds
+
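The negative=True branch is what backs the NPV-Specificity curve: scoring class 0 with its own probability column makes precision play the role of negative predictive value and recall the role of specificity. A standalone sketch of that idea using scikit-learn directly (illustrative only, not the package's code):

    import numpy as np
    from sklearn.metrics import auc, precision_recall_curve

    y_true = np.array([0, 1, 0, 1, 0])
    y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])

    # Negative PR curve: treat class 0 as the positive label and score it with column 0
    npv, specificity, thresholds = precision_recall_curve(y_true, y_pred[:, 0], pos_label=0)
    neg_auc = float(auc(specificity, npv))  # area under the NPV-Specificity curve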
{stouputils-1.3.5 → stouputils-1.3.7}/stouputils/decorators.py
@@ -33,6 +33,17 @@ def get_func_name(func: Callable[..., Any]) -> str:
 	except AttributeError:
 		return "<unknown>"

+def get_wrapper_name(decorator_name: str, func: Callable[..., Any]) -> str:
+	""" Get a descriptive name for a wrapper function.
+
+	Args:
+		decorator_name (str): Name of the decorator
+		func (Callable[..., Any]): Function being decorated
+	Returns:
+		str: Combined name for the wrapper function
+	"""
+	return f"{decorator_name}_{get_func_name(func)}"
+
 # Decorator that make a function silent (disable stdout)
 def silent(
 	func: Callable[..., Any] | None = None,
@@ -66,6 +77,7 @@ def silent(
 		# Use Muffle context manager to silence output
 		with Muffle(mute_stderr=mute_stderr):
 			return func(*args, **kwargs)
+	wrapper.__name__ = get_wrapper_name("stouputils.decorators.silent", func)
 	return wrapper

 	# Handle both @silent and @silent(mute_stderr=...)
@@ -108,6 +120,7 @@ def measure_time(
 		def wrapper(*args: tuple[Any, ...], **kwargs: dict[str, Any]) -> Any:
 			with MeasureTime(print_func=print_func, message=new_msg, perf_counter=perf_counter):
 				return func(*args, **kwargs)
+		wrapper.__name__ = get_wrapper_name("stouputils.decorators.measure_time", func)
 		return wrapper
 	return decorator

@@ -189,6 +202,7 @@ def handle_error(
 			# Sleep for the specified time, only if the error_log is not ERROR_TRACEBACK (because it's blocking)
 			if sleep_time > 0.0 and error_log != LogLevels.ERROR_TRACEBACK:
 				time.sleep(sleep_time)
+		wrapper.__name__ = get_wrapper_name("stouputils.decorators.handle_error", func)
 		return wrapper

 	# Handle both @handle_error and @handle_error(exceptions=..., message=..., error_log=...)
@@ -255,6 +269,7 @@ def simple_cache(
 			return result

 		# Return the wrapper
+		wrapper.__name__ = get_wrapper_name("stouputils.decorators.simple_cache", func)
 		return wrapper

 	# Handle both @simple_cache and @simple_cache(method=...)
@@ -312,6 +327,7 @@ def deprecated(

 			# Call the original function
 			return func(*args, **kwargs)
+		wrapper.__name__ = get_wrapper_name("stouputils.decorators.deprecated", func)
 		return wrapper

 	# Handle both @deprecated and @deprecated(message=..., error_log=...)
@@ -362,6 +378,7 @@ def abstract(
 	@handle_error(exceptions=NotImplementedError, error_log=error_log)
 	def wrapper(*args: tuple[Any, ...], **kwargs: dict[str, Any]) -> Any:
 		raise NotImplementedError(message)
+	wrapper.__name__ = get_wrapper_name("stouputils.decorators.abstract", func)
 	return wrapper

 	# Handle both @abstract and @abstract(error_log=...)
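The net effect of the decorators.py changes is that every wrapper returned by silent, measure_time, handle_error, simple_cache, deprecated, and abstract now carries a descriptive __name__ instead of the generic "wrapper", which makes logs and tracebacks easier to attribute. A small sketch of the observable behavior (the exact string assumes get_func_name returns the function's plain name):

    from stouputils.decorators import silent

    @silent
    def greet() -> None:
        print("this is muted")

    greet()                # prints nothing: stdout is muffled by the decorator
    print(greet.__name__)  # e.g. "stouputils.decorators.silent_greet" as of 1.3.7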