validmind 2.8.29__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/utils.py +4 -24
  3. validmind/api_client.py +6 -17
  4. validmind/logging.py +48 -0
  5. validmind/tests/__init__.py +2 -0
  6. validmind/tests/__types__.py +18 -0
  7. validmind/tests/output.py +9 -2
  8. validmind/tests/plots/BoxPlot.py +260 -0
  9. validmind/tests/plots/CorrelationHeatmap.py +235 -0
  10. validmind/tests/plots/HistogramPlot.py +233 -0
  11. validmind/tests/plots/ViolinPlot.py +125 -0
  12. validmind/tests/plots/__init__.py +0 -0
  13. validmind/tests/stats/CorrelationAnalysis.py +251 -0
  14. validmind/tests/stats/DescriptiveStats.py +197 -0
  15. validmind/tests/stats/NormalityTests.py +147 -0
  16. validmind/tests/stats/OutlierDetection.py +173 -0
  17. validmind/tests/stats/__init__.py +0 -0
  18. validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
  19. validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
  20. validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
  21. validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
  22. validmind/unit_metrics/classification/individual/Confidence.py +52 -0
  23. validmind/unit_metrics/classification/individual/Correctness.py +41 -0
  24. validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
  25. validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
  26. validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
  27. validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
  28. validmind/unit_metrics/classification/individual/__init__.py +0 -0
  29. validmind/vm_models/dataset/dataset.py +147 -1
  30. validmind/vm_models/result/result.py +26 -4
  31. {validmind-2.8.29.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
  32. {validmind-2.8.29.dist-info → validmind-2.9.1.dist-info}/RECORD +35 -14
  33. {validmind-2.8.29.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
  34. {validmind-2.8.29.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
  35. {validmind-2.8.29.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
validmind/tests/stats/OutlierDetection.py
@@ -0,0 +1,173 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import Any, Dict, List, Optional
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ from sklearn.ensemble import IsolationForest
+
+ from validmind import tags, tasks
+ from validmind.errors import SkipTestError
+ from validmind.utils import format_records
+ from validmind.vm_models import VMDataset
+
+
+ def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
+     """Validate and return numerical columns."""
+     if columns is None:
+         columns = dataset.feature_columns_numeric
+     else:
+         available_columns = set(dataset.feature_columns_numeric)
+         columns = [col for col in columns if col in available_columns]
+
+     # Filter out boolean columns as they can't be used for outlier detection
+     numeric_columns = []
+     for col in columns:
+         if col in dataset.df.columns:
+             col_dtype = dataset.df[col].dtype
+             # Exclude boolean and object types, keep only true numeric types
+             if pd.api.types.is_numeric_dtype(col_dtype) and col_dtype != bool:
+                 numeric_columns.append(col)
+
+     columns = numeric_columns
+
+     if not columns:
+         raise SkipTestError("No suitable numerical columns found for outlier detection")
+
+     return columns
+
+
+ def _detect_iqr_outliers(data, iqr_threshold: float):
+     """Detect outliers using IQR method."""
+     q1, q3 = data.quantile(0.25), data.quantile(0.75)
+     iqr = q3 - q1
+     lower_bound = q1 - iqr_threshold * iqr
+     upper_bound = q3 + iqr_threshold * iqr
+     # Fix numpy boolean operation error by using pandas boolean indexing properly
+     outlier_mask = (data < lower_bound) | (data > upper_bound)
+     iqr_outliers = data[outlier_mask]
+     return len(iqr_outliers), (len(iqr_outliers) / len(data)) * 100
+
+
+ def _detect_zscore_outliers(data, zscore_threshold: float):
+     """Detect outliers using Z-score method."""
+     z_scores = np.abs(stats.zscore(data))
+     # Fix potential numpy boolean operation error
+     outlier_mask = z_scores > zscore_threshold
+     zscore_outliers = data[outlier_mask]
+     return len(zscore_outliers), (len(zscore_outliers) / len(data)) * 100
+
+
+ def _detect_isolation_forest_outliers(data, contamination: float):
+     """Detect outliers using Isolation Forest method."""
+     if len(data) <= 10:
+         return 0, 0
+
+     try:
+         iso_forest = IsolationForest(contamination=contamination, random_state=42)
+         outlier_pred = iso_forest.fit_predict(data.values.reshape(-1, 1))
+         iso_outliers = data[outlier_pred == -1]
+         return len(iso_outliers), (len(iso_outliers) / len(data)) * 100
+     except Exception:
+         return 0, 0
+
+
+ def _process_column_outliers(
+     column: str,
+     data,
+     methods: List[str],
+     iqr_threshold: float,
+     zscore_threshold: float,
+     contamination: float,
+ ):
+     """Process outlier detection for a single column."""
+     outliers_dict = {"Feature": column, "Total Count": len(data)}
+
+     # IQR method
+     if "iqr" in methods:
+         count, percentage = _detect_iqr_outliers(data, iqr_threshold)
+         outliers_dict["IQR Outliers"] = count
+         outliers_dict["IQR %"] = percentage
+
+     # Z-score method
+     if "zscore" in methods:
+         count, percentage = _detect_zscore_outliers(data, zscore_threshold)
+         outliers_dict["Z-Score Outliers"] = count
+         outliers_dict["Z-Score %"] = percentage
+
+     # Isolation Forest method
+     if "isolation_forest" in methods:
+         count, percentage = _detect_isolation_forest_outliers(data, contamination)
+         outliers_dict["Isolation Forest Outliers"] = count
+         outliers_dict["Isolation Forest %"] = percentage
+
+     return outliers_dict
+
+
+ @tags("tabular_data", "statistics", "outliers")
+ @tasks("classification", "regression", "clustering")
+ def OutlierDetection(
+     dataset: VMDataset,
+     columns: Optional[List[str]] = None,
+     methods: List[str] = ["iqr", "zscore", "isolation_forest"],
+     iqr_threshold: float = 1.5,
+     zscore_threshold: float = 3.0,
+     contamination: float = 0.1,
+ ) -> Dict[str, Any]:
+     """
+     Detects outliers in numerical features using multiple statistical methods.
+
+     ### Purpose
+
+     This test identifies outliers in numerical features using various statistical
+     methods including IQR, Z-score, and Isolation Forest. It provides comprehensive
+     outlier detection to help identify data quality issues and potential anomalies.
+
+     ### Test Mechanism
+
+     The test applies multiple outlier detection methods:
+     - IQR method: Values beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR
+     - Z-score method: Values with |z-score| > threshold
+     - Isolation Forest: ML-based anomaly detection
+
+     ### Signs of High Risk
+
+     - High percentage of outliers indicating data quality issues
+     - Inconsistent outlier detection across methods
+     - Extreme outliers that significantly deviate from normal patterns
+
+     ### Strengths
+
+     - Multiple detection methods for robust outlier identification
+     - Customizable thresholds for different sensitivity levels
+     - Clear summary of outlier patterns across features
+
+     ### Limitations
+
+     - Limited to numerical features only
+     - Some methods assume normal distributions
+     - Threshold selection can be subjective
+     """
+     # Validate inputs
+     columns = _validate_columns(dataset, columns)
+
+     # Process each column
+     outlier_summary = []
+     for column in columns:
+         data = dataset._df[column].dropna()
+
+         if len(data) >= 3:
+             outliers_dict = _process_column_outliers(
+                 column, data, methods, iqr_threshold, zscore_threshold, contamination
+             )
+             outlier_summary.append(outliers_dict)
+
+     # Format results
+     results = {}
+     if outlier_summary:
+         results["Outlier Summary"] = format_records(pd.DataFrame(outlier_summary))
+
+     return results
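To make the summary columns concrete, the IQR and Z-score rules used above can be reproduced on a toy pandas Series with plain numpy, pandas, and scipy. This is an illustrative sketch only, not part of the package; the sample values are invented here.

import numpy as np
import pandas as pd
from scipy import stats

# Toy column with one obvious outlier
data = pd.Series([1.0, 1.2, 0.9, 1.1, 1.0, 0.8, 1.3, 10.0])

# IQR rule with the default iqr_threshold of 1.5
q1, q3 = data.quantile(0.25), data.quantile(0.75)
iqr = q3 - q1
iqr_mask = (data < q1 - 1.5 * iqr) | (data > q3 + 1.5 * iqr)
print(int(iqr_mask.sum()), float(100 * iqr_mask.mean()))  # 1 12.5

# Z-score rule with the default zscore_threshold of 3.0
# (flags nothing here: with only 8 points, |z| for 10.0 is about 2.6)
z_mask = np.abs(stats.zscore(data)) > 3.0
print(int(z_mask.sum()))  # 0

The disagreement between the two rules on the same column is exactly the cross-method inconsistency the docstring lists as a sign worth investigating.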
validmind/tests/stats/__init__.py: File without changes
validmind/unit_metrics/classification/individual/AbsoluteError.py
@@ -0,0 +1,42 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def AbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the absolute error per row for a classification model.
+
+     For classification tasks, this computes the absolute difference between
+     the true class labels and predicted class labels for each individual row.
+     For binary classification with probabilities, it can also compute the
+     absolute difference between true labels and predicted probabilities.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predictions
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row absolute errors as a list of float values
+     """
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Convert to numpy arrays and ensure same data type
+     y_true = np.asarray(y_true)
+     y_pred = np.asarray(y_pred)
+
+     # For classification, compute absolute difference between true and predicted labels
+     absolute_errors = np.abs(y_true - y_pred)
+
+     # Return as a list of floats
+     return absolute_errors.astype(float).tolist()
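All of the new metrics in this individual/ package share the same contract: one score per dataset row, returned as a plain Python list. A minimal sketch of the arithmetic, with toy arrays standing in for dataset.y and dataset.y_pred(model):

import numpy as np

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 0, 1, 1])

print(np.abs(y_true - y_pred).astype(float).tolist())  # [0.0, 1.0, 1.0, 0.0]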
validmind/unit_metrics/classification/individual/BrierScore.py
@@ -0,0 +1,56 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def BrierScore(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the Brier score per row for a classification model.
+
+     The Brier score is a proper score function that measures the accuracy of
+     probabilistic predictions. It is calculated as the mean squared difference
+     between predicted probabilities and the actual binary outcomes.
+     Lower scores indicate better calibration.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row Brier scores as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     y_true = dataset.y
+
+     # Try to get probabilities
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use the positive class probability
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             y_prob = y_prob[:, 1]  # Use probability of positive class
+     except ValueError:
+         # Fall back to predictions if probabilities not available
+         # Convert predictions to "probabilities" (1.0 for predicted class, 0.0 for other)
+         y_pred = dataset.y_pred(model)
+         y_prob = y_pred.astype(float)
+
+     # Convert to numpy arrays and ensure same data type
+     y_true = np.asarray(y_true, dtype=float)
+     y_prob = np.asarray(y_prob, dtype=float)
+
+     # Calculate Brier score per row: (predicted_probability - actual_outcome)²
+     brier_scores = (y_prob - y_true) ** 2
+
+     # Return as a list of floats
+     return brier_scores.tolist()
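A quick hand-check of the per-row Brier formula, using toy arrays in place of dataset.y and the positive-class probabilities (values chosen to be exact in binary floating point):

import numpy as np

y_true = np.array([1.0, 0.0, 1.0])
y_prob = np.array([0.75, 0.25, 0.5])   # P(class = 1) per row

print(((y_prob - y_true) ** 2).tolist())  # [0.0625, 0.0625, 0.25]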
validmind/unit_metrics/classification/individual/CalibrationError.py
@@ -0,0 +1,77 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def CalibrationError(
+     model: VMModel, dataset: VMDataset, n_bins: int = 10, **kwargs
+ ) -> List[float]:
+     """Calculates the calibration error per row for a classification model.
+
+     Calibration error measures how well the predicted probabilities reflect the
+     actual likelihood of the positive class. For each prediction, this computes
+     the absolute difference between the predicted probability and the empirical
+     frequency of the positive class in the corresponding probability bin.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         n_bins: Number of bins for probability calibration, defaults to 10
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row calibration errors as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     y_true = dataset.y
+
+     # Try to get probabilities
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use the positive class probability
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             y_prob = y_prob[:, 1]  # Use probability of positive class
+     except ValueError:
+         # If no probabilities available, return zeros (perfect calibration for hard predictions)
+         return [0.0] * len(y_true)
+
+     # Convert to numpy arrays
+     y_true = np.asarray(y_true, dtype=float)
+     y_prob = np.asarray(y_prob, dtype=float)
+
+     # Create probability bins
+     bin_boundaries = np.linspace(0, 1, n_bins + 1)
+     bin_lowers = bin_boundaries[:-1]
+     bin_uppers = bin_boundaries[1:]
+
+     # Calculate calibration error for each sample
+     calibration_errors = np.zeros_like(y_prob)
+
+     for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
+         # Find samples in this bin
+         in_bin = (y_prob > bin_lower) & (y_prob <= bin_upper)
+         if not np.any(in_bin):
+             continue
+
+         # Calculate empirical frequency for this bin
+         empirical_freq = np.mean(y_true[in_bin])
+
+         # Calculate average predicted probability for this bin
+         avg_predicted_prob = np.mean(y_prob[in_bin])
+
+         # Assign calibration error to all samples in this bin
+         calibration_errors[in_bin] = abs(avg_predicted_prob - empirical_freq)
+
+     # Return as a list of floats
+     return calibration_errors.tolist()
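A worked example of the binning logic above: every row in a bin receives the same error, the gap between the bin's mean predicted probability and its empirical positive rate. Toy values, sketch only:

import numpy as np

y_true = np.array([1.0, 0.0, 1.0, 1.0])
y_prob = np.array([0.62, 0.68, 0.65, 0.95])

# With n_bins=10, the first three rows land in the (0.6, 0.7] bin
err_low = abs(np.mean(y_prob[:3]) - np.mean(y_true[:3]))   # |0.65 - 2/3| ≈ 0.017
# The last row sits alone in the (0.9, 1.0] bin
err_high = abs(np.mean(y_prob[3:]) - np.mean(y_true[3:]))  # |0.95 - 1.0| = 0.05
print(round(float(err_low), 3), round(float(err_high), 3))  # 0.017 0.05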
validmind/unit_metrics/classification/individual/ClassBalance.py
@@ -0,0 +1,65 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def ClassBalance(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the class balance score per row for a classification model.
+
+     For each prediction, this returns how balanced the predicted class is in the
+     training distribution. Lower scores indicate predictions on rare classes,
+     higher scores indicate predictions on common classes. This helps understand
+     if model errors are more likely on imbalanced classes.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predictions
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row class balance scores as a list of float values
+
+     Note:
+         Scores range from 0 to 0.5, where 0.5 indicates perfectly balanced classes
+         and lower values indicate more imbalanced classes.
+     """
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Convert to numpy arrays
+     y_true = np.asarray(y_true)
+     y_pred = np.asarray(y_pred)
+
+     # Calculate class frequencies in the true labels (proxy for training distribution)
+     unique_classes, class_counts = np.unique(y_true, return_counts=True)
+     class_frequencies = class_counts / len(y_true)
+
+     # Create a mapping from class to frequency
+     class_to_freq = dict(zip(unique_classes, class_frequencies))
+
+     # Calculate balance score for each prediction
+     balance_scores = []
+
+     for pred in y_pred:
+         if pred in class_to_freq:
+             freq = class_to_freq[pred]
+             # Balance score: how close to 0.5 (perfectly balanced) the frequency is
+             # Score = 0.5 - |freq - 0.5| = min(freq, 1-freq)
+             balance_score = min(freq, 1 - freq)
+         else:
+             # Predicted class not seen in true labels (very rare)
+             balance_score = 0.0
+
+         balance_scores.append(balance_score)
+
+     # Return as a list of floats
+     return balance_scores
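The balance score is simply min(freq, 1 - freq) of the predicted class's frequency among the true labels, so rows predicted as rare classes score lower. A toy sketch with an invented three-class label vector:

import numpy as np

# Class frequencies among the true labels: a = 0.5, b = 0.3, c = 0.2
y_true = np.array(["a"] * 5 + ["b"] * 3 + ["c"] * 2)
y_pred = np.array(["a", "c", "b", "a", "b", "a", "c", "a", "b", "a"])

classes, counts = np.unique(y_true, return_counts=True)
freq = dict(zip(classes, counts / len(y_true)))

scores = [float(min(freq[p], 1 - freq[p])) for p in y_pred]
print(scores[:3])  # [0.5, 0.2, 0.3]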
validmind/unit_metrics/classification/individual/Confidence.py
@@ -0,0 +1,52 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def Confidence(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the prediction confidence per row for a classification model.
+
+     For binary classification, confidence is calculated as the maximum probability
+     across classes, or alternatively as the distance from the decision boundary (0.5).
+     Higher values indicate more confident predictions.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row confidence scores as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     # Try to get probabilities, fall back to predictions if not available
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use max probability approach
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             # Multi-class: confidence is the maximum probability
+             confidence = np.max(y_prob, axis=1)
+         else:
+             # Binary classification: confidence based on distance from 0.5
+             y_prob = np.asarray(y_prob, dtype=float)
+             confidence = np.abs(y_prob - 0.5) + 0.5
+     except ValueError:
+         # Fall back to binary correctness if probabilities not available
+         y_true = dataset.y
+         y_pred = dataset.y_pred(model)
+         # If no probabilities, confidence is 1.0 for correct, 0.0 for incorrect
+         confidence = (y_true == y_pred).astype(float)
+
+     # Return as a list of floats
+     return confidence.tolist()
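In the binary branch, |p - 0.5| + 0.5 is just max(p, 1 - p), so confidence is 0.5 at the decision boundary and approaches 1.0 for probabilities near 0 or 1. A quick check with exactly representable toy probabilities:

import numpy as np

p = np.array([0.5, 0.75, 0.125, 1.0])   # P(class = 1) per row
print((np.abs(p - 0.5) + 0.5).tolist())  # [0.5, 0.75, 0.875, 1.0]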
validmind/unit_metrics/classification/individual/Correctness.py
@@ -0,0 +1,41 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def Correctness(model: VMModel, dataset: VMDataset, **kwargs) -> List[int]:
+     """Calculates the correctness per row for a classification model.
+
+     For classification tasks, this returns 1 for correctly classified rows
+     and 0 for incorrectly classified rows. This provides a binary indicator
+     of model performance for each individual prediction.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predictions
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[int]: Per-row correctness as a list of 1s and 0s
+     """
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Convert to numpy arrays
+     y_true = np.asarray(y_true)
+     y_pred = np.asarray(y_pred)
+
+     # For classification, check if predictions match true labels
+     correctness = (y_true == y_pred).astype(int)
+
+     # Return as a list of integers
+     return correctness.tolist()
validmind/unit_metrics/classification/individual/LogLoss.py
@@ -0,0 +1,61 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def LogLoss(
+     model: VMModel, dataset: VMDataset, eps: float = 1e-15, **kwargs
+ ) -> List[float]:
+     """Calculates the logarithmic loss per row for a classification model.
+
+     Log loss measures the performance of a classification model where the prediction
+     is a probability value between 0 and 1. The log loss increases as the predicted
+     probability diverges from the actual label.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         eps: Small value to avoid log(0), defaults to 1e-15
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row log loss values as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     y_true = dataset.y
+
+     # Try to get probabilities
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use the positive class probability
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             y_prob = y_prob[:, 1]  # Use probability of positive class
+     except ValueError:
+         # Fall back to predictions if probabilities not available
+         # Convert predictions to "probabilities" (0.99 for correct class, 0.01 for wrong)
+         y_pred = dataset.y_pred(model)
+         y_prob = np.where(y_true == y_pred, 0.99, 0.01)
+
+     # Convert to numpy arrays and ensure same data type
+     y_true = np.asarray(y_true, dtype=float)
+     y_prob = np.asarray(y_prob, dtype=float)
+
+     # Clip probabilities to avoid log(0) and log(1)
+     y_prob = np.clip(y_prob, eps, 1 - eps)
+
+     # Calculate log loss per row: -[y*log(p) + (1-y)*log(1-p)]
+     log_loss_per_row = -(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))
+
+     # Return as a list of floats
+     return log_loss_per_row.tolist()
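The per-row value is the standard binary cross-entropy term, -[y*log(p) + (1-y)*log(1-p)], after clipping p into [eps, 1-eps]. A small hand-check with toy values:

import numpy as np

eps = 1e-15
y_true = np.array([1.0, 0.0, 1.0])
y_prob = np.clip(np.array([0.9, 0.1, 0.5]), eps, 1 - eps)

per_row = -(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))
print(per_row.round(4).tolist())  # [0.1054, 0.1054, 0.6931]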
validmind/unit_metrics/classification/individual/OutlierScore.py
@@ -0,0 +1,86 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+ from sklearn.ensemble import IsolationForest
+ from sklearn.preprocessing import StandardScaler
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def OutlierScore(
+     model: VMModel, dataset: VMDataset, contamination: float = 0.1, **kwargs
+ ) -> List[float]:
+     """Calculates the outlier score per row for a classification model.
+
+     Uses Isolation Forest to identify samples that deviate significantly from
+     the typical patterns in the feature space. Higher scores indicate more
+     anomalous/outlier-like samples. This can help identify out-of-distribution
+     samples or data points that might be harder to predict accurately.
+
+     Args:
+         model: The classification model to evaluate (unused but kept for consistency)
+         dataset: The dataset containing feature data
+         contamination: Expected proportion of outliers, defaults to 0.1
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row outlier scores as a list of float values
+
+     Note:
+         Scores are normalized to [0, 1] where higher values indicate more outlier-like samples
+     """
+     # Get feature data
+     X = dataset.x_df()
+
+     # Handle case where we have no features or only categorical features
+     if X.empty or X.shape[1] == 0:
+         # Return zero outlier scores if no features available
+         return [0.0] * len(dataset.y)
+
+     # Select only numeric features for outlier detection
+     numeric_features = dataset.feature_columns_numeric
+     if not numeric_features:
+         # If no numeric features, return zero outlier scores
+         return [0.0] * len(dataset.y)
+
+     X_numeric = X[numeric_features]
+
+     # Handle missing values by filling with median
+     X_filled = X_numeric.fillna(X_numeric.median())
+
+     # Standardize features for better outlier detection
+     scaler = StandardScaler()
+     X_scaled = scaler.fit_transform(X_filled)
+
+     # Fit Isolation Forest
+     isolation_forest = IsolationForest(
+         contamination=contamination, random_state=42, n_estimators=100
+     )
+
+     # Fit the model on the data
+     isolation_forest.fit(X_scaled)
+
+     # Get anomaly scores (negative values for outliers)
+     anomaly_scores = isolation_forest.decision_function(X_scaled)
+
+     # Convert to outlier scores (0 to 1, where 1 is most outlier-like)
+     # Normalize using min-max scaling
+     min_score = np.min(anomaly_scores)
+     max_score = np.max(anomaly_scores)
+
+     if max_score == min_score:
+         # All samples have same score, no outliers detected
+         outlier_scores = np.zeros_like(anomaly_scores)
+     else:
+         # Invert and normalize: higher values = more outlier-like
+         outlier_scores = (max_score - anomaly_scores) / (max_score - min_score)
+
+     # Return as a list of floats
+     return outlier_scores.tolist()
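The final min-max step inverts IsolationForest.decision_function (where lower means more anomalous) so that 1.0 marks the most outlier-like row. A self-contained sketch on synthetic data, assuming scikit-learn is available; the planted point at (8, 8) should receive the top score:

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, size=(99, 2)), [[8.0, 8.0]]])  # one planted outlier

X_scaled = StandardScaler().fit_transform(X)
forest = IsolationForest(contamination=0.1, random_state=42, n_estimators=100)
scores = forest.fit(X_scaled).decision_function(X_scaled)

# Invert and normalize: higher values = more outlier-like
outlier_scores = (scores.max() - scores) / (scores.max() - scores.min())
print(int(outlier_scores.argmax()), float(outlier_scores.max()))  # 99 1.0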