validmind 2.8.29__py3-none-any.whl → 2.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. validmind/__init__.py +16 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/utils.py +4 -24
  4. validmind/api_client.py +6 -17
  5. validmind/datasets/credit_risk/lending_club.py +13 -1
  6. validmind/datasets/nlp/cnn_dailymail.py +15 -1
  7. validmind/logging.py +48 -0
  8. validmind/tests/__init__.py +2 -0
  9. validmind/tests/__types__.py +18 -0
  10. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +14 -2
  11. validmind/tests/data_validation/DickeyFullerGLS.py +13 -2
  12. validmind/tests/data_validation/PhillipsPerronArch.py +13 -2
  13. validmind/tests/data_validation/SeasonalDecompose.py +14 -2
  14. validmind/tests/data_validation/ShapiroWilk.py +14 -1
  15. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +14 -1
  16. validmind/tests/data_validation/WOEBinPlots.py +14 -1
  17. validmind/tests/data_validation/WOEBinTable.py +13 -2
  18. validmind/tests/data_validation/ZivotAndrewsArch.py +13 -2
  19. validmind/tests/data_validation/nlp/CommonWords.py +14 -2
  20. validmind/tests/data_validation/nlp/LanguageDetection.py +14 -1
  21. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +13 -1
  22. validmind/tests/data_validation/nlp/Sentiment.py +13 -1
  23. validmind/tests/data_validation/nlp/StopWords.py +14 -2
  24. validmind/tests/data_validation/nlp/TextDescription.py +14 -2
  25. validmind/tests/data_validation/nlp/Toxicity.py +13 -1
  26. validmind/tests/model_validation/BertScore.py +13 -2
  27. validmind/tests/model_validation/BleuScore.py +13 -2
  28. validmind/tests/model_validation/ContextualRecall.py +13 -1
  29. validmind/tests/model_validation/MeteorScore.py +13 -2
  30. validmind/tests/model_validation/ModelPredictionResiduals.py +14 -1
  31. validmind/tests/model_validation/RegardScore.py +13 -2
  32. validmind/tests/model_validation/RougeScore.py +14 -1
  33. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +14 -1
  34. validmind/tests/model_validation/ToxicityScore.py +13 -1
  35. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +14 -2
  36. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +13 -2
  37. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +14 -2
  38. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +14 -1
  39. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +14 -1
  40. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +14 -1
  41. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +14 -1
  42. validmind/tests/output.py +9 -2
  43. validmind/tests/plots/BoxPlot.py +260 -0
  44. validmind/tests/plots/CorrelationHeatmap.py +235 -0
  45. validmind/tests/plots/HistogramPlot.py +233 -0
  46. validmind/tests/plots/ViolinPlot.py +125 -0
  47. validmind/tests/plots/__init__.py +0 -0
  48. validmind/tests/stats/CorrelationAnalysis.py +251 -0
  49. validmind/tests/stats/DescriptiveStats.py +197 -0
  50. validmind/tests/stats/NormalityTests.py +147 -0
  51. validmind/tests/stats/OutlierDetection.py +173 -0
  52. validmind/tests/stats/__init__.py +0 -0
  53. validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
  54. validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
  55. validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
  56. validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
  57. validmind/unit_metrics/classification/individual/Confidence.py +52 -0
  58. validmind/unit_metrics/classification/individual/Correctness.py +41 -0
  59. validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
  60. validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
  61. validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
  62. validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
  63. validmind/unit_metrics/classification/individual/__init__.py +0 -0
  64. validmind/vm_models/dataset/dataset.py +147 -1
  65. validmind/vm_models/result/result.py +30 -6
  66. validmind-2.10.0rc1.dist-info/METADATA +845 -0
  67. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/RECORD +70 -49
  68. validmind-2.8.29.dist-info/METADATA +0 -137
  69. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/LICENSE +0 -0
  70. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/WHEEL +0 -0
  71. {validmind-2.8.29.dist-info → validmind-2.10.0rc1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,197 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import Any, Dict, List, Optional
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+
+ from validmind import tags, tasks
+ from validmind.errors import SkipTestError
+ from validmind.utils import format_records
+ from validmind.vm_models import VMDataset
+
+
+ def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
+     """Validate and return numerical columns (excluding boolean columns)."""
+     if columns is None:
+         # Get all columns marked as numeric
+         numeric_columns = dataset.feature_columns_numeric
+     else:
+         available_columns = set(dataset.feature_columns_numeric)
+         numeric_columns = [col for col in columns if col in available_columns]
+
+     # Filter out boolean columns as they can't have proper statistical measures computed
+     columns = []
+     for col in numeric_columns:
+         dtype = dataset.df[col].dtype
+         # Only include integer and float types, exclude boolean
+         if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype):
+             columns.append(col)
+
+     if not columns:
+         raise SkipTestError(
+             "No numerical columns (integer/float) found for descriptive statistics"
+         )
+
+     return columns
+
+
+ def _compute_basic_stats(column: str, data, total_count: int):
+     """Compute basic statistics for a column."""
+     return {
+         "Feature": column,
+         "Count": len(data),
+         "Missing": total_count - len(data),
+         "Missing %": ((total_count - len(data)) / total_count) * 100,
+         "Mean": data.mean(),
+         "Median": data.median(),
+         "Std": data.std(),
+         "Min": data.min(),
+         "Max": data.max(),
+         "Q1": data.quantile(0.25),
+         "Q3": data.quantile(0.75),
+         "IQR": data.quantile(0.75) - data.quantile(0.25),
+     }
+
+
+ def _compute_advanced_stats(column: str, data, confidence_level: float):
+     """Compute advanced statistics for a column."""
+     try:
+         # Distribution measures
+         skewness = stats.skew(data)
+         kurtosis_val = stats.kurtosis(data)
+         cv = (data.std() / data.mean()) * 100 if data.mean() != 0 else np.nan
+
+         # Confidence interval for mean
+         ci_lower, ci_upper = stats.t.interval(
+             confidence_level,
+             len(data) - 1,
+             loc=data.mean(),
+             scale=data.std() / np.sqrt(len(data)),
+         )
+
+         # Normality test
+         if len(data) <= 5000:
+             normality_stat, normality_p = stats.shapiro(data)
+             normality_test = "Shapiro-Wilk"
+         else:
+             ad_result = stats.anderson(data, dist="norm")
+             normality_stat = ad_result.statistic
+             normality_p = 0.05 if normality_stat > ad_result.critical_values[2] else 0.1
+             normality_test = "Anderson-Darling"
+
+         # Outlier detection using IQR method
+         iqr = data.quantile(0.75) - data.quantile(0.25)
+         lower_bound = data.quantile(0.25) - 1.5 * iqr
+         upper_bound = data.quantile(0.75) + 1.5 * iqr
+         outliers = data[(data < lower_bound) | (data > upper_bound)]
+         outlier_count = len(outliers)
+         outlier_pct = (outlier_count / len(data)) * 100
+
+         return {
+             "Feature": column,
+             "Skewness": skewness,
+             "Kurtosis": kurtosis_val,
+             "CV %": cv,
+             f"CI Lower ({confidence_level*100:.0f}%)": ci_lower,
+             f"CI Upper ({confidence_level*100:.0f}%)": ci_upper,
+             "Normality Test": normality_test,
+             "Normality Stat": normality_stat,
+             "Normality p-value": normality_p,
+             "Normal Distribution": "Yes" if normality_p > 0.05 else "No",
+             "Outliers (IQR)": outlier_count,
+             "Outliers %": outlier_pct,
+         }
+     except Exception:
+         return None
+
+
+ @tags("tabular_data", "statistics", "data_quality")
+ @tasks("classification", "regression", "clustering")
+ def DescriptiveStats(
+     dataset: VMDataset,
+     columns: Optional[List[str]] = None,
+     include_advanced: bool = True,
+     confidence_level: float = 0.95,
+ ) -> Dict[str, Any]:
+     """
+     Provides comprehensive descriptive statistics for numerical features in a dataset.
+
+     ### Purpose
+
+     This test generates detailed descriptive statistics for numerical features, including
+     basic statistics, distribution measures, confidence intervals, and normality tests.
+     It provides a comprehensive overview of data characteristics essential for
+     understanding data quality and distribution properties.
+
+     ### Test Mechanism
+
+     The test computes various statistical measures for each numerical column:
+     - Basic statistics: count, mean, median, std, min, max, quartiles
+     - Distribution measures: skewness, kurtosis, coefficient of variation
+     - Confidence intervals for the mean
+     - Normality tests (Shapiro-Wilk for small samples, Anderson-Darling for larger)
+     - Missing value analysis
+
+     ### Signs of High Risk
+
+     - High skewness or kurtosis indicating non-normal distributions
+     - Large coefficients of variation suggesting high data variability
+     - Significant results in normality tests when normality is expected
+     - High percentage of missing values
+     - Extreme outliers based on IQR analysis
+
+     ### Strengths
+
+     - Comprehensive statistical analysis in a single test
+     - Includes advanced statistical measures beyond basic descriptives
+     - Provides confidence intervals for uncertainty quantification
+     - Handles missing values appropriately
+     - Suitable for both exploratory and confirmatory analysis
+
+     ### Limitations
+
+     - Limited to numerical features only
+     - Normality tests may not be meaningful for all data types
+     - Large datasets may make some tests computationally expensive
+     - Interpretation requires statistical knowledge
+     """
+     # Validate inputs
+     columns = _validate_columns(dataset, columns)
+
+     # Compute statistics
+     basic_stats = []
+     advanced_stats = []
+
+     for column in columns:
+         data = dataset.df[column].dropna()
+         total_count = len(dataset.df[column])
+
+         if len(data) == 0:
+             continue
+
+         # Basic statistics
+         basic_row = _compute_basic_stats(column, data, total_count)
+         basic_stats.append(basic_row)
+
+         # Advanced statistics
+         if include_advanced and len(data) > 2:
+             advanced_row = _compute_advanced_stats(column, data, confidence_level)
+             if advanced_row is not None:
+                 advanced_stats.append(advanced_row)
+
+     # Format results
+     results = {}
+     if basic_stats:
+         results["Basic Statistics"] = format_records(pd.DataFrame(basic_stats))
+
+     if advanced_stats and include_advanced:
+         results["Advanced Statistics"] = format_records(pd.DataFrame(advanced_stats))
+
+     if not results:
+         raise SkipTestError("Unable to compute statistics for any columns")
+
+     return results
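The hunk above adds validmind/tests/stats/DescriptiveStats.py (file 49 in the list). For readers who want to sanity-check the advanced measures it reports, the core of _compute_advanced_stats can be reproduced with pandas and SciPy alone. The snippet below is an illustrative sketch on synthetic data, not code from the package:

import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.default_rng(0)
data = pd.Series(rng.normal(loc=50, scale=10, size=500))

# Same quantities DescriptiveStats reports per column
skewness = stats.skew(data)
kurtosis_val = stats.kurtosis(data)
ci_lower, ci_upper = stats.t.interval(
    0.95, len(data) - 1, loc=data.mean(), scale=data.std() / np.sqrt(len(data))
)
shapiro_stat, shapiro_p = stats.shapiro(data)  # sample <= 5000, so Shapiro-Wilk is used

print(f"skew={skewness:.3f} kurtosis={kurtosis_val:.3f}")
print(f"95% CI for mean: ({ci_lower:.2f}, {ci_upper:.2f})")
print(f"Shapiro-Wilk p={shapiro_p:.3f} -> normal: {shapiro_p > 0.05}")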
@@ -0,0 +1,147 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import Any, Dict, List, Optional
+
+ import pandas as pd
+ from scipy import stats
+
+ from validmind import tags, tasks
+ from validmind.errors import SkipTestError
+ from validmind.utils import format_records
+ from validmind.vm_models import VMDataset
+
+
+ def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
+     """Validate and return numerical columns."""
+     if columns is None:
+         columns = dataset.feature_columns_numeric
+     else:
+         available_columns = set(dataset.feature_columns_numeric)
+         columns = [col for col in columns if col in available_columns]
+
+     if not columns:
+         raise SkipTestError("No numerical columns found for normality testing")
+
+     return columns
+
+
+ def _run_shapiro_test(data, tests: List[str], alpha: float):
+     """Run Shapiro-Wilk test if requested and data size is appropriate."""
+     results = {}
+     if "shapiro" in tests and len(data) <= 5000:
+         try:
+             stat, p_value = stats.shapiro(data)
+             results["Shapiro-Wilk Stat"] = stat
+             results["Shapiro-Wilk p-value"] = p_value
+             results["Shapiro-Wilk Normal"] = "Yes" if p_value > alpha else "No"
+         except Exception:
+             results["Shapiro-Wilk Normal"] = "Test Failed"
+     return results
+
+
+ def _run_anderson_test(data, tests: List[str]):
+     """Run Anderson-Darling test if requested."""
+     results = {}
+     if "anderson" in tests:
+         try:
+             ad_result = stats.anderson(data, dist="norm")
+             critical_value = ad_result.critical_values[2]  # 5% level
+             results["Anderson-Darling Stat"] = ad_result.statistic
+             results["Anderson-Darling Critical"] = critical_value
+             results["Anderson-Darling Normal"] = (
+                 "Yes" if ad_result.statistic < critical_value else "No"
+             )
+         except Exception:
+             results["Anderson-Darling Normal"] = "Test Failed"
+     return results
+
+
+ def _run_ks_test(data, tests: List[str], alpha: float):
+     """Run Kolmogorov-Smirnov test if requested."""
+     results = {}
+     if "kstest" in tests:
+         try:
+             standardized = (data - data.mean()) / data.std()
+             stat, p_value = stats.kstest(standardized, "norm")
+             results["KS Test Stat"] = stat
+             results["KS Test p-value"] = p_value
+             results["KS Test Normal"] = "Yes" if p_value > alpha else "No"
+         except Exception:
+             results["KS Test Normal"] = "Test Failed"
+     return results
+
+
+ def _process_column_tests(column: str, data, tests: List[str], alpha: float):
+     """Process all normality tests for a single column."""
+     result_row = {"Feature": column, "Sample Size": len(data)}
+
+     # Run individual tests
+     result_row.update(_run_shapiro_test(data, tests, alpha))
+     result_row.update(_run_anderson_test(data, tests))
+     result_row.update(_run_ks_test(data, tests, alpha))
+
+     return result_row
+
+
+ @tags("tabular_data", "statistics", "normality")
+ @tasks("classification", "regression", "clustering")
+ def NormalityTests(
+     dataset: VMDataset,
+     columns: Optional[List[str]] = None,
+     alpha: float = 0.05,
+     tests: List[str] = ["shapiro", "anderson", "kstest"],
+ ) -> Dict[str, Any]:
+     """
+     Performs multiple normality tests on numerical features to assess distribution normality.
+
+     ### Purpose
+
+     This test evaluates whether numerical features follow a normal distribution using
+     various statistical tests. Understanding distribution normality is crucial for
+     selecting appropriate statistical methods and model assumptions.
+
+     ### Test Mechanism
+
+     The test applies multiple normality tests:
+     - Shapiro-Wilk test: Best for small to medium samples
+     - Anderson-Darling test: More sensitive to deviations in tails
+     - Kolmogorov-Smirnov test: General goodness-of-fit test
+
+     ### Signs of High Risk
+
+     - Multiple normality tests failing consistently
+     - Very low p-values indicating strong evidence against normality
+     - Conflicting results between different normality tests
+
+     ### Strengths
+
+     - Multiple statistical tests for robust assessment
+     - Clear pass/fail indicators for each test
+     - Suitable for different sample sizes
+
+     ### Limitations
+
+     - Limited to numerical features only
+     - Some tests sensitive to sample size
+     - Perfect normality is rare in real data
+     """
+     # Validate inputs
+     columns = _validate_columns(dataset, columns)
+
+     # Process each column
+     normality_results = []
+     for column in columns:
+         data = dataset.df[column].dropna()
+
+         if len(data) >= 3:
+             result_row = _process_column_tests(column, data, tests, alpha)
+             normality_results.append(result_row)
+
+     # Format results
+     results = {}
+     if normality_results:
+         results["Normality Tests"] = format_records(pd.DataFrame(normality_results))
+
+     return results
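The hunk above adds validmind/tests/stats/NormalityTests.py (file 50). Note that the Kolmogorov-Smirnov branch standardizes each column before comparing it to a standard normal. A standalone illustration of the three checks on deliberately non-normal synthetic data, assuming nothing beyond NumPy and SciPy:

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
data = rng.lognormal(mean=0.0, sigma=0.5, size=1000)  # clearly non-normal

shapiro_stat, shapiro_p = stats.shapiro(data)        # small/medium samples
ad = stats.anderson(data, dist="norm")               # compare stat to the 5% critical value
standardized = (data - data.mean()) / data.std()
ks_stat, ks_p = stats.kstest(standardized, "norm")   # KS against N(0, 1)

print("Shapiro normal:", shapiro_p > 0.05)
print("Anderson normal:", ad.statistic < ad.critical_values[2])
print("KS normal:", ks_p > 0.05)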
@@ -0,0 +1,173 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import Any, Dict, List, Optional
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ from sklearn.ensemble import IsolationForest
+
+ from validmind import tags, tasks
+ from validmind.errors import SkipTestError
+ from validmind.utils import format_records
+ from validmind.vm_models import VMDataset
+
+
+ def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
+     """Validate and return numerical columns."""
+     if columns is None:
+         columns = dataset.feature_columns_numeric
+     else:
+         available_columns = set(dataset.feature_columns_numeric)
+         columns = [col for col in columns if col in available_columns]
+
+     # Filter out boolean columns as they can't be used for outlier detection
+     numeric_columns = []
+     for col in columns:
+         if col in dataset.df.columns:
+             col_dtype = dataset.df[col].dtype
+             # Exclude boolean and object types, keep only true numeric types
+             if pd.api.types.is_numeric_dtype(col_dtype) and col_dtype != bool:
+                 numeric_columns.append(col)
+
+     columns = numeric_columns
+
+     if not columns:
+         raise SkipTestError("No suitable numerical columns found for outlier detection")
+
+     return columns
+
+
+ def _detect_iqr_outliers(data, iqr_threshold: float):
+     """Detect outliers using IQR method."""
+     q1, q3 = data.quantile(0.25), data.quantile(0.75)
+     iqr = q3 - q1
+     lower_bound = q1 - iqr_threshold * iqr
+     upper_bound = q3 + iqr_threshold * iqr
+     # Fix numpy boolean operation error by using pandas boolean indexing properly
+     outlier_mask = (data < lower_bound) | (data > upper_bound)
+     iqr_outliers = data[outlier_mask]
+     return len(iqr_outliers), (len(iqr_outliers) / len(data)) * 100
+
+
+ def _detect_zscore_outliers(data, zscore_threshold: float):
+     """Detect outliers using Z-score method."""
+     z_scores = np.abs(stats.zscore(data))
+     # Fix potential numpy boolean operation error
+     outlier_mask = z_scores > zscore_threshold
+     zscore_outliers = data[outlier_mask]
+     return len(zscore_outliers), (len(zscore_outliers) / len(data)) * 100
+
+
+ def _detect_isolation_forest_outliers(data, contamination: float):
+     """Detect outliers using Isolation Forest method."""
+     if len(data) <= 10:
+         return 0, 0
+
+     try:
+         iso_forest = IsolationForest(contamination=contamination, random_state=42)
+         outlier_pred = iso_forest.fit_predict(data.values.reshape(-1, 1))
+         iso_outliers = data[outlier_pred == -1]
+         return len(iso_outliers), (len(iso_outliers) / len(data)) * 100
+     except Exception:
+         return 0, 0
+
+
+ def _process_column_outliers(
+     column: str,
+     data,
+     methods: List[str],
+     iqr_threshold: float,
+     zscore_threshold: float,
+     contamination: float,
+ ):
+     """Process outlier detection for a single column."""
+     outliers_dict = {"Feature": column, "Total Count": len(data)}
+
+     # IQR method
+     if "iqr" in methods:
+         count, percentage = _detect_iqr_outliers(data, iqr_threshold)
+         outliers_dict["IQR Outliers"] = count
+         outliers_dict["IQR %"] = percentage
+
+     # Z-score method
+     if "zscore" in methods:
+         count, percentage = _detect_zscore_outliers(data, zscore_threshold)
+         outliers_dict["Z-Score Outliers"] = count
+         outliers_dict["Z-Score %"] = percentage
+
+     # Isolation Forest method
+     if "isolation_forest" in methods:
+         count, percentage = _detect_isolation_forest_outliers(data, contamination)
+         outliers_dict["Isolation Forest Outliers"] = count
+         outliers_dict["Isolation Forest %"] = percentage
+
+     return outliers_dict
+
+
+ @tags("tabular_data", "statistics", "outliers")
+ @tasks("classification", "regression", "clustering")
+ def OutlierDetection(
+     dataset: VMDataset,
+     columns: Optional[List[str]] = None,
+     methods: List[str] = ["iqr", "zscore", "isolation_forest"],
+     iqr_threshold: float = 1.5,
+     zscore_threshold: float = 3.0,
+     contamination: float = 0.1,
+ ) -> Dict[str, Any]:
+     """
+     Detects outliers in numerical features using multiple statistical methods.
+
+     ### Purpose
+
+     This test identifies outliers in numerical features using various statistical
+     methods including IQR, Z-score, and Isolation Forest. It provides comprehensive
+     outlier detection to help identify data quality issues and potential anomalies.
+
+     ### Test Mechanism
+
+     The test applies multiple outlier detection methods:
+     - IQR method: Values beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR
+     - Z-score method: Values with |z-score| > threshold
+     - Isolation Forest: ML-based anomaly detection
+
+     ### Signs of High Risk
+
+     - High percentage of outliers indicating data quality issues
+     - Inconsistent outlier detection across methods
+     - Extreme outliers that significantly deviate from normal patterns
+
+     ### Strengths
+
+     - Multiple detection methods for robust outlier identification
+     - Customizable thresholds for different sensitivity levels
+     - Clear summary of outlier patterns across features
+
+     ### Limitations
+
+     - Limited to numerical features only
+     - Some methods assume normal distributions
+     - Threshold selection can be subjective
+     """
+     # Validate inputs
+     columns = _validate_columns(dataset, columns)
+
+     # Process each column
+     outlier_summary = []
+     for column in columns:
+         data = dataset._df[column].dropna()
+
+         if len(data) >= 3:
+             outliers_dict = _process_column_outliers(
+                 column, data, methods, iqr_threshold, zscore_threshold, contamination
+             )
+             outlier_summary.append(outliers_dict)
+
+     # Format results
+     results = {}
+     if outlier_summary:
+         results["Outlier Summary"] = format_records(pd.DataFrame(outlier_summary))
+
+     return results
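The hunk above adds validmind/tests/stats/OutlierDetection.py (file 51). To get a feel for how the three detectors compare, the same logic can be run outside the package on a contaminated sample; the sketch below uses the test's default thresholds and is illustrative only:

import numpy as np
import pandas as pd
from scipy import stats
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(2)
# 500 inliers plus 10 obvious outliers
data = pd.Series(np.concatenate([rng.normal(0, 1, 500), rng.normal(12, 1, 10)]))

q1, q3 = data.quantile(0.25), data.quantile(0.75)
iqr = q3 - q1
iqr_outliers = data[(data < q1 - 1.5 * iqr) | (data > q3 + 1.5 * iqr)]

z_outliers = data[np.abs(stats.zscore(data)) > 3.0]

iso = IsolationForest(contamination=0.1, random_state=42)
iso_outliers = data[iso.fit_predict(data.values.reshape(-1, 1)) == -1]

print(len(iqr_outliers), len(z_outliers), len(iso_outliers))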
File without changes
@@ -0,0 +1,42 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def AbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the absolute error per row for a classification model.
+
+     For classification tasks, this computes the absolute difference between
+     the true class labels and predicted class labels for each individual row.
+     For binary classification with probabilities, it can also compute the
+     absolute difference between true labels and predicted probabilities.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predictions
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row absolute errors as a list of float values
+     """
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Convert to numpy arrays and ensure same data type
+     y_true = np.asarray(y_true)
+     y_pred = np.asarray(y_pred)
+
+     # For classification, compute absolute difference between true and predicted labels
+     absolute_errors = np.abs(y_true - y_pred)
+
+     # Return as a list of floats
+     return absolute_errors.astype(float).tolist()
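This hunk adds validmind/unit_metrics/classification/individual/AbsoluteError.py (file 53). Because the targets are label-encoded, for binary 0/1 classes the per-row absolute error reduces to an incorrectness indicator, as this illustrative two-array example shows:

import numpy as np

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
print(np.abs(y_true - y_pred).astype(float).tolist())  # [0.0, 0.0, 1.0, 0.0]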
@@ -0,0 +1,56 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def BrierScore(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+     """Calculates the Brier score per row for a classification model.
+
+     The Brier score is a proper score function that measures the accuracy of
+     probabilistic predictions. It is calculated as the mean squared difference
+     between predicted probabilities and the actual binary outcomes.
+     Lower scores indicate better calibration.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row Brier scores as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     y_true = dataset.y
+
+     # Try to get probabilities
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use the positive class probability
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             y_prob = y_prob[:, 1]  # Use probability of positive class
+     except ValueError:
+         # Fall back to predictions if probabilities not available
+         # Convert predictions to "probabilities" (1.0 for predicted class, 0.0 for other)
+         y_pred = dataset.y_pred(model)
+         y_prob = y_pred.astype(float)
+
+     # Convert to numpy arrays and ensure same data type
+     y_true = np.asarray(y_true, dtype=float)
+     y_prob = np.asarray(y_prob, dtype=float)
+
+     # Calculate Brier score per row: (predicted_probability - actual_outcome)²
+     brier_scores = (y_prob - y_true) ** 2
+
+     # Return as a list of floats
+     return brier_scores.tolist()
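This hunk adds validmind/unit_metrics/classification/individual/BrierScore.py (file 54). A quick standalone check of the per-row computation, independent of the ValidMind dataset wrappers (illustrative values only):

import numpy as np

y_true = np.array([1, 0, 1, 0], dtype=float)
y_prob = np.array([0.9, 0.2, 0.4, 0.7], dtype=float)
per_row = (y_prob - y_true) ** 2
print(per_row.tolist())  # [0.01, 0.04, 0.36, 0.49]
print(per_row.mean())    # classic aggregate Brier score: 0.225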
@@ -0,0 +1,77 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tasks("classification")
+ @tags("classification")
+ def CalibrationError(
+     model: VMModel, dataset: VMDataset, n_bins: int = 10, **kwargs
+ ) -> List[float]:
+     """Calculates the calibration error per row for a classification model.
+
+     Calibration error measures how well the predicted probabilities reflect the
+     actual likelihood of the positive class. For each prediction, this computes
+     the absolute difference between the predicted probability and the empirical
+     frequency of the positive class in the corresponding probability bin.
+
+     Args:
+         model: The classification model to evaluate
+         dataset: The dataset containing true labels and predicted probabilities
+         n_bins: Number of bins for probability calibration, defaults to 10
+         **kwargs: Additional parameters (unused for compatibility)
+
+     Returns:
+         List[float]: Per-row calibration errors as a list of float values
+
+     Raises:
+         ValueError: If probability column is not found for the model
+     """
+     y_true = dataset.y
+
+     # Try to get probabilities
+     try:
+         y_prob = dataset.y_prob(model)
+         # For binary classification, use the positive class probability
+         if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+             y_prob = y_prob[:, 1]  # Use probability of positive class
+     except ValueError:
+         # If no probabilities available, return zeros (perfect calibration for hard predictions)
+         return [0.0] * len(y_true)
+
+     # Convert to numpy arrays
+     y_true = np.asarray(y_true, dtype=float)
+     y_prob = np.asarray(y_prob, dtype=float)
+
+     # Create probability bins
+     bin_boundaries = np.linspace(0, 1, n_bins + 1)
+     bin_lowers = bin_boundaries[:-1]
+     bin_uppers = bin_boundaries[1:]
+
+     # Calculate calibration error for each sample
+     calibration_errors = np.zeros_like(y_prob)
+
+     for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
+         # Find samples in this bin
+         in_bin = (y_prob > bin_lower) & (y_prob <= bin_upper)
+         if not np.any(in_bin):
+             continue
+
+         # Calculate empirical frequency for this bin
+         empirical_freq = np.mean(y_true[in_bin])
+
+         # Calculate average predicted probability for this bin
+         avg_predicted_prob = np.mean(y_prob[in_bin])
+
+         # Assign calibration error to all samples in this bin
+         calibration_errors[in_bin] = abs(avg_predicted_prob - empirical_freq)
+
+     # Return as a list of floats
+     return calibration_errors.tolist()
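This hunk adds validmind/unit_metrics/classification/individual/CalibrationError.py (file 55). The binning logic assigns the same error to every sample that falls in a bin; a small hand-checkable sketch with two bins, mirroring the loop above (illustrative only):

import numpy as np

y_true = np.array([0, 0, 1, 1, 1, 1], dtype=float)
y_prob = np.array([0.2, 0.4, 0.3, 0.8, 0.9, 0.7], dtype=float)
n_bins = 2

bounds = np.linspace(0, 1, n_bins + 1)  # [0.0, 0.5, 1.0]
errors = np.zeros_like(y_prob)
for lo, hi in zip(bounds[:-1], bounds[1:]):
    in_bin = (y_prob > lo) & (y_prob <= hi)
    if not np.any(in_bin):
        continue
    # |average predicted probability - observed positive rate| within this bin
    errors[in_bin] = abs(y_prob[in_bin].mean() - y_true[in_bin].mean())

print(errors.round(3).tolist())  # [0.033, 0.033, 0.033, 0.2, 0.2, 0.2]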