validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +8 -1
  3. validmind/ai/utils.py +2 -1
  4. validmind/client.py +1 -0
  5. validmind/datasets/regression/fred_timeseries.py +272 -0
  6. validmind/tests/__init__.py +14 -468
  7. validmind/tests/__types__.py +10 -0
  8. validmind/tests/_store.py +102 -0
  9. validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
  10. validmind/tests/data_validation/ADF.py +8 -10
  11. validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
  12. validmind/tests/data_validation/AutoAR.py +2 -4
  13. validmind/tests/data_validation/AutoMA.py +2 -4
  14. validmind/tests/data_validation/AutoSeasonality.py +8 -10
  15. validmind/tests/data_validation/AutoStationarity.py +8 -10
  16. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
  17. validmind/tests/data_validation/BivariateHistograms.py +8 -10
  18. validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
  19. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
  20. validmind/tests/data_validation/ClassImbalance.py +2 -4
  21. validmind/tests/data_validation/DFGLSArch.py +2 -4
  22. validmind/tests/data_validation/DatasetDescription.py +7 -9
  23. validmind/tests/data_validation/DatasetSplit.py +8 -9
  24. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  25. validmind/tests/data_validation/Duplicates.py +2 -4
  26. validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
  27. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
  28. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
  29. validmind/tests/data_validation/HighCardinality.py +2 -4
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
  32. validmind/tests/data_validation/IQROutliersTable.py +2 -4
  33. validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
  34. validmind/tests/data_validation/KPSS.py +8 -10
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
  36. validmind/tests/data_validation/MissingValues.py +2 -4
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
  38. validmind/tests/data_validation/MissingValuesRisk.py +2 -4
  39. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
  40. validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
  41. validmind/tests/data_validation/RollingStatsPlot.py +2 -4
  42. validmind/tests/data_validation/ScatterPlot.py +2 -4
  43. validmind/tests/data_validation/SeasonalDecompose.py +70 -44
  44. validmind/tests/data_validation/Skewness.py +2 -4
  45. validmind/tests/data_validation/SpreadPlot.py +2 -4
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
  48. validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
  50. validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
  51. validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
  58. validmind/tests/data_validation/TooManyZeroValues.py +2 -4
  59. validmind/tests/data_validation/UniqueRows.py +2 -4
  60. validmind/tests/data_validation/WOEBinPlots.py +2 -4
  61. validmind/tests/data_validation/WOEBinTable.py +2 -4
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -4
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -4
  65. validmind/tests/data_validation/nlp/Mentions.py +2 -4
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -4
  67. validmind/tests/data_validation/nlp/StopWords.py +2 -4
  68. validmind/tests/data_validation/nlp/TextDescription.py +2 -4
  69. validmind/tests/decorator.py +10 -8
  70. validmind/tests/load.py +264 -0
  71. validmind/tests/metadata.py +59 -0
  72. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
  73. validmind/tests/model_validation/FeaturesAUC.py +6 -8
  74. validmind/tests/model_validation/ModelMetadata.py +8 -9
  75. validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
  76. validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
  77. validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
  78. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
  79. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
  80. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
  81. validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
  82. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
  83. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
  84. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
  85. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
  86. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
  87. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
  88. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
  89. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
  90. validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
  91. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
  92. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
  93. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
  94. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
  95. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
  96. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
  97. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
  98. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
  99. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
  100. validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
  101. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
  102. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
  103. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
  104. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
  105. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
  106. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
  107. validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
  108. validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
  109. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
  110. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
  111. validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
  112. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
  113. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
  114. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
  115. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
  116. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
  117. validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
  118. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
  119. validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
  120. validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
  121. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
  122. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
  123. validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
  124. validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
  125. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
  126. validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
  127. validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
  128. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
  129. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
  130. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
  132. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
  133. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
  134. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
  135. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
  136. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
  137. validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
  138. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
  139. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
  140. validmind/tests/prompt_validation/Bias.py +2 -4
  141. validmind/tests/prompt_validation/Clarity.py +2 -4
  142. validmind/tests/prompt_validation/Conciseness.py +2 -4
  143. validmind/tests/prompt_validation/Delimitation.py +2 -4
  144. validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
  145. validmind/tests/prompt_validation/Robustness.py +2 -4
  146. validmind/tests/prompt_validation/Specificity.py +2 -4
  147. validmind/tests/run.py +394 -0
  148. validmind/tests/test_providers.py +12 -0
  149. validmind/tests/utils.py +16 -0
  150. validmind/unit_metrics/__init__.py +12 -4
  151. validmind/unit_metrics/composite.py +3 -0
  152. validmind/vm_models/test/metric.py +8 -5
  153. validmind/vm_models/test/result_wrapper.py +2 -1
  154. validmind/vm_models/test/test.py +14 -11
  155. validmind/vm_models/test/threshold_test.py +1 -0
  156. validmind/vm_models/test_suite/runner.py +1 -0
  157. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
  158. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
  159. /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
  160. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
  161. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
  162. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -58,15 +58,13 @@ class ClassifierPerformance(Metric):
 
     name = "classifier_performance"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -57,13 +57,11 @@ class ClusterCosineSimilarity(Metric):
 
     name = "cluster_cosine_similarity"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
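Editor's note: the hunks above, and most of the class-based hunks that follow, apply the same mechanical change — the nested `metadata` dict (`task_types`/`tags`) becomes flat `tasks` and `tags` class attributes. A minimal sketch of the two conventions, for context only; the `describe_test` helper is hypothetical and not part of the validmind API:

# Illustrative only: 2.3.3-style vs 2.4.0-style declaration of tasks and tags.

class OldStyleTest:
    # validmind <= 2.3.3 convention: nested metadata dict
    metadata = {
        "task_types": ["classification"],
        "tags": ["sklearn", "model_performance"],
    }

class NewStyleTest:
    # validmind 2.4.0 convention: flat class attributes
    tasks = ["classification"]
    tags = ["sklearn", "model_performance"]

def describe_test(test_cls):
    """Hypothetical helper: read tasks/tags from either convention."""
    if hasattr(test_cls, "tasks"):
        return test_cls.tasks, test_cls.tags
    meta = test_cls.metadata
    return meta["task_types"], meta["tags"]

assert describe_test(OldStyleTest) == describe_test(NewStyleTest)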
@@ -51,13 +51,11 @@ class ClusterPerformance(Metric):
 
     name = "cluster_performance_metrics"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def cluser_performance_metrics(
         self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
@@ -61,13 +61,8 @@ class ClusterPerformanceMetrics(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance"]
     default_metrics = {
         "Homogeneity Score": metrics.homogeneity_score,
         "Completeness Score": metrics.completeness_score,
@@ -44,13 +44,11 @@ class CompletenessScore(ClusterPerformance):
 
     name = "homogeneity_score"
    required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Completeness Score": metrics.completeness_score}
@@ -55,16 +55,14 @@ class ConfusionMatrix(Metric):
 
     name = "confusion_matrix"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     def run(self):
         y_true = self.inputs.dataset.y
@@ -113,6 +111,17 @@ class ConfusionMatrix(Metric):
             height=600,
         )
 
+        # Add an annotation at the bottom of the heatmap
+        fig.add_annotation(
+            x=0.5,
+            y=-0.1,
+            xref="paper",
+            yref="paper",
+            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
+            showarrow=False,
+            font=dict(size=14),
+        )
+
        return self.cache_results(
            metric_value={
                "confusion_matrix": cm,
@@ -0,0 +1,83 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from sklearn.inspection import permutation_importance
+
+from validmind import tags, tasks
+
+
+@tags("model_explainability", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def FeatureImportanceComparison(datasets, models, num_features=3):
+    """
+    Compare feature importance scores for each model and generate a summary table
+    with the top important features.
+
+    **Purpose**: The purpose of this function is to compare the feature importance scores for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates permutation feature importance (PFI) scores, and generates a summary table with the top `num_features` important features for each model.
+
+    **Signs of High Risk**:
+    - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
+    - High variance in feature importance scores across different models may suggest instability in feature selection.
+
+    **Strengths**:
+    - Provides a clear comparison of the most important features for each model.
+    - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
+    - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
+    - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
+
+
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        x = dataset.x_df()
+        y = dataset.y_df()
+
+        pfi_values = permutation_importance(
+            model.model,
+            x,
+            y,
+            random_state=0,
+            n_jobs=-2,
+        )
+
+        # Create a dictionary to store PFI scores
+        pfi = {
+            column: pfi_values["importances_mean"][i]
+            for i, column in enumerate(x.columns)
+        }
+
+        # Sort features by their importance
+        sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
+
+        # Extract the top `num_features` features
+        top_features = sorted_features[:num_features]
+
+        # Prepare the result for the current model and dataset
+        result = {
+            "Model": model.input_id,
+            "Dataset": dataset.input_id,
+        }
+
+        # Dynamically add feature columns to the result
+        for i in range(num_features):
+            if i < len(top_features):
+                result[
+                    f"Feature {i + 1}"
+                ] = f"[{top_features[i][0]}; {top_features[i][1]:.4f}]"
+            else:
+                result[f"Feature {i + 1}"] = None
+
+        # Append the result to the list
+        results_list.append(result)
+
+    # Convert the results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
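Editor's note: FeatureImportanceComparison is one of the new functional tests declared with the `@tags`/`@tasks` decorators rather than a Metric subclass. A minimal harness, under stated assumptions: the `_Dataset`/`_Model` stand-ins below are hypothetical and only mimic the interface the docstring describes (`x_df()`, `y_df()`, `input_id`, and a fitted estimator on `.model`); the import path assumes the module location shown in the file list above, and in a real documentation run you would pass validmind-initialized dataset and model objects instead.

# Hypothetical harness for the new functional test; not part of the package.
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

from validmind.tests.model_validation.sklearn.FeatureImportanceComparison import (
    FeatureImportanceComparison,
)

class _Dataset:
    # Stand-in exposing only what the test's docstring assumes.
    def __init__(self, X, y, input_id):
        self._x, self._y, self.input_id = X, y, input_id
    def x_df(self):
        return self._x
    def y_df(self):
        return self._y

class _Model:
    # Stand-in wrapping a fitted sklearn estimator under .model.
    def __init__(self, estimator, input_id):
        self.model, self.input_id = estimator, input_id

X, y = make_regression(n_samples=200, n_features=5, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(5)])
ds = _Dataset(X, pd.Series(y), "train_ds")
lm = _Model(LinearRegression().fit(X, y), "linear_model")

print(FeatureImportanceComparison(datasets=[ds], models=[lm], num_features=3))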
@@ -55,13 +55,11 @@ class FowlkesMallowsScore(ClusterPerformance):
 
     name = "fowlkes_mallows_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
@@ -46,13 +46,11 @@ class HomogeneityScore(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Homogeneity Score": metrics.homogeneity_score}
@@ -52,13 +52,8 @@ class HyperParametersTuning(Metric):
 
     name = "hyper_parameters_tuning"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "clustering"]
+    tags = ["sklearn", "model_performance"]
     default_params = {"param_grid": None, "scoring": None}
 
     def run(self):
@@ -60,13 +60,10 @@ class KMeansClustersOptimization(Metric):
 
     name = "clusters_optimize_elbow_method"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": ["sklearn", "model_performance", "kmeans"],
-    }
-    default_params = {
-        "n_clusters": None,
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance", "kmeans"]
+
+    default_params = {"n_clusters": None}
 
     def run(self):
         n_clusters = self.params["n_clusters"]
@@ -59,15 +59,13 @@ class MinimumAccuracy(ThresholdTest):
     name = "accuracy_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.7}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -62,15 +62,13 @@ class MinimumF1Score(ThresholdTest):
     name = "f1_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -59,15 +59,13 @@ class MinimumROCAUCScore(ThresholdTest):
     name = "roc_auc_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -53,16 +53,14 @@ class ModelsPerformanceComparison(ClassifierPerformance):
 
     name = "models_performance_comparison"
    required_inputs = ["dataset", "models"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "model_comparison",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "model_comparison",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -67,15 +67,13 @@ class OverfitDiagnosis(ThresholdTest):
     name = "overfit_regions"
     required_inputs = ["model", "datasets"]
     default_params = {"features_columns": None, "cut_off_percentage": 4}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+    ]
 
     default_metrics = {
         "accuracy": metrics.accuracy_score,
@@ -56,16 +56,14 @@ class PermutationFeatureImportance(Metric):
         "fontsize": None,
         "figure_height": 1000,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]
 
     def run(self):
         x = self.inputs.dataset.x_df()
@@ -121,7 +119,7 @@ class PermutationFeatureImportance(Metric):
             figures=[
                 Figure(
                     for_object=self,
-                    key="pfi",
+                    key=f"pfi_{self.inputs.dataset.input_id}_{self.inputs.model.input_id}",
                    figure=fig,
                ),
            ],
@@ -73,15 +73,13 @@ class PopulationStabilityIndex(Metric):
 
     name = "psi"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
     default_params = {
         "num_bins": 10,
         "mode": "fixed",
@@ -51,16 +51,14 @@ class PrecisionRecallCurve(Metric):
 
     name = "pr_curve"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     def run(self):
         if isinstance(self.inputs.model, FoundationModel):
@@ -59,16 +59,14 @@ class ROCCurve(Metric):
 
     name = "roc_curve"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     def run(self):
         if isinstance(self.inputs.model, FoundationModel):
@@ -109,7 +107,7 @@ class ROCCurve(Metric):
         )
 
         layout = go.Layout(
-            title="ROC Curve",
+            title=f"ROC Curve for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
             xaxis=dict(title="False Positive Rate"),
             yaxis=dict(title="True Positive Rate"),
             width=700,
@@ -117,6 +115,7 @@ class ROCCurve(Metric):
         )
 
         fig = go.Figure(data=[trace0, trace1], layout=layout)
+
         return self.cache_results(
             metric_value={
                 "auc": auc,
@@ -43,13 +43,11 @@ class RegressionErrors(Metric):
 
     name = "regression_errors"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def summary(self, raw_results):
         """
@@ -0,0 +1,76 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+import pandas as pd
+from sklearn import metrics
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("model_performance", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def RegressionErrorsComparison(datasets, models):
+    """
+    Compare regression error metrics for each model and generate a summary table
+    with the results.
+
+    **Purpose**: The purpose of this function is to compare the regression errors for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates various error metrics (MAE, MSE, MAPE, MBD), and generates a summary table with these results.
+
+    **Signs of High Risk**:
+    - High Mean Absolute Error (MAE) or Mean Squared Error (MSE) indicates poor model performance.
+    - High Mean Absolute Percentage Error (MAPE) suggests large percentage errors, especially problematic if the true values are small.
+    - Mean Bias Deviation (MBD) significantly different from zero indicates systematic overestimation or underestimation by the model.
+
+    **Strengths**:
+    - Provides multiple error metrics to assess model performance from different perspectives.
+    - Includes a check to avoid division by zero when calculating MAPE.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+    - The function relies on the `logger` from `validmind.logging` to warn about zero values in `y_true`, which should be correctly implemented and imported.
+    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        dataset_name = dataset.input_id
+        model_name = model.input_id
+
+        y_true = dataset.y
+        y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+        y_true = y_true.astype(y_pred.dtype)
+
+        mae = metrics.mean_absolute_error(y_true, y_pred)
+        mse = metrics.mean_squared_error(y_true, y_pred)
+
+        if np.any(y_true == 0):
+            logger.warning(
+                "y_true contains zero values. Skipping MAPE calculation to avoid division by zero."
+            )
+            mape = None
+        else:
+            mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
+        mbd = np.mean(y_pred - y_true)
+
+        # Append results to the list
+        results_list.append(
+            {
+                "Model": model_name,
+                "Dataset": dataset_name,
+                "Mean Absolute Error (MAE)": mae,
+                "Mean Squared Error (MSE)": mse,
+                "Mean Absolute Percentage Error (MAPE)": mape,
+                "Mean Bias Deviation (MBD)": mbd,
+            }
+        )
+
+    # Convert results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
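Editor's note: for reference, the four error metrics that RegressionErrorsComparison tabulates reduce to the following arithmetic; the toy numbers below are chosen purely for illustration and are not taken from the package.

# Toy check of the per-pair metrics reported in the summary table above.
import numpy as np
from sklearn import metrics

y_true = np.array([100.0, 200.0, 300.0, 400.0])
y_pred = np.array([110.0, 190.0, 330.0, 380.0])

mae = metrics.mean_absolute_error(y_true, y_pred)         # mean(|y_true - y_pred|)       -> 17.5
mse = metrics.mean_squared_error(y_true, y_pred)          # mean((y_true - y_pred) ** 2)  -> 375.0
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100  # mean absolute % error         -> 7.5
mbd = np.mean(y_pred - y_true)                            # signed bias                   -> 2.5

print(mae, mse, mape, mbd)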