validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py
@@ -2,134 +2,102 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass

  import plotly.graph_objects as go
  from matplotlib import cm

- from validmind.vm_models import Figure, Metric

+ from validmind import tags, tasks

- @dataclass
- class PredictionProbabilitiesHistogram(Metric):
+
+ @tags("visualization", "credit_risk", "logistic_regression")
+ @tasks("classification")
+ def PredictionProbabilitiesHistogram(
+ dataset, model, title="Histogram of Predictive Probabilities"
+ ):
  """
- Generates and visualizes histograms of the Probability of Default predictions for both positive and negative
- classes in training and testing datasets.
-
- **Purpose**: This code is designed to generate histograms that display the Probability of Default (PD) predictions
- for positive and negative classes in both the training and testing datasets. By doing so, it evaluates the
- performance of a logistic regression model, particularly in the context of credit risk prediction.
-
- **Test Mechanism**: The metric executes these steps to run the test:
- - Firstly, it extracts the target column from both the train and test datasets.
- - The model's predict function is then used to calculate probabilities.
- - These probabilities are added as a new column to the training and testing dataframes.
- - Histograms are generated for each class (0 or 1 in binary classification scenarios) within the training and
- testing datasets.
- - To enhance visualization, the histograms are set to have different opacities.
- - The four histograms (two for training data and two for testing) are overlaid on two different subplot frames (one
- for training and one for testing data).
- - The test returns a plotly graph object displaying the visualization.
-
- **Signs of High Risk**: Several indicators could suggest a high risk or failure in the model's performance. These
- include:
- - Significant discrepancies observed between the histograms of training and testing data.
+ Assesses the predictive probability distribution for binary classification to evaluate model performance and
+ potential overfitting or bias.
+
+ ### Purpose
+
+ The Prediction Probabilities Histogram test is designed to generate histograms displaying the Probability of
+ Default (PD) predictions for both positive and negative classes in training and testing datasets. This helps in
+ evaluating the performance of a logistic regression model, particularly for credit risk prediction.
+
+ ### Test Mechanism
+
+ The metric follows these steps to execute the test:
+ - Extracts the target column from both the train and test datasets.
+ - Uses the model's predict function to calculate probabilities.
+ - Adds these probabilities as a new column to the training and testing dataframes.
+ - Generates histograms for each class (0 or 1) within the training and testing datasets.
+ - Sets different opacities for the histograms to enhance visualization.
+ - Overlays the four histograms (two for training and two for testing) on two different subplot frames.
+ - Returns a plotly graph object displaying the visualization.
+
+ ### Signs of High Risk
+
+ - Significant discrepancies between the histograms of training and testing data.
  - Large disparities between the histograms for the positive and negative classes.
- - These issues could signal potential overfitting or bias in the model.
- - Unevenly distributed probabilities may also indicate that the model does not accurately predict outcomes.
-
- **Strengths**: This metric and test offer several benefits, including:
- - The visual representation of the PD predictions made by the model, which aids in understanding the model's
- behaviour.
- - The ability to assess both the training and testing datasets, adding depth to the validation of the model.
- - Highlighting disparities between multiple classes, providing potential insights into class imbalance or data
- skewness issues.
- - Particularly beneficial for credit risk prediction, it effectively visualizes the spread of risk across different
- classes.
-
- **Limitations**: Despite its strengths, the test has several limitations:
- - It is specifically tailored for binary classification scenarios, where the target variable only has two classes;
- as such, it isn't suited for multi-class classification tasks.
- - This metric is mainly applicable for logistic regression models. It might not be effective or accurate when used
- on other model types.
- - While the test provides a robust visual representation of the model's PD predictions, it does not provide a
- quantifiable measure or score to assess model performance.
+ - Potential overfitting or bias indicated by significant issues.
+ - Unevenly distributed probabilities suggesting inaccurate model predictions.
+
+ ### Strengths
+
+ - Offers a visual representation of the PD predictions made by the model, aiding in understanding its behavior.
+ - Assesses both the training and testing datasets, adding depth to model validation.
+ - Highlights disparities between classes, providing insights into class imbalance or data skewness.
+ - Effectively visualizes risk spread, which is particularly beneficial for credit risk prediction.
+
+ ### Limitations
+
+ - Specifically tailored for binary classification scenarios and not suited for multi-class classification tasks.
+ - Mainly applicable to logistic regression models, and may not be effective for other model types.
+ - Provides a robust visual representation but lacks a quantifiable measure to assess model performance.
  """

- name = "prediction_probabilities_histogram"
- required_inputs = ["model", "datasets"]
- tasks = ["classification"]
- tags = ["tabular_data", "visualization", "credit_risk", "logistic_regression"]
-
- default_params = {"title": "Histogram of Predictive Probabilities"}
-
- @staticmethod
- def plot_prob_histogram(dataframes, dataset_titles, target_col, title):
- figures = []
-
- # Generate a colormap and convert to Plotly-accepted color format
- # Adjust 'viridis' to any other matplotlib colormap if desired
- colormap = cm.get_cmap("viridis")
-
- for i, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
- fig = go.Figure()
-
- # Get unique classes and assign colors
- classes = sorted(df[target_col].unique())
- colors = [
- colormap(i / len(classes))[:3] for i in range(len(classes))
- ] # RGB
- color_dict = {
- cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
- for cls, rgb in zip(classes, colors)
- }
-
- # Ensure classes are plotted in the specified order
- for class_value in sorted(df[target_col].unique()):
- fig.add_trace(
- go.Histogram(
- x=df[df[target_col] == class_value]["probabilities"],
- opacity=0.75,
- name=f"{dataset_title} {target_col} = {class_value}",
- marker=dict(
- color=color_dict[class_value],
- ),
- )
- )
- fig.update_layout(
- barmode="overlay",
- title_text=f"{title} - {dataset_title}",
- xaxis_title="Probability",
- yaxis_title="Frequency",
- )
- figures.append(fig)
- return figures
-
- def run(self):
- dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
- target_column = self.inputs.datasets[0].target_column
- title = self.params.get("title", self.default_params["title"])
-
- dataframes = []
- metric_value = {"prob_histogram": {}}
- for _, dataset in enumerate(self.inputs.datasets):
- df = dataset.df.copy()
- y_prob = dataset.y_prob(self.inputs.model)
- df["probabilities"] = y_prob
- dataframes.append(df)
- metric_value["prob_histogram"][dataset.input_id] = list(df["probabilities"])
-
- figures = self.plot_prob_histogram(
- dataframes, dataset_titles, target_column, title
- )
+ df = dataset.df
+ df["probabilities"] = dataset.y_prob(model)

- figures_list = [
- Figure(
- for_object=self,
- key=f"prob_histogram_{title.replace(' ', '_')}_{i+1}",
- figure=fig,
- )
- for i, fig in enumerate(figures)
- ]
+ fig = _plot_prob_histogram(df, dataset.target_column, title)
+
+ return fig
+
+
+ def _plot_prob_histogram(df, target_col, title):

- return self.cache_results(metric_value=metric_value, figures=figures_list)
+ # Generate a colormap and convert to Plotly-accepted color format
+ # Adjust 'viridis' to any other matplotlib colormap if desired
+ colormap = cm.get_cmap("viridis")
+
+ fig = go.Figure()
+
+ # Get unique classes and assign colors
+ classes = sorted(df[target_col].unique())
+ colors = [colormap(i / len(classes))[:3] for i in range(len(classes))] # RGB
+ color_dict = {
+ cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+ for cls, rgb in zip(classes, colors)
+ }
+
+ # Ensure classes are plotted in the specified order
+ for class_value in sorted(df[target_col].unique()):
+ fig.add_trace(
+ go.Histogram(
+ x=df[df[target_col] == class_value]["probabilities"],
+ opacity=0.75,
+ name=f"{target_col} = {class_value}",
+ marker=dict(
+ color=color_dict[class_value],
+ ),
+ )
+ )
+ fig.update_layout(
+ barmode="overlay",
+ title_text=f"{title}",
+ xaxis_title="Probability",
+ yaxis_title="Frequency",
+ )
+
+ return fig
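The hunk above converts the class-based metric into a plain decorated test function that takes a single dataset and model. A minimal usage sketch, assuming the usual `vm.init_dataset`/`vm.init_model` setup has already produced `vm_dataset` and `vm_model`, and assuming the `run_test` harness in this release accepts the arguments shown (both are assumptions for illustration, not taken from this diff):

import validmind as vm

# Hypothetical inputs: vm_dataset and vm_model are assumed to come from
# vm.init_dataset(...) and vm.init_model(...) earlier in a notebook.
vm.tests.run_test(
    "validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram",
    inputs={"dataset": vm_dataset, "model": vm_model},
    params={"title": "Histogram of Predictive Probabilities"},
)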
validmind/tests/model_validation/statsmodels/RegressionCoeffs.py
@@ -0,0 +1,100 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+
+ import pandas as pd
+ import plotly.graph_objects as go
+ from scipy import stats
+
+ from validmind.errors import SkipTestError
+ from validmind import tags, tasks
+
+
+ @tags("tabular_data", "visualization", "model_training")
+ @tasks("regression")
+ def RegressionCoeffs(model):
+ """
+ Assesses the significance and uncertainty of predictor variables in a regression model through visualization of
+ coefficients and their 95% confidence intervals.
+
+ ### Purpose
+
+ The `RegressionCoeffs` metric visualizes the estimated regression coefficients alongside their 95% confidence intervals,
+ providing insights into the impact and significance of predictor variables on the response variable. This visualization
+ helps to understand the variability and uncertainty in the model's estimates, aiding in the evaluation of the
+ significance of each predictor.
+
+ ### Test Mechanism
+
+ The function operates by extracting the estimated coefficients and their standard errors from the regression model.
+ Using these, it calculates the confidence intervals at a 95% confidence level, which indicates the range within which
+ the true coefficient value is expected to fall 95% of the time. The confidence intervals are computed using the
+ Z-value associated with the 95% confidence level. The coefficients and their confidence intervals are then visualized
+ in a bar plot. The x-axis represents the predictor variables, the y-axis represents the estimated coefficients, and
+ the error bars depict the confidence intervals.
+
+ ### Signs of High Risk
+
+ - The confidence interval for a coefficient contains the zero value, suggesting that the predictor may not significantly
+ contribute to the model.
+ - Multiple coefficients with confidence intervals that include zero, potentially indicating issues with model reliability.
+ - Very wide confidence intervals, which may suggest high uncertainty in the coefficient estimates and potential model
+ instability.
+
+ ### Strengths
+
+ - Provides a clear visualization that allows for easy interpretation of the significance and impact of predictor
+ variables.
+ - Includes confidence intervals, which provide additional information about the uncertainty surrounding each coefficient
+ estimate.
+
+ ### Limitations
+
+ - The method assumes normality of residuals and independence of observations, assumptions that may not always hold true
+ in practice.
+ - It does not address issues related to multi-collinearity among predictor variables, which can affect the interpretation
+ of coefficients.
+ - This metric is limited to regression tasks using tabular data and is not applicable to other types of machine learning
+ tasks or data structures.
+ """
+
+ if model.library != "statsmodels":
+ raise SkipTestError("Only statsmodels are supported for this metric")
+
+ # Extract estimated coefficients and standard errors
+ coefficients = model.regression_coefficients()
+ coef = pd.to_numeric(coefficients["coef"])
+ std_err = pd.to_numeric(coefficients["std err"])
+
+ # Calculate confidence intervals
+ confidence_level = 0.95 # 95% confidence interval
+ z_value = stats.norm.ppf((1 + confidence_level) / 2) # Calculate Z-value
+ lower_ci = coef - z_value * std_err
+ upper_ci = coef + z_value * std_err
+
+ # Create a bar plot with confidence intervals
+ fig = go.Figure()
+
+ fig.add_trace(
+ go.Bar(
+ x=list(coefficients["Feature"].values),
+ y=coef,
+ name="Estimated Coefficients",
+ error_y=dict(
+ type="data",
+ symmetric=False,
+ arrayminus=lower_ci,
+ array=upper_ci,
+ visible=True,
+ ),
+ )
+ )
+
+ fig.update_layout(
+ title=f"{model.input_id} Coefficients with Confidence Intervals",
+ xaxis_title="Predictor Variables",
+ yaxis_title="Coefficients",
+ )
+
+ return (fig, coefficients)
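The confidence-interval arithmetic used by the new `RegressionCoeffs` test is the standard two-sided normal interval. A self-contained sketch of the same calculation on made-up coefficient values (the numbers are illustrative, not taken from the package):

import numpy as np
from scipy import stats

# Illustrative coefficients and standard errors (not from the diff)
coef = np.array([0.80, -0.30, 0.05])
std_err = np.array([0.10, 0.12, 0.04])

confidence_level = 0.95
z_value = stats.norm.ppf((1 + confidence_level) / 2)  # ~1.96 for a two-sided 95% interval

lower_ci = coef - z_value * std_err
upper_ci = coef + z_value * std_err
# An interval that straddles zero (the third coefficient here) is the
# "signs of high risk" case the docstring describes.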
validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py
@@ -19,31 +19,36 @@ class RegressionFeatureSignificance(Metric):
  """
  Assesses and visualizes the statistical significance of features in a set of regression models.

- **Purpose**:
+ ### Purpose
+
  The Regression Feature Significance metric assesses the significance of each feature in a given set of regression
  models. It creates a visualization displaying p-values for every feature of each model, assisting model developers
  in understanding which features are most influential in their models.

- **Test Mechanism**:
+ ### Test Mechanism
+
  The test mechanism involves going through each fitted regression model in a given list, extracting the model
  coefficients and p-values for each feature, and then plotting these values. The x-axis on the plot contains the
  p-values while the y-axis denotes the coefficients of each feature. A vertical red line is drawn at the threshold
  for p-value significance, which is 0.05 by default. Any features with p-values to the left of this line are
  considered statistically significant at the chosen level.

- **Signs of High Risk**:
+ ### Signs of High Risk
+
  - Any feature with a high p-value (greater than the threshold) is considered a potential high risk, as it suggests
  the feature is not statistically significant and may not be reliably contributing to the model's predictions.
  - A high number of such features may indicate problems with the model validation, variable selection, and overall
  reliability of the model predictions.

- **Strengths**:
+ ### Strengths
+
  - Helps identify the features that significantly contribute to a model's prediction, providing insights into the
  feature importance.
  - Provides tangible, easy-to-understand visualizations to interpret the feature significance.
  - Facilitates comparison of feature importance across multiple models.

- **Limitations**:
+ ### Limitations
+
  - This metric assumes model features are independent, which may not always be the case. Multicollinearity (high
  correlation amongst predictors) can cause high variance and unreliable statistical tests of significance.
  - The p-value strategy for feature selection doesn't take into account the magnitude of the effect, focusing solely
@@ -54,7 +59,7 @@ class RegressionFeatureSignificance(Metric):
  """

  name = "regression_feature_significance"
- required_inputs = ["models"]
+ required_inputs = ["model"]

  default_params = {"fontsize": 10, "p_threshold": 0.05}
  tasks = ["regression"]
@@ -70,10 +75,10 @@ class RegressionFeatureSignificance(Metric):
  p_threshold = self.params["p_threshold"]

  # Check models list is not empty
- if not self.inputs.models:
- raise ValueError("List of models must be provided in the models parameter")
+ if not self.inputs.model:
+ raise ValueError("Model must be provided in the models parameter")

- figures = self._plot_pvalues(self.inputs.models, fontsize, p_threshold)
+ figures = self._plot_pvalues(self.inputs.model, fontsize, p_threshold)

  return self.cache_results(figures=figures)

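The docstring above describes flagging features whose p-values exceed a threshold (0.05 by default). A self-contained sketch of that check on a toy statsmodels fit; the data and variable names are made up for illustration and are not part of the package:

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(0)
X = pd.DataFrame({"x1": rng.normal(size=200), "x2": rng.normal(size=200)})
y = 2.0 * X["x1"] + rng.normal(size=200)  # x2 is deliberately uninformative

fit = sm.OLS(y, sm.add_constant(X)).fit()

p_threshold = 0.05
print(fit.pvalues)                              # one p-value per coefficient
print(fit.pvalues[fit.pvalues > p_threshold])   # the "high risk" features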
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py
@@ -19,26 +19,30 @@ class RegressionModelForecastPlot(Metric):
  Generates plots to visually compare the forecasted outcomes of one or more regression models against actual
  observed values over a specified date range.

- **Purpose:** The "regression_forecast_plot" is intended to visually depict the performance of one or more
- regression models by comparing the model's forecasted outcomes against actual observed values within a specified
- date range. This metric is especially useful in time-series models or any model where the outcome changes over
- time, allowing direct comparison of predicted vs actual values.
+ ### Purpose

- **Test Mechanism:** This test generates a plot for each fitted model in the list. The x-axis represents the date
- ranging from the specified "start_date" to the "end_date", while the y-axis shows the value of the outcome
- variable. Two lines are plotted: one representing the forecasted values and the other representing the observed
- values. The "start_date" and "end_date" can be parameters of this test; if these parameters are not provided, they
- are set to the minimum and maximum date available in the dataset. The test verifies that the provided date range is
- within the limits of the available data.
+ The "regression_forecast_plot" is intended to visually depict the performance of one or more regression models by
+ comparing the model's forecasted outcomes against actual observed values within a specified date range. This metric
+ is especially useful in time-series models or any model where the outcome changes over time, allowing direct
+ comparison of predicted vs actual values.

- **Signs of High Risk:**
+ ### Test Mechanism
+
+ This test generates a plot for each fitted model in the list. The x-axis represents the date ranging from the
+ specified "start_date" to the "end_date", while the y-axis shows the value of the outcome variable. Two lines are
+ plotted: one representing the forecasted values and the other representing the observed values. The "start_date"
+ and "end_date" can be parameters of this test; if these parameters are not provided, they are set to the minimum
+ and maximum date available in the dataset. The test verifies that the provided date range is within the limits of
+ the available data.
+
+ ### Signs of High Risk

  - High risk or failure signs could be deduced visually from the plots if the forecasted line significantly deviates
  from the observed line, indicating the model's predicted values are not matching actual outcomes.
  - A model that struggles to handle the edge conditions like maximum and minimum data points could also be
  considered a sign of risk.

- **Strengths:**
+ ### Strengths

  - Visualization: The plot provides an intuitive and clear illustration of how well the forecast matches the actual
  values, making it straightforward even for non-technical stakeholders to interpret.
@@ -46,7 +50,7 @@ class RegressionModelForecastPlot(Metric):
  - Model Evaluation: It can be useful in identifying overfitting or underfitting situations, as these will manifest
  as discrepancies between the forecasted and observed values.

- **Limitations:**
+ ### Limitations

  - Interpretation Bias: Interpretation of the plot is subjective and can lead to different conclusions by different
  evaluators.
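A minimal sketch of the date-range handling the mechanism above describes, defaulting "start_date"/"end_date" to the data's own bounds and rejecting out-of-range values; this is a hypothetical helper written for illustration, not the metric's actual code:

import pandas as pd

def resolve_date_range(index: pd.DatetimeIndex, start_date=None, end_date=None):
    # Default to the dataset's own bounds, then validate the requested range.
    start = pd.Timestamp(start_date) if start_date is not None else index.min()
    end = pd.Timestamp(end_date) if end_date is not None else index.max()
    if start < index.min() or end > index.max():
        raise ValueError("Date range is outside the available data")
    return start, end

idx = pd.date_range("2020-01-01", periods=24, freq="MS")
print(resolve_date_range(idx))                              # full range
print(resolve_date_range(idx, "2020-06-01", "2021-06-01"))  # explicit sub-range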
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py
@@ -14,49 +14,52 @@ from validmind.vm_models import Figure, Metric
  @dataclass
  class RegressionModelForecastPlotLevels(Metric):
  """
- Compares and visualizes forecasted and actual values of regression models on both raw and transformed datasets.
-
- **Purpose:**
- The `RegressionModelForecastPlotLevels` metric is designed to visually assess a series of regression models'
- performance. It achieves this by contrasting the models' forecasts with the observed data from the respective
- training and test datasets. The gauge of accuracy here involves determining the extent of closeness between
- forecasted and actual values. Accordingly, if any transformations are specified, the metric will handle
- transforming the data before making this comparison.
-
- **Test Mechanism:**
- The `RegressionModelForecastPlotLevels` class in Python initiates with a `transformation` parameter, which default
- aggregates to None. Initially, the class checks for the presence of model objects and raises a `ValueError` if none
- are found. Each model is then processed, creating predictive forecasts for both its training and testing datasets.
- These forecasts are then contrasted with the actual values and plotted. In situations where a specified
- transformation, like "integrate," is specified, the class navigates the transformation steps (performing cumulative
- sums to generate a novel series, for instance). Finally, plots are produced that compare observed and forecasted
- values for both the raw and transformed datasets.
-
- **Signs of High Risk:**
- Indications of high risk or failure in the model's performance can be derived from checking the generated plots.
- When the forecasted values dramatically deviate from the observed values in either the training or testing
- datasets, it suggests a high risk situation. A significant deviation could be a symptom of either overfitting or
- underfitting, both scenarios are worrying. Such discrepancies could inhibit the model's ability to create precise,
- generalized results.
-
- **Strengths:**
-
- - Visual Evaluations: The metric provides a visual and comparative way of assessing multiple regression models at
- once. This allows easier interpretation and evaluation of their forecasting accuracy.
- - Transformation Handling: This metric can handle transformations like "integrate," enhancing its breadth and
- flexibility in evaluating different models.
- - Detailed Perspective: By looking at the performance on both datasets (training and testing), the metric may give
- a detailed overview of the model.
-
- **Limitations:**
-
- - Subjectivity: Relying heavily on visual interpretations; assessments may differ from person to person.
- - Limited Transformation Capability: Currently, only the "integrate" transformation is supported, implying complex
- transformations might go unchecked or unhandled.
- - Overhead: The plotting mechanism may become computationally costly when applying to extensive datasets,
- increasing runtime.
- - Numerical Measurement: Although visualization is instrumental, a corresponding numerical measure would further
- reinforce the observations. However, this metric does not provide numerical measures.
+ Assesses the alignment between forecasted and observed values in regression models through visual plots, including
+ handling data transformations.
+
+ ### Purpose
+
+ The `RegressionModelForecastPlotLevels` test aims to visually assess the performance of a series of regression
+ models by comparing their forecasted values against the actual observed values in both training and test datasets.
+ This test helps determine the accuracy of the models and can handle specific data transformations before making the
+ comparison, providing a comprehensive evaluation of model performance.
+
+ ### Test Mechanism
+
+ The test mechanism involves initializing the `RegressionModelForecastPlotLevels` class with an optional
+ `transformation` parameter. The class then:
+
+ - Checks for the presence of model objects and raises a `ValueError` if none are found.
+ - Processes each model to generate predictive forecasts for both training and testing datasets.
+ - Contrasts these forecasts with the actual observed values.
+ - Produces plots to visually compare forecasted and observed values for both raw and transformed datasets.
+ - Handles specified transformations (e.g., "integrate") by performing cumulative sums to create a new series before
+ plotting.
+
+ ### Signs of High Risk
+
+ - Significant deviation between forecasted and observed values in training or testing datasets.
+ - Patterns suggesting overfitting or underfitting.
+ - Large discrepancies in the plotted forecasts, indicating potential issues with model generalizability and
+ precision.
+
+ ### Strengths
+
+ - **Visual Evaluations**: Provides an intuitive, visual way to assess multiple regression models, aiding in easier
+ interpretation and evaluation of forecast accuracy.
+ - **Transformation Handling**: Can process specified data transformations such as "integrate," enhancing
+ flexibility.
+ - **Detailed Perspective**: Assesses performance on both training and testing datasets, offering a comprehensive
+ view of model behavior.
+
+ ### Limitations
+
+ - **Subjectivity**: Relies heavily on visual interpretation, which may vary between individuals.
+ - **Limited Transformation Capability**: Supports only the "integrate" transformation; other complex
+ transformations might not be handled.
+ - **Overhead**: Plotting can be computationally intensive for large datasets, increasing runtime.
+ - **Numerical Measurement**: Does not provide a numerical metric to quantify forecast accuracy, relying solely on
+ visual assessment.
  """

  name = "regression_forecast_plot_levels"
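The "integrate" transformation mentioned in the docstring above amounts to turning a differenced series back into levels by cumulative summation before forecasts and observations are plotted. A tiny illustration with made-up values (not data from the package):

import numpy as np

observed_diffs = np.array([0.5, -0.2, 0.3, 0.1])   # differenced (stationary) series
forecast_diffs = np.array([0.4, -0.1, 0.2, 0.2])

observed_levels = np.cumsum(observed_diffs)   # -> [0.5, 0.3, 0.6, 0.7]
forecast_levels = np.cumsum(forecast_diffs)   # -> [0.4, 0.3, 0.5, 0.7]
# The plot would then compare observed_levels against forecast_levels.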
validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py
@@ -16,44 +16,46 @@ logger = get_logger(__name__)
  @dataclass
  class RegressionModelSensitivityPlot(Metric):
  """
- Tests the sensitivity of a regression model to variations in independent variables by applying shocks and
- visualizing the effects.
-
- **Purpose**: The Regression Sensitivity Plot metric is designed to perform sensitivity analysis on regression
- models. This metric aims to measure the impact of slight changes (shocks) applied to individual variables on the
- system's outcome while keeping all other variables constant. By doing so, it analyzes the effects of each
- independent variable on the dependent variable within the regression model and helps identify significant risk
- factors that could substantially influence the model's output.
-
- **Test Mechanism**: This metric operates by initially applying shocks of varying magnitudes, defined by specific
- parameters, to each of the model's features, one at a time. With all other variables held constant, a new
- prediction is made for each dataset subjected to shocks. Any changes in the model's predictions are directly
- attributed to the shocks applied. In the event that the transformation parameter is set to "integrate", initial
- predictions and target values undergo transformation via an integration function before being plotted. Lastly, a
- plot demonstrating observed values against predicted values for each model is generated, showcasing a distinct line
- graph illustrating predictions for each shock.
-
- **Signs of High Risk**:
- - If the plot exhibits drastic alterations in model predictions consequent to minor shocks to an individual
- variable, it may indicate high risk. This underscores potentially high model sensitivity to changes in that
- variable, suggesting over-dependence on that variable for predictions.
- - Unusually high or unpredictable shifts in response to shocks may also denote potential model instability or
+ Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
+ visualizing the impact.
+
+ ### Purpose
+
+ The Regression Sensitivity Plot test is designed to perform sensitivity analysis on regression models. This test
+ aims to measure the impact of slight changes (shocks) applied to individual variables on the system's outcome while
+ keeping all other variables constant. By doing so, it analyzes the effects of each independent variable on the
+ dependent variable within the regression model, helping identify significant risk factors that could substantially
+ influence the model's output.
+
+ ### Test Mechanism
+
+ This test operates by initially applying shocks of varying magnitudes, defined by specific parameters, to each of
+ the model's features, one at a time. With all other variables held constant, a new prediction is made for each
+ dataset subjected to shocks. Any changes in the model's predictions are directly attributed to the shocks applied.
+ If the transformation parameter is set to "integrate," initial predictions and target values undergo transformation
+ via an integration function before being plotted. Finally, a plot demonstrating observed values against predicted
+ values for each model is generated, showcasing a distinct line graph illustrating predictions for each shock.
+
+ ### Signs of High Risk
+
+ - Drastic alterations in model predictions due to minor shocks to an individual variable, indicating high
+ sensitivity and potential over-dependence on that variable.
+ - Unusually high or unpredictable shifts in response to shocks, suggesting potential model instability or
  overfitting.

- **Strengths**:
- - The metric allows identification of variables strongly influencing the model outcomes, paving the way for
- understanding feature importance.
- - It generates visual plots which make the results easily interpretable even to non-technical stakeholders.
- - Beneficial in identifying overfitting and detecting unstable models that over-react to minor changes in variables.
-
- **Limitations**:
- - The metric operates on the assumption that all other variables remain unchanged during the application of a
- shock. However, real-world situations where variables may possess intricate interdependencies may not always
- reflect this.
- - It is best compatible with linear models and may not effectively evaluate the sensitivity of non-linear model
- configurations.
- - The metric does not provide a numerical risk measure. It offers only a visual representation, which may invite
- subjectivity in interpretation.
+ ### Strengths
+
+ - Helps identify variables that strongly influence model outcomes, aiding in understanding feature importance.
+ - Generates visual plots, making results easily interpretable even to non-technical stakeholders.
+ - Useful in identifying overfitting and detecting unstable models that react excessively to minor variable changes.
+
+ ### Limitations
+
+ - Operates on the assumption that all other variables remain unchanged during the application of a shock, which may
+ not reflect real-world interdependencies.
+ - Best compatible with linear models and may not effectively evaluate the sensitivity of non-linear models.
+ - Provides a visual representation without a numerical risk measure, potentially introducing subjectivity in
+ interpretation.
  """

  name = "regression_sensitivity_plot"
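The shock procedure described above perturbs one feature at a time while holding the rest constant. A hypothetical sketch of that single step, written for illustration (the helper name, shock sizes, and data are not the metric's own code):

import pandas as pd

def apply_shock(X: pd.DataFrame, feature: str, shock: float) -> pd.DataFrame:
    # Shift one feature by `shock` standard deviations; leave the rest untouched.
    shocked = X.copy()
    shocked[feature] = shocked[feature] + shock * X[feature].std()
    return shocked

X = pd.DataFrame({"rate": [1.0, 1.2, 0.9, 1.1], "spread": [0.20, 0.30, 0.25, 0.28]})
for shock in (0.1, 0.5, 1.0):
    X_shocked = apply_shock(X, "rate", shock)
    # model.predict(X_shocked) would be compared against model.predict(X) here,
    # one line per shock size in the resulting plot.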