validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
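
Note the `{sklearn/… → …}` renames under `validmind/unit_metrics` above: the intermediate `sklearn` package level is gone in 2.5.15. Assuming dotted unit-metric IDs mirror these module paths (the helper below is our sketch, not part of the package), migrating an ID is a one-line string fix:

```python
def migrate_metric_id(metric_id: str) -> str:
    """Drop the '.sklearn' package level removed between 2.5.6 and 2.5.15."""
    return metric_id.replace(".sklearn.", ".")


# e.g. the F1 unit metric, following the file move listed above
assert (
    migrate_metric_id("validmind.unit_metrics.classification.sklearn.F1")
    == "validmind.unit_metrics.classification.F1"
)
```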
validmind/tests/data_validation/ANOVAOneWayTable.py
@@ -1,138 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- from scipy.stats import f_oneway
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
-
- @dataclass
- class ANOVAOneWayTable(Metric):
-     """
-     Applies one-way ANOVA (Analysis of Variance) to identify statistically significant numerical features in the
-     dataset.
-
-     **Purpose**: The ANOVA (Analysis of Variance) One-Way Table metric is utilized to determine whether the mean of
-     numerical variables differs across different groups identified by target or categorical variables. Its primary
-     purpose is to scrutinize the significant impact of categorical variables on numerical ones. This method proves
-     essential in identifying statistically significant features corresponding to the target variable present in the
-     dataset.
-
-     **Test Mechanism**: The testing mechanism involves the ANOVA F-test's performance on each numerical variable
-     against the target. If no specific features are mentioned, all numerical features are tested. A p-value is produced
-     for each test and compared against a certain threshold (default being 0.05 if not specified). If the p-value is
-     less than or equal to this threshold, the feature is marked as 'Pass', indicating significant mean difference
-     across the groups. Otherwise, it's marked as 'Fail'. The test produces a DataFrame that includes variable name, F
-     statistic value, p-value, threshold, and pass/fail status for every numerical variable.
-
-     **Signs of High Risk**:
-     - A large number of 'Fail' results in the ANOVA F-test could signify high risk or underperformance in the model.
-     This issue may arise when multiple numerical variables in the dataset don't exhibit any significant difference
-     across the target variable groups.
-     - Features with high p-values also indicate a high risk as they imply a greater chance of obtaining observed data
-     given that the null hypothesis is true.
-
-     **Strengths**:
-     - The ANOVA One Way Table is highly efficient in identifying statistically significant features by simultaneously
-     comparing group means.
-     - Its flexibility allows the testing of all numerical features in the dataset when no specific ones are mentioned.
-     - This metric provides a convenient method to measure the statistical significance of numerical variables and
-     assists in selecting those variables influencing the classifier's predictions considerably.
-
-     **Limitations**:
-     - This metric assumes that the data is normally distributed, which may not always be the case leading to erroneous
-     test results.
-     - The sensitivity of the F-test to variance changes may hinder this metric's effectiveness, especially for datasets
-     with high variance.
-     - The ANOVA One Way test does not specify which group means differ statistically from others; it strictly asserts
-     the existence of a difference.
-     - The metric fails to provide insights into variable interactions, and significant effects due to these
-     interactions could easily be overlooked.
-     """
-
-     name = "anova_one_way_table"
-     required_inputs = ["dataset"]
-     default_params = {"features": None, "p_threshold": 0.05}
-     tasks = ["classification"]
-     tags = [
-         "tabular_data",
-         "statistical_test",
-         "multiclass_classification",
-         "binary_classification",
-         "numerical_data",
-     ]
-
-     def run(self):
-         features = self.params["features"]
-         p_threshold = self.params["p_threshold"]
-
-         # Select all numerical features if none are specified
-         if features is None:
-             features = self.inputs.dataset.feature_columns_numeric
-
-         anova_results = self.anova_numerical_features(features, p_threshold)
-
-         return self.cache_results(
-             {
-                 "anova_results": anova_results.to_dict(orient="records"),
-             }
-         )
-
-     def anova_numerical_features(self, features, p_threshold):
-         target_column = self.inputs.dataset.target_column
-         df = self.inputs.dataset.df
-
-         # Ensure the columns exist in the dataframe
-         for var in features:
-             if var not in df.columns:
-                 raise ValueError(f"The column '{var}' does not exist in the dataframe.")
-         if target_column not in df.columns:
-             raise ValueError(
-                 f"The target column '{target_column}' does not exist in the dataframe."
-             )
-
-         # Ensure the target variable is not included in num_vars
-         if target_column in features:
-             features.remove(target_column)
-
-         results = []
-
-         for var in features:
-             # Perform the ANOVA test
-             class_0 = df[df[target_column] == 0][var]
-             class_1 = df[df[target_column] == 1][var]
-
-             f, p = f_oneway(class_0, class_1)
-
-             # Add the result to the list of results
-             results.append(
-                 [var, f, p, p_threshold, "Pass" if p <= p_threshold else "Fail"]
-             )
-
-         # Convert results to a DataFrame and return
-         results_df = pd.DataFrame(
-             results,
-             columns=["Variable", "F statistic", "p-value", "Threshold", "Pass/Fail"],
-         )
-
-         # Sort by p-value in ascending order
-         results_df = results_df.sort_values(by="p-value")
-
-         return results_df
-
-     def summary(self, metric_value):
-         anova_results_table = metric_value["anova_results"]
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=anova_results_table,
-                     metadata=ResultTableMetadata(
-                         title="ANOVA F-Test Results for Numerical Features"
-                     ),
-                 )
-             ]
-         )
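
The hunk above deletes the ANOVA table test entirely. Its core computation is small, so the table can still be reproduced outside ValidMind if needed; a standalone sketch distilled from the deleted code (the function name is ours, and like the original it assumes a binary 0/1 target):

```python
import pandas as pd
from scipy.stats import f_oneway


def anova_one_way_table(df, features, target_column, p_threshold=0.05):
    """One-way ANOVA of each numerical feature against a binary target."""
    results = []
    for var in features:
        class_0 = df[df[target_column] == 0][var]
        class_1 = df[df[target_column] == 1][var]
        f, p = f_oneway(class_0, class_1)
        results.append([var, f, p, p_threshold, "Pass" if p <= p_threshold else "Fail"])
    return pd.DataFrame(
        results,
        columns=["Variable", "F statistic", "p-value", "Threshold", "Pass/Fail"],
    ).sort_values(by="p-value")
```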
validmind/tests/data_validation/BivariateFeaturesBarPlots.py
@@ -1,142 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.colors as mcolors
- import matplotlib.pyplot as plt
- import numpy as np
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class BivariateFeaturesBarPlots(Metric):
-     """
-     Generates visual bar plots to analyze the relationship between paired features within categorical data in the model.
-
-     **Purpose**: The BivariateFeaturesBarPlots metric is intended to perform a visual analysis of categorical data
-     within the model. The goal is to assess and understand the specific relationships between various feature pairs,
-     while simultaneously highlighting the model's target variable. This form of bivariate plotting is immensely
-     beneficial in uncovering trends, correlations, patterns, or inconsistencies that may not be readily apparent within
-     raw tabular data.
-
-     **Test Mechanism**: These tests establish bar plots for each pair of features defined within the parameters. The
-     dataset is grouped by each feature pair and then calculates the mean of the target variable within each specific
-     grouping. Each group is represented via a bar in the plot, and the height of this bar aligns with the calculated
-     mean. The colors assigned to these bars are based on the categorical section to which they pertain: these colors
-     can either come from a colormap or generated anew if the total number of categories exceeds the current colormap's
-     scope.
-
-     **Signs of High Risk**:
-     - If any values are found missing or inconsistent within the feature pairs.
-     - If there exist large discrepancies or irregularities between the mean values of certain categories within feature
-     pairs.
-     - If the parameters for feature pairs have not been specified or if they were wrongly defined.
-
-     **Strengths**:
-     - The BivariateFeaturesBarPlots provides a clear, visual comprehension of the relationships between feature pairs
-     and the target variable.
-     - It allows an easy comparison between different categories within feature pairs.
-     - The metric can handle a diverse array of categorical data, enhancing its universal applicability.
-     - It is highly customizable due to its allowance for users to define feature pairs based on their specific
-     requirements.
-
-     **Limitations**:
-     - It can only be used with categorical data, limiting its usability with numerical or textual data.
-     - It relies on manual input for feature pairs, which could result in the overlooking of important feature pairs if
-     not chosen judiciously.
-     - The generated bar plots could become overly cluttered and difficult to decipher when dealing with feature pairs
-     with a large number of categories.
-     - This metric only provides a visual evaluation and fails to offer any numerical or statistical measures to
-     quantify the relationship between feature pairs.
-     """
-
-     name = "bivariate_features_bar_plots"
-     required_inputs = ["dataset"]
-     default_params = {"features_pairs": None}
-     tasks = ["classification"]
-     tags = [
-         "tabular_data",
-         "categorical_data",
-         "binary_classification",
-         "multiclass_classification",
-         "visualization",
-     ]
-
-     def run(self):
-         features_pairs = self.params["features_pairs"]
-
-         if features_pairs is None:
-             raise ValueError(
-                 "The features_pairs parameter is required for this metric."
-             )
-
-         figures = self.plot_bivariate_bar(features_pairs)
-
-         return self.cache_results(figures=figures)
-
-     def plot_bivariate_bar(self, features_pairs):
-         status_var = self.inputs.dataset.target_column
-         figures = []
-         for x, hue in features_pairs.items():
-             df = self.inputs.dataset.df
-
-             means = df.groupby([x, hue])[status_var].mean().unstack().reset_index()
-             hue_categories = means.columns[1:]
-
-             n = len(hue_categories)
-             width = 1 / (n + 1)
-
-             plt.figure()
-
-             # Number of colors in the colormap
-             num_colors = len(plt.cm.get_cmap("tab10").colors)
-
-             if n <= num_colors:
-                 # Use the colors from the colormap if there are enough
-                 color_palette = {
-                     category: color
-                     for category, color in zip(
-                         hue_categories, plt.cm.get_cmap("tab10").colors
-                     )
-                 }
-             else:
-                 # Generate a larger set of colors if needed
-                 hues = np.linspace(0, 1, n + 1)[
-                     :-1
-                 ]  # exclude the last value which is equal to 1
-                 color_palette = {
-                     category: mcolors.hsv_to_rgb(
-                         (h, 1, 1)
-                     )  # replace 1, 1 with desired saturation and value
-                     for category, h in zip(hue_categories, hues)
-                 }
-
-             for i, hue_category in enumerate(hue_categories):
-                 plt.bar(
-                     np.arange(len(means)) + i * width,
-                     means[hue_category],
-                     color=color_palette[hue_category],
-                     alpha=0.7,
-                     label=hue_category,
-                     width=width,
-                 )
-
-             plt.title(x + " by " + hue)
-             plt.xlabel(x)
-             plt.ylabel("Default Ratio")
-             plt.xticks(ticks=np.arange(len(means)), labels=means[x], rotation=90)
-             plt.legend()
-             plt.show()
-
-             figures.append(
-                 Figure(
-                     for_object=self, key=f"{self.key}:{x}_{hue}", figure=plt.figure()
-                 )
-             )
-
-         plt.close("all")
-
-         return figures
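
Also deleted in full. The essential plot — the mean of the target per (feature, hue) group drawn as grouped bars — survives as a few matplotlib calls; a sketch condensed from the code above (function name ours):

```python
import matplotlib.pyplot as plt
import numpy as np


def bivariate_bar_plot(df, x, hue, target_column):
    """Grouped bars: mean of the target for each (x, hue) category pair."""
    means = df.groupby([x, hue])[target_column].mean().unstack()
    width = 1 / (len(means.columns) + 1)
    fig, ax = plt.subplots()
    for i, category in enumerate(means.columns):
        ax.bar(
            np.arange(len(means)) + i * width,
            means[category],
            width=width,
            alpha=0.7,
            label=str(category),
        )
    ax.set_xticks(np.arange(len(means)))
    ax.set_xticklabels(means.index, rotation=90)
    ax.set_title(f"{x} by {hue}")
    ax.legend(title=hue)
    return fig
```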
validmind/tests/data_validation/BivariateHistograms.py
@@ -1,117 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class BivariateHistograms(Metric):
-     """
-     Generates bivariate histograms for paired features, aiding in visual inspection of categorical variables'
-     distributions and correlations.
-
-     **Purpose**: This metric, dubbed BivariateHistograms, is primarily used for visual data analysis via the inspection
-     of variable distribution, specifically categorical variables. Its main objective is to ascertain any potential
-     correlations between these variables and distributions within each defined target class. This is achieved by
-     offering an intuitive avenue into gaining insights into the characteristics of the data and any plausible patterns
-     therein.
-
-     **Test Mechanism**: The working mechanism of the BivariateHistograms module revolves around an input dataset and a
-     series of feature pairs. It uses seaborn's histogram plotting function and matplotlib techniques to create
-     bivariate histograms for each feature pair in the dataset. Two histograms, stratified by the target column status,
-     are produced for every pair of features. This enables the telling apart of different target statuses through color
-     differentiation. The module also offers optional functionality for restricting the data by a specific status
-     through the target_filter parameter.
-
-     **Signs of High Risk**:
-     - Irregular or unexpected distributions of data across the different categories.
-     - Highly skewed data distributions.
-     - Significant deviations from the perceived 'normal' or anticipated distributions.
-     - Large discrepancies in distribution patterns between various target statuses.
-
-     **Strengths**:
-     - Owing to its simplicity, the histogram-based approach is easy to implement and interpret which translates to
-     quick insights.
-     - The metrics provides a consolidated view of the distribution of data across different target conditions for each
-     variable pair, thereby assisting in highlighting potential correlations and patterns.
-     - It proves advantageous in spotting anomalies, comprehending interactions among features, and facilitating
-     exploratory data analysis.
-
-     **Limitations**:
-     - Its simplicity may be a drawback when it comes to spotting intricate or complex patterns in data.
-     - Overplotting might occur when working with larger datasets.
-     - The metric is only applicable to categorical data, and offers limited insights for numerical or continuous
-     variables.
-     - The interpretation of visual results hinges heavily on the expertise of the observer, possibly leading to
-     subjective analysis.
-     """
-
-     name = "bivariate_histograms"
-     required_inputs = ["dataset"]
-     default_params = {"features_pairs": None, "target_filter": None}
-     tasks = ["classification"]
-     tags = [
-         "tabular_data",
-         "categorical_data",
-         "binary_classification",
-         "multiclass_classification",
-         "visualization",
-     ]
-
-     def plot_bivariate_histogram(self, features_pairs, target_filter):
-         status_var = self.inputs.dataset.target_column
-         figures = []
-         palette = {0: (0.5, 0.5, 0.5, 0.8), 1: "tab:red"}
-
-         for x, y in features_pairs.items():
-             df = self.inputs.dataset.df
-             if target_filter is not None:
-                 df = df[df[status_var] == target_filter]
-
-             fig, axes = plt.subplots(2, 1)
-
-             for ax, var in zip(axes, [x, y]):
-                 for status, color in palette.items():
-                     subset = df[df[status_var] == status]
-                     sns.histplot(
-                         subset[var],
-                         ax=ax,
-                         color=color,
-                         edgecolor=None,
-                         kde=True,
-                         label=status_var if status else "Non-" + status_var,
-                     )
-
-                 ax.set_title(f"Histogram of {var} by {status_var}")
-                 ax.set_xlabel(var)
-                 ax.legend()
-
-             plt.tight_layout()
-             plt.show()
-
-             figures.append(
-                 Figure(for_object=self, key=f"{self.key}:{x}_{y}", figure=plt.figure())
-             )
-
-         plt.close("all")
-
-         return figures
-
-     def run(self):
-         features_pairs = self.params["features_pairs"]
-         target_filter = self.params["target_filter"]
-
-         if features_pairs is None:
-             raise ValueError(
-                 "The features_pairs parameter is required for this metric."
-             )
-
-         figures = self.plot_bivariate_histogram(features_pairs, target_filter)
-
-         return self.cache_results(figures=figures)
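
As with the other bivariate tests, the target-stratified histograms reduce to a couple of seaborn calls per feature pair; a condensed standalone sketch (function name ours; the fixed 0/1 palette is carried over from the deleted code):

```python
import matplotlib.pyplot as plt
import seaborn as sns


def bivariate_histograms(df, x, y, target_column):
    """Two stacked panels: histogram of each variable, colored by target class."""
    palette = {0: (0.5, 0.5, 0.5, 0.8), 1: "tab:red"}
    fig, axes = plt.subplots(2, 1, tight_layout=True)
    for ax, var in zip(axes, [x, y]):
        for status, color in palette.items():
            sns.histplot(
                df[df[target_column] == status][var],
                ax=ax, color=color, kde=True, label=str(status),
            )
        ax.set_title(f"Histogram of {var} by {target_column}")
        ax.legend()
    return fig
```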
validmind/tests/data_validation/HeatmapFeatureCorrelations.py
@@ -1,124 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class HeatmapFeatureCorrelations(Metric):
-     """
-     Creates a heatmap to visually represent correlation patterns between pairs of numerical features in a dataset.
-
-     **Purpose:** The HeatmapFeatureCorrelations metric is utilized to evaluate the degree of interrelationships between
-     pairs of input features within a dataset. This metric allows us to visually comprehend the correlation patterns
-     through a heatmap, which can be essential in understanding which features may contribute most significantly to the
-     performance of the model. Features that have high intercorrelation can potentially reduce the model's ability to
-     learn, thus impacting the overall performance and stability of the machine learning model.
-
-     **Test Mechanism:** The metric executes the correlation test by computing the Pearson correlations for all pairs of
-     numerical features. It then generates a heatmap plot using seaborn, a Python data visualization library. The
-     colormap ranges from -1 to 1, indicating perfect negative correlation and perfect positive correlation
-     respectively. A 'declutter' option is provided which, if set to true, removes variable names and numerical
-     correlations from the plot to provide a more streamlined view. The size of feature names and correlation
-     coefficients can be controlled through 'fontsize' parameters.
-
-     **Signs of High Risk:**
-
-     - Indicators of potential risk include features with high absolute correlation values.
-     - A significant degree of multicollinearity might lead to instabilities in the trained model and can also result in
-     overfitting.
-     - The presence of multiple homogeneous blocks of high positive or negative correlation within the plot might
-     indicate redundant or irrelevant features included within the dataset.
-
-     **Strengths:**
-
-     - The strength of this metric lies in its ability to visually represent the extent and direction of correlation
-     between any two numeric features, which aids in the interpretation and understanding of complex data relationships.
-     - The heatmap provides an immediate and intuitively understandable representation, hence, it is extremely useful
-     for high-dimensional datasets where extracting meaningful relationships might be challenging.
-
-     **Limitations:**
-
-     - The central limitation might be that it can only calculate correlation between numeric features, making it
-     unsuitable for categorical variables unless they are already numerically encoded in a meaningful manner.
-     - It uses Pearson's correlation, which only measures linear relationships between features. It may perform poorly
-     in cases where the relationship is non-linear.
-     - Large feature sets might result in cluttered and difficult-to-read correlation heatmaps, especially when the
-     'declutter' option is set to false.
-     """
-
-     name = "heatmap_feature_correlations"
-     required_inputs = ["dataset"]
-     default_params = {"declutter": None, "fontsize": None, "num_features": None}
-     tasks = ["classification", "regression"]
-     tags = ["tabular_data", "visualization", "correlation"]
-
-     def run(self):
-         features = self.params.get("features")
-         declutter = self.params.get("declutter", False)
-         fontsize = self.params.get("fontsize", 13)
-
-         # Filter DataFrame based on num_features
-         if features is None:
-             df = self.inputs.dataset.df
-         else:
-             df = self.inputs.dataset.df[features]
-
-         figure = self.visualize_correlations(df, declutter, fontsize)
-
-         return self.cache_results(figures=figure)
-
-     def visualize_correlations(self, df, declutter, fontsize):
-         # Compute Pearson correlations
-         correlations = df.corr(method="pearson")
-
-         # Create a figure and axes
-         fig, ax = plt.subplots()
-
-         # If declutter option is true, do not show correlation coefficients and variable names
-         if declutter:
-             sns.heatmap(
-                 correlations,
-                 cmap="coolwarm",
-                 vmin=-1,
-                 vmax=1,
-                 ax=ax,
-                 cbar_kws={"label": "Correlation"},
-             )
-             ax.set_xticklabels([])
-             ax.set_yticklabels([])
-             ax.set_xlabel(f"{df.shape[1]} Numerical Features", fontsize=fontsize)
-             ax.set_ylabel(f"{df.shape[1]} Numerical Features", fontsize=fontsize)
-         else:
-             # For the correlation numbers, you can use the 'annot_kws' argument
-             sns.heatmap(
-                 correlations,
-                 cmap="coolwarm",
-                 vmin=-1,
-                 vmax=1,
-                 annot=True,
-                 fmt=".2f",
-                 ax=ax,
-                 cbar_kws={"label": "Correlation"},
-                 annot_kws={"size": fontsize},
-             )
-             plt.yticks(fontsize=fontsize)
-             plt.xticks(rotation=90, fontsize=fontsize)
-
-             # To set the fontsize of the color bar, you can iterate over its text elements and set their size
-             cbar = ax.collections[0].colorbar
-             cbar.ax.tick_params(labelsize=fontsize)
-             cbar.set_label("Correlation", size=fontsize)
-
-         # Show the plot
-         plt.tight_layout()
-         plt.close("all")
-
-         figure = Figure(for_object=self, key=self.key, figure=fig)
-         return [figure]
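
The annotated variant of the deleted heatmap is essentially one `sns.heatmap` call once the ValidMind plumbing and the `declutter` branch are stripped away; a standalone sketch (function name ours; `numeric_only=True` is our addition for recent pandas versions):

```python
import matplotlib.pyplot as plt
import seaborn as sns


def correlation_heatmap(df, fontsize=13):
    """Pearson correlation heatmap over the numeric columns of df."""
    correlations = df.corr(method="pearson", numeric_only=True)
    fig, ax = plt.subplots()
    sns.heatmap(
        correlations,
        cmap="coolwarm", vmin=-1, vmax=1,
        annot=True, fmt=".2f", ax=ax,
        cbar_kws={"label": "Correlation"},
        annot_kws={"size": fontsize},
    )
    fig.tight_layout()
    return fig
```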
validmind/tests/data_validation/MissingValuesRisk.py
@@ -1,88 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
-
- @dataclass
- class MissingValuesRisk(Metric):
-     """
-     Assesses and quantifies the risk related to missing values in a dataset used for training an ML model.
-
-     **Purpose**: The Missing Values Risk metric is specifically designed to assess and quantify the risk associated
-     with missing values in the dataset used for machine learning model training. It measures two specific risks: the
-     percentage of total data that are missing, and the percentage of all variables (columns) that contain some missing
-     values.
-
-     **Test Mechanism**: Initially, the metric calculates the total number of data points in the dataset and the count
-     of missing values. It then inspects each variable (column) to determine how many contain at least one missing
-     datapoint. By methodically counting missing datapoints across the entire dataset and each variable (column), it
-     identifies the percentage of missing values in the entire dataset and the percentage of variables (columns) with
-     such values.
-
-     **Signs of High Risk**:
-
-     - Record high percentages in either of the risk measures could suggest a high risk.
-     - If the dataset indicates a high percentage of missing values, it might significantly undermine the model's
-     performance and credibility.
-     - If a significant portion of variables (columns) in the dataset are missing values, this could make the model
-     susceptible to bias and overfitting.
-
-     **Strengths**:
-
-     - The metric offers valuable insights into the readiness of a dataset for model training as missing values can
-     heavily destabilize both the model's performance and predictive capabilities.
-     - The metric's quantification of the risks caused by missing values allows for the use of targeted methods to
-     manage these values correctly- either through removal, imputation, or alternative strategies.
-     - The metric has the flexibility to be applied to both classification and regression assignments, maintaining its
-     utility across a wide range of models and scenarios.
-
-     **Limitations**:
-
-     - The metric primarily identifies and quantifies the risk associated with missing values without suggesting
-     specific mitigation strategies.
-     - The metric does not ascertain whether the missing values are random or associated with an underlying issue in the
-     stages of data collection or preprocessing.
-     - However, the identification of the presence and scale of missing data is the essential initial step towards
-     improving data quality.
-     """
-
-     name = "missing_values_risk"
-     required_inputs = ["dataset"]
-     tasks = ["classification", "regression"]
-     tags = ["tabular_data", "data_quality", "risk_analysis"]
-
-     def run(self):
-         total_cells = self.inputs.dataset.df.size
-         total_missing = self.inputs.dataset.df.isnull().sum().sum()
-         total_columns = self.inputs.dataset.df.shape[1]
-         columns_with_missing = self.inputs.dataset.df.isnull().any().sum()
-
-         risk_measures = {
-             "Missing Values in the Dataset": round(
-                 (total_missing / total_cells) * 100, 2
-             ),
-             "Variables with Missing Values": round(
-                 (columns_with_missing / total_columns) * 100, 2
-             ),
-         }
-
-         return self.cache_results(risk_measures)
-
-     def summary(self, metric_value):
-         risk_measures_table = [
-             {"Risk Metric": measure, "Value (%)": value}
-             for measure, value in metric_value.items()
-         ]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=risk_measures_table,
-                     metadata=ResultTableMetadata(title="Missing Values Risk Measures"),
-                 ),
-             ]
-         )
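
The two percentages this metric reported are one-liners over a DataFrame; a standalone equivalent of the deleted `run` method (function name ours):

```python
import numpy as np
import pandas as pd


def missing_values_risk(df):
    """Percentage of missing cells, and percentage of columns with any missing value."""
    total_missing = df.isnull().sum().sum()
    columns_with_missing = df.isnull().any().sum()
    return {
        "Missing Values in the Dataset": round(total_missing / df.size * 100, 2),
        "Variables with Missing Values": round(
            columns_with_missing / df.shape[1] * 100, 2
        ),
    }


# missing_values_risk(pd.DataFrame({"a": [1, np.nan, 3], "b": [4, 5, 6]}))
# -> {'Missing Values in the Dataset': 16.67, 'Variables with Missing Values': 50.0}
```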
validmind/tests/model_validation/ModelMetadataComparison.py
@@ -1,59 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- import pandas as pd
-
- from validmind import tags, tasks
- from validmind.utils import get_model_info
-
-
- @tags("model_training", "metadata")
- @tasks("regression", "time_series_forecasting")
- def ModelMetadataComparison(models):
-     """
-     Compare metadata of different models and generate a summary table with the results.
-
-     **Purpose**: The purpose of this function is to compare the metadata of different models, including information about their architecture, framework, framework version, and programming language.
-
-     **Test Mechanism**: The function retrieves the metadata for each model using `get_model_info`, renames columns according to a predefined set of labels, and compiles this information into a summary table.
-
-     **Signs of High Risk**:
-     - Inconsistent or missing metadata across models can indicate potential issues in model documentation or management.
-     - Significant differences in framework versions or programming languages might pose challenges in model integration and deployment.
-
-     **Strengths**:
-     - Provides a clear comparison of essential model metadata.
-     - Standardizes metadata labels for easier interpretation and comparison.
-     - Helps identify potential compatibility or consistency issues across models.
-
-     **Limitations**:
-     - Assumes that the `get_model_info` function returns all necessary metadata fields.
-     - Relies on the correctness and completeness of the metadata provided by each model.
-     - Does not include detailed parameter information, focusing instead on high-level metadata.
-     """
-     column_labels = {
-         "architecture": "Modeling Technique",
-         "framework": "Modeling Framework",
-         "framework_version": "Framework Version",
-         "language": "Programming Language",
-     }
-
-     description = []
-
-     for model in models:
-         model_info = get_model_info(model)
-
-         # Rename columns based on provided labels
-         model_info_renamed = {
-             column_labels.get(k, k): v for k, v in model_info.items() if k != "params"
-         }
-
-         # Add model name or identifier if available
-         model_info_renamed = {"Model Name": model.input_id, **model_info_renamed}
-
-         description.append(model_info_renamed)
-
-     description_df = pd.DataFrame(description)
-
-     return description_df
- return description_df