validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
validmind/tests/data_validation/BivariateHistograms.py (deleted)
@@ -1,117 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class BivariateHistograms(Metric):
-     """
-     Generates bivariate histograms for paired features, aiding in visual inspection of categorical variables'
-     distributions and correlations.
-
-     **Purpose**: This metric, dubbed BivariateHistograms, is primarily used for visual data analysis via the inspection
-     of variable distribution, specifically categorical variables. Its main objective is to ascertain any potential
-     correlations between these variables and distributions within each defined target class. This is achieved by
-     offering an intuitive avenue into gaining insights into the characteristics of the data and any plausible patterns
-     therein.
-
-     **Test Mechanism**: The working mechanism of the BivariateHistograms module revolves around an input dataset and a
-     series of feature pairs. It uses seaborn's histogram plotting function and matplotlib techniques to create
-     bivariate histograms for each feature pair in the dataset. Two histograms, stratified by the target column status,
-     are produced for every pair of features. This enables the telling apart of different target statuses through color
-     differentiation. The module also offers optional functionality for restricting the data by a specific status
-     through the target_filter parameter.
-
-     **Signs of High Risk**:
-     - Irregular or unexpected distributions of data across the different categories.
-     - Highly skewed data distributions.
-     - Significant deviations from the perceived 'normal' or anticipated distributions.
-     - Large discrepancies in distribution patterns between various target statuses.
-
-     **Strengths**:
-     - Owing to its simplicity, the histogram-based approach is easy to implement and interpret which translates to
-     quick insights.
-     - The metrics provides a consolidated view of the distribution of data across different target conditions for each
-     variable pair, thereby assisting in highlighting potential correlations and patterns.
-     - It proves advantageous in spotting anomalies, comprehending interactions among features, and facilitating
-     exploratory data analysis.
-
-     **Limitations**:
-     - Its simplicity may be a drawback when it comes to spotting intricate or complex patterns in data.
-     - Overplotting might occur when working with larger datasets.
-     - The metric is only applicable to categorical data, and offers limited insights for numerical or continuous
-     variables.
-     - The interpretation of visual results hinges heavily on the expertise of the observer, possibly leading to
-     subjective analysis.
-     """
-
-     name = "bivariate_histograms"
-     required_inputs = ["dataset"]
-     default_params = {"features_pairs": None, "target_filter": None}
-     tasks = ["classification"]
-     tags = [
-         "tabular_data",
-         "categorical_data",
-         "binary_classification",
-         "multiclass_classification",
-         "visualization",
-     ]
-
-     def plot_bivariate_histogram(self, features_pairs, target_filter):
-         status_var = self.inputs.dataset.target_column
-         figures = []
-         palette = {0: (0.5, 0.5, 0.5, 0.8), 1: "tab:red"}
-
-         for x, y in features_pairs.items():
-             df = self.inputs.dataset.df
-             if target_filter is not None:
-                 df = df[df[status_var] == target_filter]
-
-             fig, axes = plt.subplots(2, 1)
-
-             for ax, var in zip(axes, [x, y]):
-                 for status, color in palette.items():
-                     subset = df[df[status_var] == status]
-                     sns.histplot(
-                         subset[var],
-                         ax=ax,
-                         color=color,
-                         edgecolor=None,
-                         kde=True,
-                         label=status_var if status else "Non-" + status_var,
-                     )
-
-                 ax.set_title(f"Histogram of {var} by {status_var}")
-                 ax.set_xlabel(var)
-                 ax.legend()
-
-             plt.tight_layout()
-             plt.show()
-
-             figures.append(
-                 Figure(for_object=self, key=f"{self.key}:{x}_{y}", figure=plt.figure())
-             )
-
-         plt.close("all")
-
-         return figures
-
-     def run(self):
-         features_pairs = self.params["features_pairs"]
-         target_filter = self.params["target_filter"]
-
-         if features_pairs is None:
-             raise ValueError(
-                 "The features_pairs parameter is required for this metric."
-             )
-
-         figures = self.plot_bivariate_histogram(features_pairs, target_filter)
-
-         return self.cache_results(figures=figures)
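The removed test is a thin wrapper around seaborn's `histplot`, stratified by the target column. For readers who relied on it, a minimal standalone sketch of the same visualization (hypothetical `bivariate_histograms` helper, not part of the validmind API; assumes a pandas DataFrame with a binary target column):

```python
import matplotlib.pyplot as plt
import seaborn as sns

def bivariate_histograms(df, feature_pairs, target="target"):
    """Plot KDE-smoothed histograms for each feature in a pair, split by target class."""
    for x, y in feature_pairs:
        fig, axes = plt.subplots(2, 1, figsize=(6, 6))
        for ax, var in zip(axes, (x, y)):
            # hue= colors the bars by target class, mirroring the removed test's palette logic
            sns.histplot(data=df, x=var, hue=target, kde=True, ax=ax)
            ax.set_title(f"Histogram of {var} by {target}")
        fig.tight_layout()
    plt.show()
```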
validmind/tests/data_validation/HeatmapFeatureCorrelations.py (deleted)
@@ -1,124 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class HeatmapFeatureCorrelations(Metric):
-     """
-     Creates a heatmap to visually represent correlation patterns between pairs of numerical features in a dataset.
-
-     **Purpose:** The HeatmapFeatureCorrelations metric is utilized to evaluate the degree of interrelationships between
-     pairs of input features within a dataset. This metric allows us to visually comprehend the correlation patterns
-     through a heatmap, which can be essential in understanding which features may contribute most significantly to the
-     performance of the model. Features that have high intercorrelation can potentially reduce the model's ability to
-     learn, thus impacting the overall performance and stability of the machine learning model.
-
-     **Test Mechanism:** The metric executes the correlation test by computing the Pearson correlations for all pairs of
-     numerical features. It then generates a heatmap plot using seaborn, a Python data visualization library. The
-     colormap ranges from -1 to 1, indicating perfect negative correlation and perfect positive correlation
-     respectively. A 'declutter' option is provided which, if set to true, removes variable names and numerical
-     correlations from the plot to provide a more streamlined view. The size of feature names and correlation
-     coefficients can be controlled through 'fontsize' parameters.
-
-     **Signs of High Risk:**
-
-     - Indicators of potential risk include features with high absolute correlation values.
-     - A significant degree of multicollinearity might lead to instabilities in the trained model and can also result in
-     overfitting.
-     - The presence of multiple homogeneous blocks of high positive or negative correlation within the plot might
-     indicate redundant or irrelevant features included within the dataset.
-
-     **Strengths:**
-
-     - The strength of this metric lies in its ability to visually represent the extent and direction of correlation
-     between any two numeric features, which aids in the interpretation and understanding of complex data relationships.
-     - The heatmap provides an immediate and intuitively understandable representation, hence, it is extremely useful
-     for high-dimensional datasets where extracting meaningful relationships might be challenging.
-
-     **Limitations:**
-
-     - The central limitation might be that it can only calculate correlation between numeric features, making it
-     unsuitable for categorical variables unless they are already numerically encoded in a meaningful manner.
-     - It uses Pearson's correlation, which only measures linear relationships between features. It may perform poorly
-     in cases where the relationship is non-linear.
-     - Large feature sets might result in cluttered and difficult-to-read correlation heatmaps, especially when the
-     'declutter' option is set to false.
-     """
-
-     name = "heatmap_feature_correlations"
-     required_inputs = ["dataset"]
-     default_params = {"declutter": None, "fontsize": None, "num_features": None}
-     tasks = ["classification", "regression"]
-     tags = ["tabular_data", "visualization", "correlation"]
-
-     def run(self):
-         features = self.params.get("features")
-         declutter = self.params.get("declutter", False)
-         fontsize = self.params.get("fontsize", 13)
-
-         # Filter DataFrame based on num_features
-         if features is None:
-             df = self.inputs.dataset.df
-         else:
-             df = self.inputs.dataset.df[features]
-
-         figure = self.visualize_correlations(df, declutter, fontsize)
-
-         return self.cache_results(figures=figure)
-
-     def visualize_correlations(self, df, declutter, fontsize):
-         # Compute Pearson correlations
-         correlations = df.corr(method="pearson")
-
-         # Create a figure and axes
-         fig, ax = plt.subplots()
-
-         # If declutter option is true, do not show correlation coefficients and variable names
-         if declutter:
-             sns.heatmap(
-                 correlations,
-                 cmap="coolwarm",
-                 vmin=-1,
-                 vmax=1,
-                 ax=ax,
-                 cbar_kws={"label": "Correlation"},
-             )
-             ax.set_xticklabels([])
-             ax.set_yticklabels([])
-             ax.set_xlabel(f"{df.shape[1]} Numerical Features", fontsize=fontsize)
-             ax.set_ylabel(f"{df.shape[1]} Numerical Features", fontsize=fontsize)
-         else:
-             # For the correlation numbers, you can use the 'annot_kws' argument
-             sns.heatmap(
-                 correlations,
-                 cmap="coolwarm",
-                 vmin=-1,
-                 vmax=1,
-                 annot=True,
-                 fmt=".2f",
-                 ax=ax,
-                 cbar_kws={"label": "Correlation"},
-                 annot_kws={"size": fontsize},
-             )
-             plt.yticks(fontsize=fontsize)
-             plt.xticks(rotation=90, fontsize=fontsize)
-
-         # To set the fontsize of the color bar, you can iterate over its text elements and set their size
-         cbar = ax.collections[0].colorbar
-         cbar.ax.tick_params(labelsize=fontsize)
-         cbar.set_label("Correlation", size=fontsize)
-
-         # Show the plot
-         plt.tight_layout()
-         plt.close("all")
-
-         figure = Figure(for_object=self, key=self.key, figure=fig)
-         return [figure]
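The removed metric boils down to a Pearson-correlation heatmap; note that `PearsonCorrelationMatrix` (entry 49 in the file list) was updated in the same release and covers similar ground. A minimal standalone sketch (hypothetical helper, not part of the validmind API; assumes pandas and seaborn):

```python
import matplotlib.pyplot as plt
import seaborn as sns

def correlation_heatmap(df, fontsize=13):
    # Pearson correlations over numeric columns only, as in the removed metric
    corr = df.select_dtypes("number").corr(method="pearson")
    fig, ax = plt.subplots()
    sns.heatmap(
        corr, cmap="coolwarm", vmin=-1, vmax=1,
        annot=True, fmt=".2f", annot_kws={"size": fontsize},
        cbar_kws={"label": "Correlation"}, ax=ax,
    )
    fig.tight_layout()
    return fig
```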
validmind/tests/data_validation/MissingValuesRisk.py (deleted)
@@ -1,88 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
-
- @dataclass
- class MissingValuesRisk(Metric):
-     """
-     Assesses and quantifies the risk related to missing values in a dataset used for training an ML model.
-
-     **Purpose**: The Missing Values Risk metric is specifically designed to assess and quantify the risk associated
-     with missing values in the dataset used for machine learning model training. It measures two specific risks: the
-     percentage of total data that are missing, and the percentage of all variables (columns) that contain some missing
-     values.
-
-     **Test Mechanism**: Initially, the metric calculates the total number of data points in the dataset and the count
-     of missing values. It then inspects each variable (column) to determine how many contain at least one missing
-     datapoint. By methodically counting missing datapoints across the entire dataset and each variable (column), it
-     identifies the percentage of missing values in the entire dataset and the percentage of variables (columns) with
-     such values.
-
-     **Signs of High Risk**:
-
-     - Record high percentages in either of the risk measures could suggest a high risk.
-     - If the dataset indicates a high percentage of missing values, it might significantly undermine the model's
-     performance and credibility.
-     - If a significant portion of variables (columns) in the dataset are missing values, this could make the model
-     susceptible to bias and overfitting.
-
-     **Strengths**:
-
-     - The metric offers valuable insights into the readiness of a dataset for model training as missing values can
-     heavily destabilize both the model's performance and predictive capabilities.
-     - The metric's quantification of the risks caused by missing values allows for the use of targeted methods to
-     manage these values correctly- either through removal, imputation, or alternative strategies.
-     - The metric has the flexibility to be applied to both classification and regression assignments, maintaining its
-     utility across a wide range of models and scenarios.
-
-     **Limitations**:
-
-     - The metric primarily identifies and quantifies the risk associated with missing values without suggesting
-     specific mitigation strategies.
-     - The metric does not ascertain whether the missing values are random or associated with an underlying issue in the
-     stages of data collection or preprocessing.
-     - However, the identification of the presence and scale of missing data is the essential initial step towards
-     improving data quality.
-     """
-
-     name = "missing_values_risk"
-     required_inputs = ["dataset"]
-     tasks = ["classification", "regression"]
-     tags = ["tabular_data", "data_quality", "risk_analysis"]
-
-     def run(self):
-         total_cells = self.inputs.dataset.df.size
-         total_missing = self.inputs.dataset.df.isnull().sum().sum()
-         total_columns = self.inputs.dataset.df.shape[1]
-         columns_with_missing = self.inputs.dataset.df.isnull().any().sum()
-
-         risk_measures = {
-             "Missing Values in the Dataset": round(
-                 (total_missing / total_cells) * 100, 2
-             ),
-             "Variables with Missing Values": round(
-                 (columns_with_missing / total_columns) * 100, 2
-             ),
-         }
-
-         return self.cache_results(risk_measures)
-
-     def summary(self, metric_value):
-         risk_measures_table = [
-             {"Risk Metric": measure, "Value (%)": value}
-             for measure, value in metric_value.items()
-         ]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=risk_measures_table,
-                     metadata=ResultTableMetadata(title="Missing Values Risk Measures"),
-                 ),
-             ]
-         )
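No replacement is listed for MissingValuesRisk, but its two risk measures reduce to a few pandas calls. A minimal standalone sketch (hypothetical helper, mirroring the removed `run` method):

```python
import pandas as pd

def missing_values_risk(df: pd.DataFrame) -> dict:
    # Percentage of all cells that are missing
    pct_missing_cells = round(df.isnull().sum().sum() / df.size * 100, 2)
    # Percentage of columns containing at least one missing value
    pct_cols_missing = round(df.isnull().any().sum() / df.shape[1] * 100, 2)
    return {
        "Missing Values in the Dataset": pct_missing_cells,
        "Variables with Missing Values": pct_cols_missing,
    }
```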
validmind/tests/model_validation/ModelMetadataComparison.py (deleted)
@@ -1,59 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- import pandas as pd
-
- from validmind import tags, tasks
- from validmind.utils import get_model_info
-
-
- @tags("model_training", "metadata")
- @tasks("regression", "time_series_forecasting")
- def ModelMetadataComparison(models):
-     """
-     Compare metadata of different models and generate a summary table with the results.
-
-     **Purpose**: The purpose of this function is to compare the metadata of different models, including information about their architecture, framework, framework version, and programming language.
-
-     **Test Mechanism**: The function retrieves the metadata for each model using `get_model_info`, renames columns according to a predefined set of labels, and compiles this information into a summary table.
-
-     **Signs of High Risk**:
-     - Inconsistent or missing metadata across models can indicate potential issues in model documentation or management.
-     - Significant differences in framework versions or programming languages might pose challenges in model integration and deployment.
-
-     **Strengths**:
-     - Provides a clear comparison of essential model metadata.
-     - Standardizes metadata labels for easier interpretation and comparison.
-     - Helps identify potential compatibility or consistency issues across models.
-
-     **Limitations**:
-     - Assumes that the `get_model_info` function returns all necessary metadata fields.
-     - Relies on the correctness and completeness of the metadata provided by each model.
-     - Does not include detailed parameter information, focusing instead on high-level metadata.
-     """
-     column_labels = {
-         "architecture": "Modeling Technique",
-         "framework": "Modeling Framework",
-         "framework_version": "Framework Version",
-         "language": "Programming Language",
-     }
-
-     description = []
-
-     for model in models:
-         model_info = get_model_info(model)
-
-         # Rename columns based on provided labels
-         model_info_renamed = {
-             column_labels.get(k, k): v for k, v in model_info.items() if k != "params"
-         }
-
-         # Add model name or identifier if available
-         model_info_renamed = {"Model Name": model.input_id, **model_info_renamed}
-
-         description.append(model_info_renamed)
-
-     description_df = pd.DataFrame(description)
-
-     return description_df
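ModelMetadataComparison was removed while `ModelMetadata.py` (entry 96 in the file list) was reworked in the same release. The core of the removed function is a rename-and-tabulate step over `get_model_info` dictionaries; a minimal standalone sketch (hypothetical `metadata_table` helper, not part of the validmind API):

```python
import pandas as pd

# Labels copied from the removed function
COLUMN_LABELS = {
    "architecture": "Modeling Technique",
    "framework": "Modeling Framework",
    "framework_version": "Framework Version",
    "language": "Programming Language",
}

def metadata_table(model_infos):
    """model_infos: list of dicts shaped like get_model_info output (hypothetical input)."""
    rows = [
        {COLUMN_LABELS.get(k, k): v for k, v in info.items() if k != "params"}
        for info in model_infos
    ]
    return pd.DataFrame(rows)
```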
validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py (deleted)
@@ -1,83 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- import pandas as pd
- from sklearn.inspection import permutation_importance
-
- from validmind import tags, tasks
-
-
- @tags("model_explainability", "sklearn")
- @tasks("regression", "time_series_forecasting")
- def FeatureImportanceComparison(datasets, models, num_features=3):
-     """
-     Compare feature importance scores for each model and generate a summary table
-     with the top important features.
-
-     **Purpose**: The purpose of this function is to compare the feature importance scores for different models applied to various datasets.
-
-     **Test Mechanism**: The function iterates through each dataset-model pair, calculates permutation feature importance (PFI) scores, and generates a summary table with the top `num_features` important features for each model.
-
-     **Signs of High Risk**:
-     - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
-     - High variance in feature importance scores across different models may suggest instability in feature selection.
-
-     **Strengths**:
-     - Provides a clear comparison of the most important features for each model.
-     - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.
-
-     **Limitations**:
-     - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
-     - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
-     - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
-     """
-     results_list = []
-
-     for dataset, model in zip(datasets, models):
-         x = dataset.x_df()
-         y = dataset.y_df()
-
-         pfi_values = permutation_importance(
-             model.model,
-             x,
-             y,
-             random_state=0,
-             n_jobs=-2,
-         )
-
-         # Create a dictionary to store PFI scores
-         pfi = {
-             column: pfi_values["importances_mean"][i]
-             for i, column in enumerate(x.columns)
-         }
-
-         # Sort features by their importance
-         sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
-
-         # Extract the top `num_features` features
-         top_features = sorted_features[:num_features]
-
-         # Prepare the result for the current model and dataset
-         result = {
-             "Model": model.input_id,
-             "Dataset": dataset.input_id,
-         }
-
-         # Dynamically add feature columns to the result
-         for i in range(num_features):
-             if i < len(top_features):
-                 result[
-                     f"Feature {i + 1}"
-                 ] = f"[{top_features[i][0]}; {top_features[i][1]:.4f}]"
-             else:
-                 result[f"Feature {i + 1}"] = None
-
-         # Append the result to the list
-         results_list.append(result)
-
-     # Convert the results list to a DataFrame
-     results_df = pd.DataFrame(results_list)
-     return results_df
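No direct replacement is named, though a new `FeatureImportance.py` (entry 140 in the file list) appears under the same sklearn test directory in 2.5.18. The removed test's core computation is scikit-learn's permutation importance; a minimal standalone sketch (hypothetical `top_permutation_features` helper; assumes `X` is a pandas DataFrame):

```python
from sklearn.inspection import permutation_importance

def top_permutation_features(estimator, X, y, num_features=3):
    """Return the num_features (feature, score) pairs with highest mean permutation importance."""
    pfi = permutation_importance(estimator, X, y, random_state=0, n_jobs=-2)
    scores = dict(zip(X.columns, pfi.importances_mean))  # X assumed to be a DataFrame
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:num_features]
```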
validmind/tests/model_validation/statsmodels/JarqueBera.py (deleted)
@@ -1,73 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from statsmodels.stats.stattools import jarque_bera
-
- from validmind.vm_models import Metric
-
-
- class JarqueBera(Metric):
-     """
-     Assesses normality of dataset features in an ML model using the Jarque-Bera test.
-
-     **Purpose**: The purpose of the Jarque-Bera test as implemented in this metric is to determine if the features in
-     the dataset of a given Machine Learning model follows a normal distribution. This is crucial for understanding the
-     distribution and behavior of the model's features, as numerous statistical methods assume normal distribution of
-     the data.
-
-     **Test Mechanism**: The test mechanism involves computing the Jarque-Bera statistic, p-value, skew, and kurtosis
-     for each feature in the dataset. It utilizes the 'jarque_bera' function from the 'statsmodels' library in Python,
-     storing the results in a dictionary. The test evaluates the skewness and kurtosis to ascertain whether the dataset
-     follows a normal distribution. A significant p-value (typically less than 0.05) implies that the data does not
-     possess normal distribution.
-
-     **Signs of High Risk**:
-     - A high Jarque-Bera statistic and a low p-value (usually less than 0.05) indicates high-risk conditions.
-     - Such results suggest the data significantly deviates from a normal distribution. If a machine learning model
-     expects feature data to be normally distributed, these findings imply that it may not function as intended.
-
-     **Strengths**:
-     - This test provides insights into the shape of the data distribution, helping determine whether a given set of
-     data follows a normal distribution.
-     - This is particularly useful for risk assessment for models that assume a normal distribution of data.
-     - By measuring skewness and kurtosis, it provides additional insights into the nature and magnitude of a
-     distribution's deviation.
-
-     **Limitations**:
-     - The Jarque-Bera test only checks for normality in the data distribution. It cannot provide insights into other
-     types of distributions.
-     - Datasets that aren't normally distributed but follow some other distribution might lead to inaccurate risk
-     assessments.
-     - The test is highly sensitive to large sample sizes, often rejecting the null hypothesis (that data is normally
-     distributed) even for minor deviations in larger datasets.
-     """
-
-     name = "jarque_bera"
-     required_inputs = ["dataset"]
-     tasks = ["classification", "regression"]
-     tags = [
-         "tabular_data",
-         "data_distribution",
-         "statistical_test",
-         "statsmodels",
-     ]
-
-     def run(self):
-         """
-         Calculates JB for each of the dataset features
-         """
-         x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
-
-         jb_values = {}
-         for col in x_train.columns:
-             jb_stat, jb_pvalue, jb_skew, jb_kurtosis = jarque_bera(x_train[col].values)
-
-             jb_values[col] = {
-                 "stat": jb_stat,
-                 "pvalue": jb_pvalue,
-                 "skew": jb_skew,
-                 "kurtosis": jb_kurtosis,
-             }
-
-         return self.cache_results(jb_values)
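This removal is a relocation rather than a deletion: a `JarqueBera.py` with 70 added lines appears under `validmind/tests/data_validation/` (entry 43 in the file list). The underlying statsmodels call is unchanged; a minimal standalone sketch of the per-column computation (hypothetical helper, assuming a pandas DataFrame):

```python
from statsmodels.stats.stattools import jarque_bera

def jarque_bera_by_column(df):
    """Jarque-Bera statistic, p-value, skew, and kurtosis per numeric column."""
    results = {}
    for col in df.select_dtypes("number").columns:
        stat, pvalue, skew, kurtosis = jarque_bera(df[col].dropna().values)
        results[col] = {"stat": stat, "pvalue": pvalue, "skew": skew, "kurtosis": kurtosis}
    return results
```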
validmind/tests/model_validation/statsmodels/LJungBox.py (deleted)
@@ -1,66 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from statsmodels.stats.diagnostic import acorr_ljungbox
-
- from validmind.vm_models import Metric
-
-
- class LJungBox(Metric):
-     """
-     Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.
-
-     **Purpose**: The Ljung-Box test is a type of statistical test utilized to ascertain whether there are
-     autocorrelations within a given dataset that differ significantly from zero. In the context of a machine learning
-     model, this test is primarily used to evaluate data utilized in regression tasks, especially those involving time
-     series and forecasting.
-
-     **Test Mechanism**: The test operates by iterating over each feature within the training dataset and applying the
-     `acorr_ljungbox` function from the `statsmodels.stats.diagnostic` library. This function calculates the Ljung-Box
-     statistic and p-value for each feature. These results are then stored in a dictionary where the keys are the
-     feature names and the values are dictionaries containing the statistic and p-value respectively. Generally, a lower
-     p-value indicates a higher likelihood of significant autocorrelations within the feature.
-
-     **Signs of High Risk**:
-     - A high risk or failure in the model's performance relating to this test might be indicated by high Ljung-Box
-     statistic values or low p-values.
-     - These outcomes suggest the presence of significant autocorrelations in the respective features. If not properly
-     consider or handle in the machine learning model, these can negatively affect model performance or bias.
-
-     **Strengths**:
-     - The Ljung-Box test is a powerful tool for detecting autocorrelations within datasets, especially in time series
-     data.
-     - It provides quantitative measures (statistic and p-value) that allow for precise evaluation of autocorrelation.
-     - This test can be instrumental in avoiding issues related to autoregressive residuals and other challenges in
-     regression models.
-
-     **Limitations**:
-     - The Ljung-Box test cannot detect all types of non-linearity or complex interrelationships among variables.
-     - Testing individual features may not fully encapsulate the dynamics of the data if features interact with each
-     other.
-     - It is designed more for traditional statistical models and may not be fully compatible with certain types of
-     complex machine learning models.
-     """
-
-     name = "ljung_box"
-     required_inputs = ["dataset"]
-     tasks = ["regression"]
-     tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
-
-     def run(self):
-         """
-         Calculates Ljung-Box test for each of the dataset features
-         """
-         x_train = self.inputs.dataset.df
-
-         ljung_box_values = {}
-         for col in x_train.columns:
-             lb_results = acorr_ljungbox(x_train[col].values, return_df=True)
-
-             ljung_box_values[col] = {
-                 "stat": lb_results["lb_stat"].values[0],
-                 "pvalue": lb_results["lb_pvalue"].values[0],
-             }
-
-         return self.cache_results(ljung_box_values)
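As with JarqueBera, `LJungBox.py` re-appears under `validmind/tests/data_validation/` (entry 45 in the file list) with the same 66-line size, so this is a move to the data_validation namespace. A minimal standalone sketch of the per-column check (hypothetical helper; assumes pandas and statsmodels):

```python
from statsmodels.stats.diagnostic import acorr_ljungbox

def ljung_box_by_column(df):
    """First-lag Ljung-Box statistic and p-value per column, as in the removed test."""
    results = {}
    for col in df.columns:
        lb = acorr_ljungbox(df[col].dropna().values, return_df=True)
        results[col] = {"stat": lb["lb_stat"].iloc[0], "pvalue": lb["lb_pvalue"].iloc[0]}
    return results
```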