validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -1,135 +0,0 @@
1
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
- # See the LICENSE file in the root of this repository for details.
3
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
-
5
- from dataclasses import dataclass
6
-
7
- import pandas as pd
8
- import plotly.graph_objects as go
9
- from scipy import stats
10
-
11
- from validmind.errors import SkipTestError
12
- from validmind.vm_models import Figure, Metric
13
-
14
-
15
@dataclass
class RegressionCoeffsPlot(Metric):
    """
    Visualizes regression coefficients with 95% confidence intervals to assess predictor variables' impact on response
    variable.

    **Purpose**: The Regression Coefficients with Confidence Intervals plot and metric aims to understand the impact of
    predictor variables on the response variable in question. This understanding is achieved via the visualization and
    analysis of the regression model by presenting the coefficients derived from the model along with their associated
    95% confidence intervals. By doing so, it offers insights into the variability and uncertainty associated with the
    model's estimates.

    **Test Mechanism**: For every model supplied, the test reads the coefficient table returned by the model's
    `regression_coefficients()` method and converts its "coef" and "std err" columns to numbers. It then builds a
    normal-approximation confidence interval for each coefficient (coefficient +/- z * standard error, at a 95%
    confidence level by default). One bar plot is produced per model, with the predictor variables on the x-axis, the
    coefficients on the y-axis and the confidence intervals drawn as error bars. Only models whose `library` is
    "statsmodels" are supported; any other model causes the test to be skipped.

    **Signs of High Risk**:
    * If the calculated confidence interval contains the zero value, it could mean the feature/coefficient in question
    doesn't significantly contribute to prediction in the model.
    * If there are multiple coefficients exhibiting this behavior, it might raise concerns about overall model
    reliability.
    * Very wide confidence intervals might indicate high uncertainty in the associated coefficient estimates.

    **Strengths**:
    * This metric offers a simple and easily comprehensible visualization of the significance and impact of individual
    predictor variables in a regression model.
    * By including confidence intervals, it enables an observer to evaluate the uncertainty around each coefficient
    estimate.

    **Limitations**:
    * The test is dependent on a few assumptions about the data, namely normality of residuals and independence of
    observations, which may not always be true for all types of datasets.
    * The test does not consider multi-collinearity (correlation among predictor variables), which can potentially
    distort the model and make interpretation of coefficients challenging.
    * The test's application is limited to regression tasks and tabular datasets and is not suitable for other types of
    machine learning assignments or data structures.
    """

    name = "regression_coeffs_plot"
    required_inputs = ["models"]
    tasks = ["regression"]
    tags = ["tabular_data", "visualization", "model_interpretation"]

    @staticmethod
    def plot_coefficients_with_ci(model, model_name, confidence_level=0.95):
        """
        Build the coefficient bar plot and the interval values for a single model.

        Args:
            model: Object exposing `regression_coefficients()`. The returned table is indexed with the
                "Feature", "coef" and "std err" keys (presumably a pandas DataFrame - confirm against the
                model wrapper).
            model_name: Label used in the figure title.
            confidence_level: Two-sided confidence level of the interval, strictly between 0 and 1.

        Returns:
            A `(figure, values)` tuple where `values` is a dict with the "values", "lower_ci" and "upper_ci"
            lists (absolute interval bounds).

        Raises:
            ValueError: If `confidence_level` is not strictly between 0 and 1.
        """
        if not 0 < confidence_level < 1:
            raise ValueError("confidence_level must be strictly between 0 and 1")

        # Extract estimated coefficients and standard errors
        coefficients = model.regression_coefficients()
        coef = pd.to_numeric(coefficients["coef"])
        std_err = pd.to_numeric(coefficients["std err"])

        # Half-width of the two-sided interval under the normal approximation
        z_value = stats.norm.ppf((1 + confidence_level) / 2)
        margin = z_value * std_err
        lower_ci = coef - margin
        upper_ci = coef + margin

        # Create a bar plot with confidence intervals
        fig = go.Figure()

        fig.add_trace(
            go.Bar(
                x=list(coefficients["Feature"].values),
                y=coef,
                name="Estimated Coefficients",
                # Plotly draws error bars *relative* to the bar value, so it must be given
                # the interval half-width, not the absolute lower/upper bounds.
                error_y=dict(
                    type="data",
                    symmetric=True,
                    array=margin,
                    visible=True,
                ),
            )
        )

        fig.update_layout(
            title=f"{model_name} Coefficients with Confidence Intervals",
            xaxis_title="Predictor Variables",
            yaxis_title="Coefficients",
        )

        return fig, {
            "values": list(coef),
            "lower_ci": list(lower_ci),
            "upper_ci": list(upper_ci),
        }

    def run(self):
        """
        Plot the coefficients of every input model and cache the figures and interval values.

        Raises:
            ValueError: If no models were provided.
            SkipTestError: If any model is not a statsmodels model.
        """
        # Check models list is not empty
        if not self.inputs.models:
            raise ValueError("List of models must be provided in the models parameter")

        all_models = list(self.inputs.models)

        # Validate every model up front so no figure is built for a run that will be skipped
        for model in all_models:
            if model.library != "statsmodels":
                raise SkipTestError("Only statsmodels are supported for this metric")

        all_figures = []
        all_metric_values = []

        for i, model in enumerate(all_models, start=1):
            model_name = f"Model {i}"

            fig, metric_values = self.plot_coefficients_with_ci(model, model_name)
            all_figures.append(
                Figure(
                    for_object=self,
                    key=f"{model_name}_coefficients_ci_plot",
                    figure=fig,
                )
            )
            all_metric_values.append({"name": model_name, "metrics": metric_values})

        return self.cache_results(
            metric_value=all_metric_values,
            figures=all_figures,
        )
@@ -1,103 +0,0 @@
1
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
- # See the LICENSE file in the root of this repository for details.
3
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
-
5
- from dataclasses import dataclass
6
-
7
- import pandas as pd
8
-
9
- from validmind.errors import SkipTestError
10
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
11
-
12
-
13
@dataclass
class RegressionModelsCoeffs(Metric):
    """
    Compares feature importance by evaluating and contrasting coefficients of different regression models.

    **Purpose**: The 'RegressionModelsCoeffs' metric is utilized to evaluate and compare coefficients of different
    regression models trained on the same dataset. By examining how each model weighted the importance of different
    features during training, this metric provides key insights into which factors have the most impact on the model's
    predictions and how these patterns differ across models.

    **Test Mechanism**: The test extracts the coefficients of each regression model using the
    'regression_coefficients()' method. Each model's coefficient table is labelled with a "Model" column
    ("Model 1", "Model 2", ...) and the tables are stacked into a single table with "Model" as the first column.
    This test only accepts models whose library is 'statsmodels'; any other model results in a 'SkipTestError'.

    **Signs of High Risk**:
    - Discrepancies in how different models weight the same features
    - Unexpectedly high or low coefficients
    - The test is inapplicable to certain models because they are not 'statsmodels' models

    **Strengths**:
    - Enables insight into the training process of different models
    - Allows comparison of feature importance across models
    - Through the review of feature coefficients, the test provides a more transparent evaluation of the model and
    highlights significant weights and biases in the training procedure

    **Limitations**:
    - The test is only compatible with 'statsmodels' regression models
    - While the test provides contrast in feature weightings among models, it does not establish the most appropriate
    or accurate weighting, thus remaining subject to interpretation
    - It does not account for potential overfitting or underfitting of models
    - The computed coefficients might not lead to effective performance on unseen data
    """

    name = "regression_models_coefficients"
    required_inputs = ["models"]
    tasks = ["regression"]
    tags = ["model_comparison"]

    def _build_model_summaries(self, all_coefficients):
        """
        Stack the per-model coefficient tables into one table with a leading "Model" column.

        Args:
            all_coefficients: Iterable of coefficient tables, one per model (assumed to be pandas
                DataFrames, as returned by `regression_coefficients()` - confirm against the model wrapper).

        Returns:
            A DataFrame with a fresh integer index whose first column is "Model". The input tables are
            left unmodified.
        """
        # assign() returns a new frame, so the tables owned by the models are not mutated
        labelled = [
            coefficients.assign(Model=f"Model {i}")
            for i, coefficients in enumerate(all_coefficients, start=1)
        ]
        if not labelled:
            return pd.DataFrame(columns=["Model"])

        # A single concat avoids repeatedly copying the accumulated frame
        all_models_df = pd.concat(labelled, ignore_index=True)

        # Reorder columns to have 'Model' as the first column
        ordered_columns = ["Model"] + [
            col for col in all_models_df.columns if col != "Model"
        ]
        return all_models_df[ordered_columns]

    def run(self):
        """
        Collect the coefficients of every input model and cache them as a list of records.

        Raises:
            ValueError: If no models were provided.
            SkipTestError: If any model is not a statsmodels model.
        """
        # Check models list is not empty
        if not self.inputs.models:
            raise ValueError("List of models must be provided in the models parameter")

        for model in self.inputs.models:
            if model.library != "statsmodels":
                raise SkipTestError(
                    "Only statsmodels models are supported for this metric"
                )

        coefficients = [m.regression_coefficients() for m in self.inputs.models]
        all_models_summary = self._build_model_summaries(coefficients)

        return self.cache_results(
            {
                "coefficients_summary": all_models_summary.to_dict(orient="records"),
            }
        )

    def summary(self, metric_value):
        """
        Build one table for summarizing the regression models' coefficients
        """
        coefficients_summary = metric_value["coefficients_summary"]

        return ResultSummary(
            results=[
                ResultTable(
                    data=coefficients_summary,
                    metadata=ResultTableMetadata(
                        title="Regression Models' Coefficients"
                    ),
                ),
            ]
        )
@@ -1,71 +0,0 @@
1
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
- # See the LICENSE file in the root of this repository for details.
3
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
-
5
- from statsmodels.sandbox.stats.runs import runstest_1samp
6
-
7
- from validmind.vm_models import Metric
8
-
9
-
10
class RunsTest(Metric):
    """
    Executes the Runs Test on each numeric feature of a dataset to detect non-random patterns in the data sequence.

    **Purpose**: The Runs Test is a statistical procedure used to determine whether a sequence of values behaves
    randomly or not. It analyzes runs, i.e. sequences of consecutive positives or negatives, to check whether there
    are more or fewer runs than expected under the assumption of randomness. A departure from randomness can be an
    indication of some pattern, trend, or cycle which may need attention.

    **Test Mechanism**: The test applies `runstest_1samp` from the statsmodels module, with its default arguments, to
    every numeric feature column of the input dataset. Each call yields a Runs Statistic and a P-value. A low P-value
    suggests that the arrangement of values in the feature is not likely to be random. The results are stored in a
    dictionary keyed by feature name, where each value is a dictionary holding the "stat" and "pvalue" entries for
    that feature.

    **Signs of High Risk**:
    - High risk is indicated when the P-value is close to zero.
    - If the p-value is less than a predefined significance level (like 0.05), it suggests that the runs (series of
    positive or negative values) are not random and are longer or shorter than what is expected under a random
    scenario.
    - This would mean there's a high risk of a non-random ordering of values, suggesting potential issues that need
    investigation.

    **Strengths**:
    - The Runs Test is straightforward and fast for detecting non-random patterns in a data sequence.
    - It can validate assumptions of randomness, which is particularly valuable for checking error distributions in
    regression models, trendless time series data, and making sure a classifier doesn't favour one class over another.
    - It can be applied to both classification and regression tasks, making it versatile.

    **Limitations**:
    - The test assumes that the data is independently and identically distributed (i.i.d.), which might not be the case
    for many real-world datasets.
    - A low p-value indicating non-randomness does not provide information about the type or the source of the
    detected pattern.
    - It is sensitive to extreme values (outliers), and overly large or small run sequences can influence the results.
    - This test does not provide model performance evaluation; it is used to detect patterns in sequences of values
    only.
    """

    name = "runs_test"
    required_inputs = ["dataset"]
    tasks = ["classification", "regression"]
    tags = ["tabular_data", "statistical_test", "statsmodels"]

    def run(self):
        """
        Applies the runs test to every numeric feature column and caches the per-feature results
        """
        dataset = self.inputs.dataset
        numeric_features = dataset.df[dataset.feature_columns_numeric]

        # Lazily pair each column name with its (statistic, p-value) result
        outcomes = (
            (column, runstest_1samp(numeric_features[column].values))
            for column in numeric_features.columns
        )

        return self.cache_results(
            {
                column: {"stat": statistic, "pvalue": p_value}
                for column, (statistic, p_value) in outcomes
            }
        )