validmind-2.5.8-py3-none-any.whl → validmind-2.5.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +80 -119
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +14 -15
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/JarqueBera.py +70 -0
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LJungBox.py +66 -0
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/RunsTest.py +72 -0
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +42 -40
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +39 -36
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +38 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +143 -158
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
- validmind-2.5.18.dist-info/RECORD +324 -0
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
- validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
- validmind-2.5.8.dist-info/RECORD +0 -318
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -1,135 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from dataclasses import dataclass
|
6
|
-
|
7
|
-
import pandas as pd
|
8
|
-
import plotly.graph_objects as go
|
9
|
-
from scipy import stats
|
10
|
-
|
11
|
-
from validmind.errors import SkipTestError
|
12
|
-
from validmind.vm_models import Figure, Metric
|
13
|
-
|
14
|
-
|
15
|
-
@dataclass
|
16
|
-
class RegressionCoeffsPlot(Metric):
|
17
|
-
"""
|
18
|
-
Visualizes regression coefficients with 95% confidence intervals to assess predictor variables' impact on response
|
19
|
-
variable.
|
20
|
-
|
21
|
-
**Purpose**: The Regression Coefficients with Confidence Intervals plot and metric aims to understand the impact of
|
22
|
-
predictor variables on the response variable in question. This understanding is achieved via the visualization and
|
23
|
-
analysis of the regression model by presenting the coefficients derived from the model along with their associated
|
24
|
-
95% confidence intervals. By doing so, it offers insights into the variability and uncertainty associated with the
|
25
|
-
model's estimates.
|
26
|
-
|
27
|
-
**Test Mechanism**: The test begins by extracting the estimated coefficients and their related standard errors from
|
28
|
-
the regression model under test. It then calculates and draws confidence intervals based on a 95% confidence level
|
29
|
-
(a standard convention in statistics). These intervals provide a range wherein the true value can be expected to
|
30
|
-
fall 95% of the time if the same regression were re-run multiple times with samples drawn from the same population.
|
31
|
-
This information is then visualized as a bar plot, with the predictor variables and their coefficients on the
|
32
|
-
x-axis and y-axis respectively and the confidence intervals represented as error bars.
|
33
|
-
|
34
|
-
**Signs of High Risk**:
|
35
|
-
* If the calculated confidence interval contains the zero value, it could mean the feature/coefficient in question
|
36
|
-
doesn't significantly contribute to prediction in the model.
|
37
|
-
* If there are multiple coefficients exhibiting this behavior, it might raise concerns about overall model
|
38
|
-
reliability.
|
39
|
-
* Very wide confidence intervals might indicate high uncertainty in the associated coefficient estimates.
|
40
|
-
|
41
|
-
**Strengths**:
|
42
|
-
* This metric offers a simple and easily comprehendible visualization of the significance and impact of individual
|
43
|
-
predictor variables in a regression model.
|
44
|
-
* By including confidence intervals, it enables an observer to evaluate the uncertainty around each coefficient
|
45
|
-
estimate.
|
46
|
-
|
47
|
-
**Limitations**:
|
48
|
-
* The test is dependent on a few assumptions about the data, namely normality of residuals and independence of
|
49
|
-
observations, which may not always be true for all types of datasets.
|
50
|
-
* The test does not consider multi-collinearity (correlation among predictor variables), which can potentially
|
51
|
-
distort the model and make interpretation of coefficients challenging.
|
52
|
-
* The test's application is limited to regression tasks and tabular datasets and is not suitable for other types of
|
53
|
-
machine learning assignments or data structures.
|
54
|
-
"""
|
55
|
-
|
56
|
-
name = "regression_coeffs_plot"
|
57
|
-
required_inputs = ["models"]
|
58
|
-
tasks = ["regression"]
|
59
|
-
tags = ["tabular_data", "visualization", "model_interpretation"]
|
60
|
-
|
61
|
-
@staticmethod
|
62
|
-
def plot_coefficients_with_ci(model, model_name):
|
63
|
-
# Extract estimated coefficients and standard errors
|
64
|
-
coefficients = model.regression_coefficients()
|
65
|
-
coef = pd.to_numeric(coefficients["coef"])
|
66
|
-
std_err = pd.to_numeric(coefficients["std err"])
|
67
|
-
|
68
|
-
# Calculate confidence intervals
|
69
|
-
confidence_level = 0.95 # 95% confidence interval
|
70
|
-
z_value = stats.norm.ppf((1 + confidence_level) / 2) # Calculate Z-value
|
71
|
-
lower_ci = coef - z_value * std_err
|
72
|
-
upper_ci = coef + z_value * std_err
|
73
|
-
|
74
|
-
# Create a bar plot with confidence intervals
|
75
|
-
fig = go.Figure()
|
76
|
-
|
77
|
-
fig.add_trace(
|
78
|
-
go.Bar(
|
79
|
-
x=list(coefficients["Feature"].values),
|
80
|
-
y=coef,
|
81
|
-
name="Estimated Coefficients",
|
82
|
-
error_y=dict(
|
83
|
-
type="data",
|
84
|
-
symmetric=False,
|
85
|
-
arrayminus=lower_ci,
|
86
|
-
array=upper_ci,
|
87
|
-
visible=True,
|
88
|
-
),
|
89
|
-
)
|
90
|
-
)
|
91
|
-
|
92
|
-
fig.update_layout(
|
93
|
-
title=f"{model_name} Coefficients with Confidence Intervals",
|
94
|
-
xaxis_title="Predictor Variables",
|
95
|
-
yaxis_title="Coefficients",
|
96
|
-
)
|
97
|
-
|
98
|
-
return fig, {
|
99
|
-
"values": list(coef),
|
100
|
-
"lower_ci": list(lower_ci),
|
101
|
-
"upper_ci": list(upper_ci),
|
102
|
-
}
|
103
|
-
|
104
|
-
def run(self):
|
105
|
-
# Check models list is not empty
|
106
|
-
if not self.inputs.models:
|
107
|
-
raise ValueError("List of models must be provided in the models parameter")
|
108
|
-
|
109
|
-
all_models = []
|
110
|
-
all_figures = []
|
111
|
-
all_metric_values = []
|
112
|
-
|
113
|
-
if self.inputs.models is not None:
|
114
|
-
all_models.extend(self.inputs.models)
|
115
|
-
|
116
|
-
for i, model in enumerate(all_models):
|
117
|
-
if model.library != "statsmodels":
|
118
|
-
raise SkipTestError("Only statsmodels are supported for this metric")
|
119
|
-
|
120
|
-
model_name = f"Model {i+1}"
|
121
|
-
|
122
|
-
fig, metric_values = self.plot_coefficients_with_ci(model, model_name)
|
123
|
-
all_figures.append(
|
124
|
-
Figure(
|
125
|
-
for_object=self,
|
126
|
-
key=f"{model_name}_coefficients_ci_plot",
|
127
|
-
figure=fig,
|
128
|
-
)
|
129
|
-
)
|
130
|
-
all_metric_values.append({"name": model_name, "metrics": metric_values})
|
131
|
-
|
132
|
-
return self.cache_results(
|
133
|
-
metric_value=all_metric_values,
|
134
|
-
figures=all_figures,
|
135
|
-
)
|
@@ -1,103 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from dataclasses import dataclass
|
6
|
-
|
7
|
-
import pandas as pd
|
8
|
-
|
9
|
-
from validmind.errors import SkipTestError
|
10
|
-
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
11
|
-
|
12
|
-
|
13
|
-
@dataclass
|
14
|
-
class RegressionModelsCoeffs(Metric):
|
15
|
-
"""
|
16
|
-
Compares feature importance by evaluating and contrasting coefficients of different regression models.
|
17
|
-
|
18
|
-
**Purpose**: The 'RegressionModelsCoeffs' metric is utilized to evaluate and compare coefficients of different
|
19
|
-
regression models trained on the same dataset. By examining how each model weighted the importance of different
|
20
|
-
features during training, this metric provides key insights into which factors have the most impact on the model's
|
21
|
-
predictions and how these patterns differ across models.
|
22
|
-
|
23
|
-
**Test Mechanism**: The test operates by extracting the coefficients of each regression model using the
|
24
|
-
'regression_coefficients()' method. These coefficients are then consolidated into a dataframe, with each row
|
25
|
-
representing a model and columns corresponding to each feature's coefficient. It must be noted that this test is
|
26
|
-
exclusive to 'statsmodels' and 'R' models, other models will result in a 'SkipTestError'.
|
27
|
-
|
28
|
-
**Signs of High Risk**:
|
29
|
-
- Discrepancies in how different models weight the same features
|
30
|
-
- Unexpectedly high or low coefficients
|
31
|
-
- The test is inapplicable to certain models because they are not from 'statsmodels' or 'R' libraries
|
32
|
-
|
33
|
-
**Strengths**:
|
34
|
-
- Enables insight into the training process of different models
|
35
|
-
- Allows comparison of feature importance across models
|
36
|
-
- Through the review of feature coefficients, the test provides a more transparent evaluation of the model and
|
37
|
-
highlights significant weights and biases in the training procedure
|
38
|
-
|
39
|
-
**Limitations**:
|
40
|
-
- The test is only compatible with 'statsmodels' and 'R' regression models
|
41
|
-
- While the test provides contrast in feature weightings among models, it does not establish the most appropriate
|
42
|
-
or accurate weighting, thus remaining subject to interpretation
|
43
|
-
- It does not account for potential overfitting or underfitting of models
|
44
|
-
- The computed coefficients might not lead to effective performance on unseen data
|
45
|
-
"""
|
46
|
-
|
47
|
-
name = "regression_models_coefficients"
|
48
|
-
required_inputs = ["models"]
|
49
|
-
tasks = ["regression"]
|
50
|
-
tags = ["model_comparison"]
|
51
|
-
|
52
|
-
def _build_model_summaries(self, all_coefficients):
|
53
|
-
all_models_df = pd.DataFrame()
|
54
|
-
|
55
|
-
for i, coefficients in enumerate(all_coefficients):
|
56
|
-
model_name = f"Model {i+1}"
|
57
|
-
# The coefficients summary object needs an additional "Model" column at the beginning
|
58
|
-
coefficients["Model"] = model_name
|
59
|
-
all_models_df = pd.concat([all_models_df, coefficients])
|
60
|
-
|
61
|
-
# Reorder columns to have 'Model' as the first column and reset the index
|
62
|
-
all_models_df = all_models_df.reset_index(drop=True)[
|
63
|
-
["Model"] + [col for col in all_models_df.columns if col != "Model"]
|
64
|
-
]
|
65
|
-
|
66
|
-
return all_models_df
|
67
|
-
|
68
|
-
def run(self):
|
69
|
-
# Check models list is not empty
|
70
|
-
if not self.inputs.models or len(self.inputs.models) == 0:
|
71
|
-
raise ValueError("List of models must be provided in the models parameter")
|
72
|
-
|
73
|
-
for model in self.inputs.models:
|
74
|
-
if model.library != "statsmodels":
|
75
|
-
raise SkipTestError(
|
76
|
-
"Only statsmodels models are supported for this metric"
|
77
|
-
)
|
78
|
-
|
79
|
-
coefficients = [m.regression_coefficients() for m in self.inputs.models]
|
80
|
-
all_models_summary = self._build_model_summaries(coefficients)
|
81
|
-
|
82
|
-
return self.cache_results(
|
83
|
-
{
|
84
|
-
"coefficients_summary": all_models_summary.to_dict(orient="records"),
|
85
|
-
}
|
86
|
-
)
|
87
|
-
|
88
|
-
def summary(self, metric_value):
|
89
|
-
"""
|
90
|
-
Build one table for summarizing the regression models' coefficients
|
91
|
-
"""
|
92
|
-
coefficients_summary = metric_value["coefficients_summary"]
|
93
|
-
|
94
|
-
return ResultSummary(
|
95
|
-
results=[
|
96
|
-
ResultTable(
|
97
|
-
data=coefficients_summary,
|
98
|
-
metadata=ResultTableMetadata(
|
99
|
-
title="Regression Models' Coefficients"
|
100
|
-
),
|
101
|
-
),
|
102
|
-
]
|
103
|
-
)
|
@@ -1,71 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from statsmodels.sandbox.stats.runs import runstest_1samp
|
6
|
-
|
7
|
-
from validmind.vm_models import Metric
|
8
|
-
|
9
|
-
|
10
|
-
class RunsTest(Metric):
|
11
|
-
"""
|
12
|
-
Executes Runs Test on ML model to detect non-random patterns in output data sequence.
|
13
|
-
|
14
|
-
**Purpose**: The Runs Test is a statistical procedure used to determine whether the sequence of data extracted from
|
15
|
-
the ML model behaves randomly or not. Specifically, it analyzes runs, sequences of consecutive positives or
|
16
|
-
negatives, in the data to check if there are more or fewer runs than expected under the assumption of randomness.
|
17
|
-
This can be an indication of some pattern, trend, or cycle in the model's output which may need attention.
|
18
|
-
|
19
|
-
**Test Mechanism**: The testing mechanism applies the Runs Test from the statsmodels module on each column of the
|
20
|
-
training dataset. For every feature in the dataset, a Runs Test is executed, whose output includes a Runs Statistic
|
21
|
-
and P-value. A low P-value suggests that data arrangement in the feature is not likely to be random. The results
|
22
|
-
are stored in a dictionary where the keys are the feature names, and the values are another dictionary storing the
|
23
|
-
test statistic and the P-value for each feature.
|
24
|
-
|
25
|
-
**Signs of High Risk**:
|
26
|
-
- High risk is indicated when the P-value is close to zero.
|
27
|
-
- If the p-value is less than a predefined significance level (like 0.05), it suggests that the runs (series of
|
28
|
-
positive or negative values) in the model's output are not random and are longer or shorter than what is expected
|
29
|
-
under a random scenario.
|
30
|
-
- This would mean there's a high risk of non-random distribution of errors or model outcomes, suggesting potential
|
31
|
-
issues with the model.
|
32
|
-
|
33
|
-
**Strengths**:
|
34
|
-
- The strength of the Runs Test is that it's straightforward and fast for detecting non-random patterns in data
|
35
|
-
sequence.
|
36
|
-
- It can validate assumptions of randomness, which is particularly valuable for checking error distributions in
|
37
|
-
regression models, trendless time series data, and making sure a classifier doesn't favour one class over another.
|
38
|
-
- Moreover, it can be applied to both classification and regression tasks, making it versatile.
|
39
|
-
|
40
|
-
**Limitations**:
|
41
|
-
- The test assumes that the data is independently and identically distributed (i.i.d.), which might not be the case
|
42
|
-
for many real-world datasets.
|
43
|
-
- The conclusion drawn from the low p-value indicating non-randomness does not provide information about the type
|
44
|
-
or the source of the detected pattern.
|
45
|
-
- Also, it is sensitive to extreme values (outliers), and overly large or small run sequences can influence the
|
46
|
-
results.
|
47
|
-
- Furthermore, this test does not provide model performance evaluation; it is used to detect patterns in the
|
48
|
-
sequence of outputs only.
|
49
|
-
"""
|
50
|
-
|
51
|
-
name = "runs_test"
|
52
|
-
required_inputs = ["dataset"]
|
53
|
-
tasks = ["classification", "regression"]
|
54
|
-
tags = ["tabular_data", "statistical_test", "statsmodels"]
|
55
|
-
|
56
|
-
def run(self):
|
57
|
-
"""
|
58
|
-
Calculates the run test for each of the dataset features
|
59
|
-
"""
|
60
|
-
x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
|
61
|
-
|
62
|
-
runs_test_values = {}
|
63
|
-
for col in x_train.columns:
|
64
|
-
runs_stat, runs_p_value = runstest_1samp(x_train[col].values)
|
65
|
-
|
66
|
-
runs_test_values[col] = {
|
67
|
-
"stat": runs_stat,
|
68
|
-
"pvalue": runs_p_value,
|
69
|
-
}
|
70
|
-
|
71
|
-
return self.cache_results(runs_test_values)
|