validmind 2.5.8__py3-none-any.whl → 2.5.15__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +26 -7
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +3 -13
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +27 -20
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +36 -35
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +35 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/result_wrapper.py +93 -132
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py:

```diff
@@ -19,31 +19,36 @@ class RegressionFeatureSignificance(Metric):
     """
     Assesses and visualizes the statistical significance of features in a set of regression models.
 
-    …
+    ### Purpose
+
     The Regression Feature Significance metric assesses the significance of each feature in a given set of regression
     models. It creates a visualization displaying p-values for every feature of each model, assisting model developers
     in understanding which features are most influential in their models.
 
-    …
+    ### Test Mechanism
+
     The test mechanism involves going through each fitted regression model in a given list, extracting the model
     coefficients and p-values for each feature, and then plotting these values. The x-axis on the plot contains the
     p-values while the y-axis denotes the coefficients of each feature. A vertical red line is drawn at the threshold
     for p-value significance, which is 0.05 by default. Any features with p-values to the left of this line are
     considered statistically significant at the chosen level.
 
-    …
+    ### Signs of High Risk
+
     - Any feature with a high p-value (greater than the threshold) is considered a potential high risk, as it suggests
     the feature is not statistically significant and may not be reliably contributing to the model's predictions.
     - A high number of such features may indicate problems with the model validation, variable selection, and overall
     reliability of the model predictions.
 
-    …
+    ### Strengths
+
     - Helps identify the features that significantly contribute to a model's prediction, providing insights into the
     feature importance.
     - Provides tangible, easy-to-understand visualizations to interpret the feature significance.
     - Facilitates comparison of feature importance across multiple models.
 
-    …
+    ### Limitations
+
     - This metric assumes model features are independent, which may not always be the case. Multicollinearity (high
     correlation amongst predictors) can cause high variance and unreliable statistical tests of significance.
     - The p-value strategy for feature selection doesn't take into account the magnitude of the effect, focusing solely
@@ -54,7 +59,7 @@ class RegressionFeatureSignificance(Metric):
     """
 
     name = "regression_feature_significance"
-    required_inputs = ["
+    required_inputs = ["model"]
 
     default_params = {"fontsize": 10, "p_threshold": 0.05}
     tasks = ["regression"]
@@ -70,10 +75,10 @@ class RegressionFeatureSignificance(Metric):
         p_threshold = self.params["p_threshold"]
 
         # Check models list is not empty
-        if not self.inputs.
-            raise ValueError("
+        if not self.inputs.model:
+            raise ValueError("Model must be provided in the models parameter")
 
-        figures = self._plot_pvalues(self.inputs.
+        figures = self._plot_pvalues(self.inputs.model, fontsize, p_threshold)
 
         return self.cache_results(figures=figures)
```
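
The mechanism this docstring describes (each feature's coefficient plotted against its p-value, with a vertical cutoff at `p_threshold`) can be illustrated outside the package. A minimal sketch using statsmodels and matplotlib, with synthetic data and illustrative names; this is not the package's actual `_plot_pvalues` implementation:

```python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Synthetic data: fit an OLS model to obtain coefficients and p-values
rng = np.random.default_rng(0)
df = pd.DataFrame({"x1": rng.normal(size=50), "x2": rng.normal(size=50)})
df["y"] = 2.0 * df["x1"] + 0.1 * df["x2"] + rng.normal(size=50)
model = sm.OLS(df["y"], sm.add_constant(df[["x1", "x2"]])).fit()

p_threshold = 0.05  # the test's default significance threshold

# p-values on the x-axis, coefficients on the y-axis
fig, ax = plt.subplots()
ax.scatter(model.pvalues, model.params)
for name in model.params.index:
    ax.annotate(name, (model.pvalues[name], model.params[name]))
ax.axvline(p_threshold, color="red")  # points left of the line are significant
ax.set_xlabel("p-value")
ax.set_ylabel("coefficient")
plt.show()
```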

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py:

```diff
@@ -19,26 +19,30 @@ class RegressionModelForecastPlot(Metric):
     Generates plots to visually compare the forecasted outcomes of one or more regression models against actual
     observed values over a specified date range.
 
-    …
-    regression models by comparing the model's forecasted outcomes against actual observed values within a specified
-    date range. This metric is especially useful in time-series models or any model where the outcome changes over
-    time, allowing direct comparison of predicted vs actual values.
+    ### Purpose
 
-    … (4 lines not captured)
-    are set to the minimum and maximum date available in the dataset. The test verifies that the provided date range is
-    within the limits of the available data.
+    The "regression_forecast_plot" is intended to visually depict the performance of one or more regression models by
+    comparing the model's forecasted outcomes against actual observed values within a specified date range. This metric
+    is especially useful in time-series models or any model where the outcome changes over time, allowing direct
+    comparison of predicted vs actual values.
 
-    …
+    ### Test Mechanism
+
+    This test generates a plot for each fitted model in the list. The x-axis represents the date ranging from the
+    specified "start_date" to the "end_date", while the y-axis shows the value of the outcome variable. Two lines are
+    plotted: one representing the forecasted values and the other representing the observed values. The "start_date"
+    and "end_date" can be parameters of this test; if these parameters are not provided, they are set to the minimum
+    and maximum date available in the dataset. The test verifies that the provided date range is within the limits of
+    the available data.
+
+    ### Signs of High Risk
 
     - High risk or failure signs could be deduced visually from the plots if the forecasted line significantly deviates
     from the observed line, indicating the model's predicted values are not matching actual outcomes.
     - A model that struggles to handle the edge conditions like maximum and minimum data points could also be
     considered a sign of risk.
 
-    …
+    ### Strengths
 
     - Visualization: The plot provides an intuitive and clear illustration of how well the forecast matches the actual
     values, making it straightforward even for non-technical stakeholders to interpret.
@@ -46,7 +50,7 @@ class RegressionModelForecastPlot(Metric):
     - Model Evaluation: It can be useful in identifying overfitting or underfitting situations, as these will manifest
     as discrepancies between the forecasted and observed values.
 
-    …
+    ### Limitations
 
     - Interpretation Bias: Interpretation of the plot is subjective and can lead to different conclusions by different
     evaluators.
```
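
As a rough illustration of the plotting logic described above, the sketch below builds a forecast-versus-observed plot over a date range that defaults to the data limits. All names and data are hypothetical assumptions; this is not the test's actual implementation:

```python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Synthetic observed series and a stand-in for model forecasts
dates = pd.date_range("2020-01-01", periods=120, freq="D")
observed = pd.Series(np.linspace(100, 160, 120), index=dates)
forecast = observed * 1.02  # pretend predictions

# Default the range to the data limits when not supplied, then verify it
start_date, end_date = dates.min(), dates.max()
if start_date < dates.min() or end_date > dates.max():
    raise ValueError("Provided date range is outside the available data")

# Two lines: forecasted vs observed values over the chosen window
fig, ax = plt.subplots()
ax.plot(observed.loc[start_date:end_date], label="observed")
ax.plot(forecast.loc[start_date:end_date], label="forecast")
ax.set_xlabel("date")
ax.set_ylabel("outcome")
ax.legend()
plt.show()
```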

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py:

```diff
@@ -14,49 +14,52 @@ from validmind.vm_models import Figure, Metric
 @dataclass
 class RegressionModelForecastPlotLevels(Metric):
     """
-    … (43 lines not captured)
+    Assesses the alignment between forecasted and observed values in regression models through visual plots, including
+    handling data transformations.
+
+    ### Purpose
+
+    The `RegressionModelForecastPlotLevels` test aims to visually assess the performance of a series of regression
+    models by comparing their forecasted values against the actual observed values in both training and test datasets.
+    This test helps determine the accuracy of the models and can handle specific data transformations before making the
+    comparison, providing a comprehensive evaluation of model performance.
+
+    ### Test Mechanism
+
+    The test mechanism involves initializing the `RegressionModelForecastPlotLevels` class with an optional
+    `transformation` parameter. The class then:
+
+    - Checks for the presence of model objects and raises a `ValueError` if none are found.
+    - Processes each model to generate predictive forecasts for both training and testing datasets.
+    - Contrasts these forecasts with the actual observed values.
+    - Produces plots to visually compare forecasted and observed values for both raw and transformed datasets.
+    - Handles specified transformations (e.g., "integrate") by performing cumulative sums to create a new series before
+    plotting.
+
+    ### Signs of High Risk
+
+    - Significant deviation between forecasted and observed values in training or testing datasets.
+    - Patterns suggesting overfitting or underfitting.
+    - Large discrepancies in the plotted forecasts, indicating potential issues with model generalizability and
+    precision.
+
+    ### Strengths
+
+    - **Visual Evaluations**: Provides an intuitive, visual way to assess multiple regression models, aiding in easier
+    interpretation and evaluation of forecast accuracy.
+    - **Transformation Handling**: Can process specified data transformations such as "integrate," enhancing
+    flexibility.
+    - **Detailed Perspective**: Assesses performance on both training and testing datasets, offering a comprehensive
+    view of model behavior.
+
+    ### Limitations
+
+    - **Subjectivity**: Relies heavily on visual interpretation, which may vary between individuals.
+    - **Limited Transformation Capability**: Supports only the "integrate" transformation; other complex
+    transformations might not be handled.
+    - **Overhead**: Plotting can be computationally intensive for large datasets, increasing runtime.
+    - **Numerical Measurement**: Does not provide a numerical metric to quantify forecast accuracy, relying solely on
+    visual assessment.
     """
 
     name = "regression_forecast_plot_levels"
```

validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py:

```diff
@@ -16,44 +16,46 @@ logger = get_logger(__name__)
 @dataclass
 class RegressionModelSensitivityPlot(Metric):
     """
-    …
-    visualizing the
-    … (20 lines not captured)
+    Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
+    visualizing the impact.
+
+    ### Purpose
+
+    The Regression Sensitivity Plot test is designed to perform sensitivity analysis on regression models. This test
+    aims to measure the impact of slight changes (shocks) applied to individual variables on the system's outcome while
+    keeping all other variables constant. By doing so, it analyzes the effects of each independent variable on the
+    dependent variable within the regression model, helping identify significant risk factors that could substantially
+    influence the model's output.
+
+    ### Test Mechanism
+
+    This test operates by initially applying shocks of varying magnitudes, defined by specific parameters, to each of
+    the model's features, one at a time. With all other variables held constant, a new prediction is made for each
+    dataset subjected to shocks. Any changes in the model's predictions are directly attributed to the shocks applied.
+    If the transformation parameter is set to "integrate," initial predictions and target values undergo transformation
+    via an integration function before being plotted. Finally, a plot demonstrating observed values against predicted
+    values for each model is generated, showcasing a distinct line graph illustrating predictions for each shock.
+
+    ### Signs of High Risk
+
+    - Drastic alterations in model predictions due to minor shocks to an individual variable, indicating high
+    sensitivity and potential over-dependence on that variable.
+    - Unusually high or unpredictable shifts in response to shocks, suggesting potential model instability or
     overfitting.
 
-    … (2 lines not captured)
-    understanding feature importance.
-    … (6 lines not captured)
-    reflect
-    … (3 lines not captured)
-    subjectivity in interpretation.
+    ### Strengths
+
+    - Helps identify variables that strongly influence model outcomes, aiding in understanding feature importance.
+    - Generates visual plots, making results easily interpretable even to non-technical stakeholders.
+    - Useful in identifying overfitting and detecting unstable models that react excessively to minor variable changes.
+
+    ### Limitations
+
+    - Operates on the assumption that all other variables remain unchanged during the application of a shock, which may
+    not reflect real-world interdependencies.
+    - Best compatible with linear models and may not effectively evaluate the sensitivity of non-linear models.
+    - Provides a visual representation without a numerical risk measure, potentially introducing subjectivity in
+    interpretation.
     """
 
     name = "regression_sensitivity_plot"
```
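
The shock mechanism described above can be sketched as follows: perturb one feature at a time while holding the others fixed, re-predict, and compare against the baseline predictions. The model, data, and shock sizes below are illustrative assumptions, not the test's actual code:

```python
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Illustrative fitted model; the real test consumes ValidMind model inputs
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(200, 2)), columns=["x1", "x2"])
y = 3.0 * X["x1"] + 0.5 * X["x2"] + rng.normal(size=200)
model = LinearRegression().fit(X, y)

baseline = model.predict(X)
shocks = [0.1, 0.5, 1.0]  # shock sizes in units of each feature's std dev

for col in X.columns:
    for shock in shocks:
        X_shocked = X.copy()
        # Shock one feature; all other variables are held constant
        X_shocked[col] = X_shocked[col] + shock * X[col].std()
        shift = model.predict(X_shocked) - baseline
        print(f"{col} +{shock} sd -> mean prediction shift {shift.mean():.3f}")
```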

validmind/tests/model_validation/statsmodels/RegressionModelSummary.py:

```diff
@@ -17,36 +17,38 @@ class RegressionModelSummary(Metric):
     """
     Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.
 
-    … (18 lines not captured)
+    ### Purpose
+
+    The Regression Model Summary test evaluates the performance of regression models by measuring their predictive
+    ability regarding dependent variables given changes in the independent variables. It uses conventional regression
+    metrics such as R-Squared, Adjusted R-Squared, Mean Squared Error (MSE), and Root Mean Squared Error (RMSE) to
+    assess the model's accuracy and fit.
+
+    ### Test Mechanism
+
+    This test employs the 'train_ds' attribute of the model to gather and analyze the training data. Initially, it
+    fetches the independent variables and uses the model to make predictions on these given features. Subsequently, it
+    calculates several standard regression performance metrics including R-Squared, Adjusted R-Squared, Mean Squared
+    Error (MSE), and Root Mean Squared Error (RMSE), which quantify the approximation of the predicted responses to the
+    actual responses.
+
+    ### Signs of High Risk
+
+    - Low R-Squared and Adjusted R-Squared values.
+    - High MSE and RMSE values.
+
+    ### Strengths
+
     - Offers an extensive evaluation of regression models by combining four key measures of model accuracy and fit.
     - Provides a comprehensive view of the model's performance.
-    - Both the R-Squared and Adjusted R-Squared measures are readily interpretable.
-    … (3 lines not captured)
-    - Applicable exclusively to regression models.
-    … (3 lines not captured)
-    - A high R-squared or adjusted R-squared may not necessarily indicate a good model, especially in cases where the
-    model is possibly overfitting the data.
+    - Both the R-Squared and Adjusted R-Squared measures are readily interpretable.
+
+    ### Limitations
+
+    - Applicable exclusively to regression models.
+    - RMSE and MSE might be sensitive to outliers.
+    - A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
+    overfitting.
     """
 
     name = "regression_model_summary"
```
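
For reference, the four metrics named in this docstring can be computed as below. The data, and the choice of p (number of independent variables) for Adjusted R-Squared, are illustrative:

```python
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Illustrative actual vs predicted responses
y_true = np.array([3.0, 5.0, 7.5, 9.0, 11.0])
y_pred = np.array([2.8, 5.3, 7.1, 9.4, 10.6])
n, p = len(y_true), 2  # n observations, p independent variables (assumed)

r2 = r2_score(y_true, y_pred)
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)  # penalizes extra predictors
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(f"R2={r2:.3f}  Adj R2={adj_r2:.3f}  MSE={mse:.3f}  RMSE={rmse:.3f}")
```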

validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py:

```diff
@@ -21,28 +21,35 @@ logger = get_logger(__name__)
 class RegressionPermutationFeatureImportance(Metric):
     """
     Assesses the significance of each feature in a model by evaluating the impact on model performance when feature
-    values are randomly rearranged.
-    importance of features based on the decrease in model's predictive accuracy, typically R².
+    values are randomly rearranged.
 
-    … (2 lines not captured)
+    ### Purpose
+
+    The primary purpose of this metric is to determine which features significantly impact the performance of a
+    regression model developed using statsmodels. The metric measures how much the prediction accuracy deteriorates
     when each feature's values are permuted.
 
-    … (2 lines not captured)
+    ### Test Mechanism
+
+    This metric shuffles the values of each feature one at a time in the dataset, computes the model's performance
+    after each permutation, and compares it to the baseline performance. A significant decrease in performance
     indicates the importance of the feature.
 
-    … (2 lines not captured)
+    ### Signs of High Risk
+
+    - Significant reliance on a feature that, when permuted, leads to a substantial decrease in performance, suggesting
     overfitting or high model dependency on that feature.
     - Features identified as unimportant despite known impacts from domain knowledge, suggesting potential issues in
     model training or data preprocessing.
 
-    … (2 lines not captured)
+    ### Strengths
+
+    - Directly assesses the impact of each feature on model performance, providing clear insights into model
+    dependencies.
     - Model-agnostic within the scope of statsmodels, applicable to any regression model that outputs predictions.
 
-    …
+    ### Limitations
+
     - The metric is specific to statsmodels and cannot be used with other types of models without adaptation.
     - It does not capture interactions between features, which can lead to underestimating the importance of correlated
     features.
```
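
A minimal sketch of the permutation procedure described above: shuffle one column at a time and measure the drop in R² against the baseline. The actual metric targets statsmodels models; sklearn is used here only to keep the example self-contained:

```python
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(300, 3)), columns=["a", "b", "c"])
y = 4.0 * X["a"] + 1.0 * X["b"] + rng.normal(size=300)  # "c" carries no signal
model = LinearRegression().fit(X, y)

baseline = r2_score(y, model.predict(X))
for col in X.columns:
    X_perm = X.copy()
    X_perm[col] = rng.permutation(X_perm[col].values)  # shuffle one column
    drop = baseline - r2_score(y, model.predict(X_perm))
    print(f"{col}: R^2 drop after permutation = {drop:.3f}")
```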

validmind/tests/model_validation/statsmodels/RunsTest.py:

```diff
@@ -11,41 +11,45 @@ class RunsTest(Metric):
     """
     Executes Runs Test on ML model to detect non-random patterns in output data sequence.
 
-    … (12 lines not captured)
+    ### Purpose
+
+    The Runs Test is a statistical procedure used to determine whether the sequence of data extracted from the ML model
+    behaves randomly or not. Specifically, it analyzes runs, sequences of consecutive positives or negatives, in the
+    data to check if there are more or fewer runs than expected under the assumption of randomness. This can be an
+    indication of some pattern, trend, or cycle in the model's output which may need attention.
+
+    ### Test Mechanism
+
+    The testing mechanism applies the Runs Test from the statsmodels module on each column of the training dataset. For
+    every feature in the dataset, a Runs Test is executed, whose output includes a Runs Statistic and P-value. A low
+    P-value suggests that data arrangement in the feature is not likely to be random. The results are stored in a
+    dictionary where the keys are the feature names, and the values are another dictionary storing the test statistic
+    and the P-value for each feature.
+
+    ### Signs of High Risk
+
     - High risk is indicated when the P-value is close to zero.
-    - If the
+    - If the P-value is less than a predefined significance level (like 0.05), it suggests that the runs (series of
     positive or negative values) in the model's output are not random and are longer or shorter than what is expected
     under a random scenario.
     - This would mean there's a high risk of non-random distribution of errors or model outcomes, suggesting potential
     issues with the model.
 
-    … (2 lines not captured)
-    sequence.
-    … (8 lines not captured)
+    ### Strengths
+
+    - Straightforward and fast for detecting non-random patterns in data sequence.
+    - Validates assumptions of randomness, which is valuable for checking error distributions in regression models,
+    trendless time series data, and ensuring a classifier doesn't favor one class over another.
+    - Can be applied to both classification and regression tasks, making it versatile.
+
+    ### Limitations
+
+    - Assumes that the data is independently and identically distributed (i.i.d.), which might not be the case for many
+    real-world datasets.
+    - The conclusion drawn from the low P-value indicating non-randomness does not provide information about the type
     or the source of the detected pattern.
-    … (2 lines not captured)
-    - Furthermore, this test does not provide model performance evaluation; it is used to detect patterns in the
-    sequence of outputs only.
+    - Sensitive to extreme values (outliers), and overly large or small run sequences can influence the results.
+    - Does not provide model performance evaluation; it is used to detect patterns in the sequence of outputs only.
     """
 
     name = "runs_test"
```
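
A minimal sketch of the per-column mechanism described above, using the Runs Test implementation from statsmodels (`runstest_1samp` in `statsmodels.sandbox.stats.runs`). Data and feature names are illustrative, and ValidMind's own wrapper differs in detail:

```python
import numpy as np
import pandas as pd
from statsmodels.sandbox.stats.runs import runstest_1samp

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "random_feature": rng.normal(size=100),
    "trending_feature": np.arange(100, dtype=float),  # clearly non-random
})

# One Runs Test per column; results keyed by feature name as described above
results = {}
for col in df.columns:
    stat, pvalue = runstest_1samp(df[col].to_numpy(), cutoff="mean", correction=True)
    results[col] = {"stat": stat, "p-value": pvalue}

print(results)  # the trending feature should show a p-value near zero
```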