validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
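Across the test modules listed above, the dominant change is structural: class-based `Metric` subclasses with `run()` and `summary()` methods give way to plain functions registered through the `@tags` and `@tasks` decorators, returning figures or table rows directly (see the three reconstructed diffs below). A minimal sketch of the new test style, modeled on those diffs — the function name and plot body here are illustrative, not part of the package:

    import matplotlib.pyplot as plt

    from validmind import tags, tasks
    from validmind.vm_models import VMDataset, VMModel


    @tags("visualization")
    @tasks("regression")
    def MyForecastPlot(model: VMModel, dataset: VMDataset):
        """Plots observed vs. forecast values for a fitted regression model."""
        fig = plt.figure()
        plt.plot(dataset.df.index, dataset.y, label="Observed", color="grey")
        plt.plot(dataset.df.index, dataset.y_pred(model), label="Forecast")
        plt.legend()
        plt.close()  # close so the figure only appears via the returned result
        return fig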
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py:

@@ -2,197 +2,93 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from validmind
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+def integrate_diff(series_diff, start_value):
+    series_diff = np.array(series_diff)
+    series_orig = np.cumsum(series_diff)
+    series_orig += start_value
 
+    return series_orig
 
-
-
+
+@tags("time_series_data", "forecasting", "visualization")
+@tasks("regression")
+def RegressionModelForecastPlotLevels(
+    model: VMModel,
+    dataset: VMDataset,
+):
     """
-    Assesses the alignment between forecasted and observed values in regression models through visual plots
-    handling data transformations.
+    Assesses the alignment between forecasted and observed values in regression models through visual plots
 
     ### Purpose
 
-
-
-
-
+    This test aims to visually assess the performance of a regression model by comparing its forecasted values against
+    the actual observed values for both the raw and transformed (integrated) data. This helps determine the accuracy
+    of the model and can help identify overfitting or underfitting. The integration is applied to highlight the trend
+    rather than the absolute level.
 
     ### Test Mechanism
 
-
-
+    This test generates two plots:
+
+    - Raw data vs forecast
+    - Transformed data vs forecast
 
-
-    - Processes each model to generate predictive forecasts for both training and testing datasets.
-    - Contrasts these forecasts with the actual observed values.
-    - Produces plots to visually compare forecasted and observed values for both raw and transformed datasets.
-    - Handles specified transformations (e.g., "integrate") by performing cumulative sums to create a new series before
-    plotting.
+    The transformed data is created by performing a cumulative sum on the raw data.
 
     ### Signs of High Risk
 
-    - Significant deviation between forecasted and observed values
+    - Significant deviation between forecasted and observed values.
     - Patterns suggesting overfitting or underfitting.
     - Large discrepancies in the plotted forecasts, indicating potential issues with model generalizability and
    precision.
 
     ### Strengths
 
-
-
-    - **Transformation Handling**: Can process specified data transformations such as "integrate," enhancing
-    flexibility.
-    - **Detailed Perspective**: Assesses performance on both training and testing datasets, offering a comprehensive
-    view of model behavior.
+    - Provides an intuitive, visual way to assess multiple regression models, aiding in easier interpretation and
+    evaluation of forecast accuracy.
 
     ### Limitations
 
-
-
-    transformations might not be handled.
-    - **Overhead**: Plotting can be computationally intensive for large datasets, increasing runtime.
-    - **Numerical Measurement**: Does not provide a numerical metric to quantify forecast accuracy, relying solely on
-    visual assessment.
+    - Relies heavily on visual interpretation, which may vary between individuals.
+    - Does not provide a numerical metric to quantify forecast accuracy, relying solely on visual assessment.
     """
+    index = dataset.df.index
+
+    if not pd.api.types.is_datetime64_any_dtype(index):
+        raise ValueError("Test requires a time series dataset")
+
+    fig, axs = plt.subplots(2, 1)
+
+    y_pred = dataset.y_pred(model)
+
+    # raw data vs forecast
+    axs[0].plot(index, dataset.y, label="Observed", color="grey")
+    axs[0].plot(index, y_pred, label="Forecast")
+    axs[0].set_title("Forecast vs Observed")
+    axs[0].legend()
+
+    # transformed data
+    dataset_y_transformed = integrate_diff(dataset.y, start_value=dataset.y[0])
+    y_pred_transformed = integrate_diff(y_pred, start_value=dataset_y_transformed[0])
+
+    axs[1].plot(
+        index,
+        dataset_y_transformed,
+        label="Observed",
+        color="grey",
+    )
+    axs[1].plot(index, y_pred_transformed, label="Forecast")
+    axs[1].set_title("Integrated Forecast vs Observed")
+    axs[1].legend()
+
+    plt.close()
 
-
-    required_inputs = ["models", "datasets"]
-    default_params = {
-        "transformation": None,
-    }
-    tasks = ["regression"]
-    tags = ["forecasting", "visualization"]
-
-    def run(self):
-        transformation = self.params["transformation"]
-
-        if not self.inputs.models:
-            raise ValueError("List of models must be provided in the models parameter")
-
-        all_models = []
-        for model in self.inputs.models:
-            all_models.append(model)
-
-        figures = self._plot_forecast(all_models, self.inputs.datasets, transformation)
-
-        return self.cache_results(figures=figures)
-
-    def integrate_diff(self, series_diff, start_value):
-        series_diff = np.array(series_diff)
-        series_orig = np.cumsum(series_diff)
-        series_orig += start_value
-        return series_orig
-
-    def _plot_forecast(
-        self,
-        model_list,
-        datasets,
-        transformation=None,
-    ):
-        figures = []
-
-        for i, fitted_model in enumerate(model_list):
-            feature_columns = datasets[0].feature_columns
-
-            train_ds = datasets[0]
-            test_ds = datasets[1]
-
-            y_pred = train_ds.y_pred(fitted_model)
-            y_pred_test = test_ds.y_pred(fitted_model)
-
-            all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])
-
-            if all_dates.empty:
-                raise ValueError(
-                    "No dates in the data. Unable to determine start and end dates."
-                )
-
-            fig, axs = plt.subplots(2, 2)
-
-            # train vs forecast
-            axs[0, 0].plot(
-                train_ds.index, train_ds.y, label="Train Dataset", color="grey"
-            )
-            axs[0, 0].plot(train_ds.index, y_pred, label="Train Forecast")
-            axs[0, 0].set_title(f"Forecast vs Observed for features {feature_columns}")
-            axs[0, 0].legend()
-
-            # test vs forecast
-            axs[0, 1].plot(test_ds.index, test_ds.y, label="Test Dataset", color="grey")
-            axs[0, 1].plot(test_ds.index, y_pred_test, label="Test Forecast")
-            axs[0, 1].set_title(f"Forecast vs Observed for features {feature_columns}")
-            axs[0, 1].legend()
-
-            if transformation == "integrate":
-                train_ds_y_transformed = self.integrate_diff(
-                    train_ds.y_df().values, start_value=train_ds.y[0]
-                )
-
-                test_ds_y_transformed = self.integrate_diff(
-                    test_ds.y_df().values, start_value=test_ds.y[0]
-                )
-
-                # Use the first value of the transformed train dataset as the start_value for predicted datasets
-
-                y_pred_transformed = self.integrate_diff(
-                    y_pred, start_value=train_ds_y_transformed[0]
-                )
-                y_pred_test_transformed = self.integrate_diff(
-                    y_pred_test, start_value=test_ds_y_transformed[0]
-                )
-
-                # Create copies of the original datasets and update them to reflect transformed data
-                train_ds_transformed = train_ds.copy
-                train_ds_transformed["y"] = train_ds_y_transformed
-
-                test_ds_transformed = test_ds.copy
-                test_ds_transformed["y"] = test_ds_y_transformed
-
-                # transformed train vs forecast
-                axs[1, 0].plot(
-                    train_ds.index,
-                    train_ds_y_transformed,
-                    label="Train Dataset",
-                    color="grey",
-                )
-
-                axs[1, 0].plot(
-                    train_ds.index, y_pred_transformed, label="Train Forecast"
-                )
-
-                axs[1, 0].set_title(
-                    f"Integrated Forecast vs Observed for features {feature_columns}"
-                )
-                axs[1, 0].legend()
-
-                # transformed test vs forecast
-                axs[1, 1].plot(
-                    test_ds.index,
-                    test_ds_y_transformed,
-                    label="Test Dataset",
-                    color="grey",
-                )
-
-                axs[1, 1].plot(
-                    test_ds.index, y_pred_test_transformed, label="Test Forecast"
-                )
-                axs[1, 1].set_title(
-                    f"Integrated Forecast vs Observed for features {feature_columns}"
-                )
-                axs[1, 1].legend()
-
-            figures.append(
-                Figure(for_object=self, key=f"{self.key}:{i}", figure=fig, metadata={})
-            )
-
-            # Close the figure to prevent it from displaying
-            plt.close(fig)
-
-        return figures
+    return fig
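Both rewritten implementations hinge on the small `integrate_diff` helper above, which undoes a first-difference transform by cumulative summation. A self-contained sketch of its behavior on toy data (plain NumPy, independent of validmind):

    import numpy as np


    def integrate_diff(series_diff, start_value):
        # Cumulative summation turns first differences back into a level
        # series, offset so it lines up with the chosen starting value.
        series_orig = np.cumsum(np.array(series_diff))
        series_orig += start_value
        return series_orig


    levels = np.array([100.0, 102.0, 101.0, 105.0])
    diffs = np.diff(levels)  # [ 2., -1.,  4.]

    # Recovers the original levels from the second observation onward.
    print(integrate_diff(diffs, start_value=levels[0]))  # [102. 101. 105.]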
validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py:

@@ -2,19 +2,34 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from
+from typing import List, Union
 
 import matplotlib.pyplot as plt
 import numpy as np
 
+from validmind import tags, tasks
 from validmind.logging import get_logger
-from validmind.vm_models import
+from validmind.vm_models import VMDataset, VMModel
 
 logger = get_logger(__name__)
 
 
-
-
+def integrate_diff(series_diff, start_value):
+    series_diff = np.asarray(series_diff, dtype=np.float64)  # Convert to float64
+    series = np.cumsum(series_diff)
+    series += start_value
+
+    return series
+
+
+@tags("senstivity_analysis", "visualization")
+@tasks("regression")
+def RegressionModelSensitivityPlot(
+    dataset: VMDataset,
+    model: VMModel,
+    shocks: List[float] = [0.1],
+    transformation: Union[str, None] = None,
+):
     """
     Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
     visualizing the impact.
@@ -57,120 +72,43 @@ class RegressionModelSensitivityPlot(Metric):
     - Provides a visual representation without a numerical risk measure, potentially introducing subjectivity in
     interpretation.
     """
-
-
-
-
-
-
+    features_df = dataset.x_df()
+    target_df = dataset.y_df()
+
+    shocked_dfs = {"Baseline": features_df}
+    for shock in shocks:
+        for col in dataset.feature_columns:
+            temp_df = features_df.copy()
+            temp_df[col] = temp_df[col] * (1 + shock)
+            shocked_dfs[f"Shock of {shock} to {col}"] = temp_df
+
+    predictions = {
+        label: model.predict(shocked_df) for label, shocked_df in shocked_dfs.items()
     }
-    tasks = ["regression"]
-    tags = ["senstivity_analysis", "visualization"]
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                self.inputs.datasets[1].y_df().values, start_value
-            )
-
-            predictions = self.predict_shocked_datasets(shocked_datasets, model)
-            transformed_predictions = self.transform_predictions(
-                predictions, start_value
-            )
-
-        else:
-            transformed_target = target_df.values
-            transformed_predictions = predictions
-
-        fig = self._plot_predictions(
-            target_df.index, transformed_target, transformed_predictions
-        )
-        figures.append(
-            Figure(for_object=self, key=f"{self.key}:{i}", figure=fig, metadata={})
-        )
-        return self.cache_results(figures=figures)
-
-    def transform_predictions(self, predictions, start_value):
-        transformed_predictions = (
-            {}
-        )  # Initialize an empty dictionary to store the transformed predictions
-
-        for (
-            label,
-            pred,
-        ) in predictions.items():  # Here, label is the key, pred is the value
-            transformed_pred = self.integrate_diff(pred, start_value)
-            transformed_predictions[
-                label
-            ] = transformed_pred  # Store transformed dataframe in the new dictionary
-
-        return transformed_predictions
-
-    def predict_shocked_datasets(self, shocked_datasets, model):
-        predictions = {}
-
-        for label, shocked_dataset in shocked_datasets.items():
-            y_pred = model.predict(shocked_dataset)
-            predictions[label] = y_pred
-
-        return predictions
-
-    def _plot_predictions(self, index, target, predictions):
-        fig = plt.figure()
-
-        # Plot the target
-        plt.plot(index, target, label="Observed")
-
-        # Plot each prediction
-        for label, pred in predictions.items():
-            plt.plot(index, pred, label=label)
-
-        plt.legend()
-
-        # Close the figure to prevent it from displaying
-        plt.close(fig)
-        return fig
-
-    def integrate_diff(self, series_diff, start_value):
-        series_diff = np.asarray(series_diff, dtype=np.float64)  # Convert to float64
-        series = np.cumsum(series_diff)
-        series += start_value
-        return series
-
-    def apply_shock(self, df, shocks):
-        shocked_dfs = {"Baseline": df.copy()}  # Start with the original dataset
-        cols_to_shock = df.columns  # All columns
-
-        # Apply shock one variable at a time
-        for shock in shocks:
-            for col in cols_to_shock:
-                temp_df = df.copy()
-                temp_df[col] = temp_df[col] * (1 + shock)
-                shocked_dfs[
-                    f"Shock of {shock} to {col}"
-                ] = temp_df  # Include shock value in the key
-
-        return shocked_dfs
+    if transformation is None:
+        transformed_target = target_df.values
+        transformed_predictions = predictions
+
+    elif transformation == "integrate":
+        transformed_target = integrate_diff(target_df.values, dataset.y[0])
+        transformed_predictions = {
+            label: integrate_diff(pred, dataset.y[0])
+            for label, pred in predictions.items()
+        }
+
+    else:
+        raise ValueError(f"Invalid transformation: {transformation}")
+
+    fig = plt.figure()
+
+    plt.plot(target_df.index, transformed_target, label="Observed")
+
+    for label, pred in transformed_predictions.items():
+        plt.plot(target_df.index, pred, label=label)
+
+    plt.legend()
+
+    plt.close()
+
+    return fig
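The shock mechanics in the rewritten test are plain pandas: one feature at a time is scaled by `(1 + shock)` while the unshocked frame is kept under a `"Baseline"` key. A standalone sketch of that loop (the column names are made up for illustration):

    import pandas as pd

    features_df = pd.DataFrame({"rate": [1.0, 2.0], "spread": [0.5, 0.7]})
    shocks = [0.1]  # a 10% shock, matching the new default parameter

    shocked_dfs = {"Baseline": features_df}
    for shock in shocks:
        for col in features_df.columns:
            temp_df = features_df.copy()
            temp_df[col] = temp_df[col] * (1 + shock)  # shock one column only
            shocked_dfs[f"Shock of {shock} to {col}"] = temp_df

    print(shocked_dfs["Shock of 0.1 to rate"]["rate"].tolist())  # [1.1, 2.2]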
validmind/tests/model_validation/statsmodels/RegressionModelSummary.py:

@@ -2,18 +2,17 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
-import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score
 
-from validmind
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 from .statsutils import adj_r2_score
 
 
-@
-
+@tags("model_performance", "regression")
+@tasks("regression")
+def RegressionModelSummary(dataset: VMDataset, model: VMModel):
     """
     Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.
 
@@ -26,11 +25,8 @@ class RegressionModelSummary(Metric):
 
     ### Test Mechanism
 
-    This test
-
-    calculates several standard regression performance metrics including R-Squared, Adjusted R-Squared, Mean Squared
-    Error (MSE), and Root Mean Squared Error (RMSE), which quantify the approximation of the predicted responses to the
-    actual responses.
+    This test uses the sklearn library to calculate the R-Squared, Adjusted R-Squared, MSE, and RMSE. It outputs a
+    table with the results of these metrics along with the feature columns used by the model.
 
     ### Signs of High Risk
 
@@ -45,54 +41,21 @@ class RegressionModelSummary(Metric):
 
     ### Limitations
 
-    - Applicable exclusively to regression models.
     - RMSE and MSE might be sensitive to outliers.
     - A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
     overfitting.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-        r2 = r2_score(y_true, y_pred)
-        adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
-        mse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=True)
-        rmse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False)
-
-        results = {
-            "Independent Variables": X_columns,
-            "R-Squared": r2,
-            "Adjusted R-Squared": adj_r2,
-            "MSE": mse,
-            "RMSE": rmse,
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    return [
+        {
+            "Independent Variables": dataset.feature_columns,
+            "R-Squared": r2_score(y_true, y_pred),
+            "Adjusted R-Squared": adj_r2_score(
+                y_true, y_pred, len(y_true), len(dataset.feature_columns)
+            ),
+            "MSE": mean_squared_error(y_true=y_true, y_pred=y_pred, squared=True),
+            "RMSE": mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False),
         }
-
-
-        return self.cache_results(
-            {
-                "regression_analysis": summary_regression.to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the regression analysis results
-        """
-        summary_regression = metric_value["regression_analysis"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_regression,
-                    metadata=ResultTableMetadata(title="Regression Analysis Results"),
-                ),
-            ]
-        )
+    ]
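The rewritten summary test computes its metrics directly with scikit-learn and returns a one-row table (a list containing a single dict). A standalone sketch of the same arithmetic on toy data; `adj_r2_score` is validmind's own helper in `statsutils`, so the standard adjusted R-squared formula is written out inline here as a stand-in:

    import numpy as np
    from sklearn.metrics import mean_squared_error, r2_score

    y_true = np.array([3.0, 2.5, 4.0, 5.1])
    y_pred = np.array([2.8, 2.7, 3.9, 4.9])
    n_features = 2  # number of independent variables in the fitted model

    r2 = r2_score(y_true, y_pred)
    n_obs = len(y_true)
    adj_r2 = 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)

    row = {
        "R-Squared": r2,
        "Adjusted R-Squared": adj_r2,
        # Note: the squared= keyword is deprecated in recent scikit-learn
        # releases in favor of root_mean_squared_error.
        "MSE": mean_squared_error(y_true, y_pred, squared=True),
        "RMSE": mean_squared_error(y_true, y_pred, squared=False),
    }
    print([row])  # same shape as the new test's return value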