validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.24.dist-info/METADATA +0 -118
  196. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
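The recurring pattern in the hunks below is this release's migration from class-based `Metric` subclasses to plain functions decorated with `@tags` and `@tasks`, which take typed `VMDataset`/`VMModel` inputs plus keyword parameters and return figures or tables directly. A minimal sketch of how such a functional test would be invoked with the validmind 2.x API (the setup calls and their arguments are assumptions based on the library's documented interface, not something shown in this diff):

    import numpy as np
    import pandas as pd
    import validmind as vm
    from sklearn.linear_model import LinearRegression
    from validmind.tests import run_test

    # Toy daily time series; RegressionModelForecastPlotLevels requires a datetime index.
    idx = pd.date_range("2020-01-01", periods=100, freq="D")
    df = pd.DataFrame({"x": np.arange(100.0)}, index=idx)
    df["y"] = 2 * df["x"] + np.random.default_rng(0).normal(size=100)

    model = LinearRegression().fit(df[["x"]], df["y"])

    # Assumed validmind setup: wrap the model and dataset, then attach predictions.
    vm_model = vm.init_model(model, input_id="linreg")
    vm_dataset = vm.init_dataset(df, target_column="y", input_id="train")
    vm_dataset.assign_predictions(model=vm_model)

    result = run_test(
        "validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels",
        inputs={"model": vm_model, "dataset": vm_dataset},
    )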
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py

@@ -2,197 +2,93 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from validmind.vm_models import Figure, Metric
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+def integrate_diff(series_diff, start_value):
+    series_diff = np.array(series_diff)
+    series_orig = np.cumsum(series_diff)
+    series_orig += start_value
 
+    return series_orig
 
-@dataclass
-class RegressionModelForecastPlotLevels(Metric):
+
+@tags("time_series_data", "forecasting", "visualization")
+@tasks("regression")
+def RegressionModelForecastPlotLevels(
+    model: VMModel,
+    dataset: VMDataset,
+):
     """
-    Assesses the alignment between forecasted and observed values in regression models through visual plots, including
-    handling data transformations.
+    Assesses the alignment between forecasted and observed values in regression models through visual plots
 
     ### Purpose
 
-    The `RegressionModelForecastPlotLevels` test aims to visually assess the performance of a series of regression
-    models by comparing their forecasted values against the actual observed values in both training and test datasets.
-    This test helps determine the accuracy of the models and can handle specific data transformations before making the
-    comparison, providing a comprehensive evaluation of model performance.
+    This test aims to visually assess the performance of a regression model by comparing its forecasted values against
+    the actual observed values for both the raw and transformed (integrated) data. This helps determine the accuracy
+    of the model and can help identify overfitting or underfitting. The integration is applied to highlight the trend
+    rather than the absolute level.
 
     ### Test Mechanism
 
-    The test mechanism involves initializing the `RegressionModelForecastPlotLevels` class with an optional
-    `transformation` parameter. The class then:
+    This test generates two plots:
+
+    - Raw data vs forecast
+    - Transformed data vs forecast
 
-    - Checks for the presence of model objects and raises a `ValueError` if none are found.
-    - Processes each model to generate predictive forecasts for both training and testing datasets.
-    - Contrasts these forecasts with the actual observed values.
-    - Produces plots to visually compare forecasted and observed values for both raw and transformed datasets.
-    - Handles specified transformations (e.g., "integrate") by performing cumulative sums to create a new series before
-    plotting.
+    The transformed data is created by performing a cumulative sum on the raw data.
 
     ### Signs of High Risk
 
-    - Significant deviation between forecasted and observed values in training or testing datasets.
+    - Significant deviation between forecasted and observed values.
     - Patterns suggesting overfitting or underfitting.
     - Large discrepancies in the plotted forecasts, indicating potential issues with model generalizability and
    precision.
 
     ### Strengths
 
-    - **Visual Evaluations**: Provides an intuitive, visual way to assess multiple regression models, aiding in easier
-    interpretation and evaluation of forecast accuracy.
-    - **Transformation Handling**: Can process specified data transformations such as "integrate," enhancing
-    flexibility.
-    - **Detailed Perspective**: Assesses performance on both training and testing datasets, offering a comprehensive
-    view of model behavior.
+    - Provides an intuitive, visual way to assess multiple regression models, aiding in easier interpretation and
+    evaluation of forecast accuracy.
 
     ### Limitations
 
-    - **Subjectivity**: Relies heavily on visual interpretation, which may vary between individuals.
-    - **Limited Transformation Capability**: Supports only the "integrate" transformation; other complex
-    transformations might not be handled.
-    - **Overhead**: Plotting can be computationally intensive for large datasets, increasing runtime.
-    - **Numerical Measurement**: Does not provide a numerical metric to quantify forecast accuracy, relying solely on
-    visual assessment.
+    - Relies heavily on visual interpretation, which may vary between individuals.
+    - Does not provide a numerical metric to quantify forecast accuracy, relying solely on visual assessment.
     """
+    index = dataset.df.index
+
+    if not pd.api.types.is_datetime64_any_dtype(index):
+        raise ValueError("Test requires a time series dataset")
+
+    fig, axs = plt.subplots(2, 1)
+
+    y_pred = dataset.y_pred(model)
+
+    # raw data vs forecast
+    axs[0].plot(index, dataset.y, label="Observed", color="grey")
+    axs[0].plot(index, y_pred, label="Forecast")
+    axs[0].set_title("Forecast vs Observed")
+    axs[0].legend()
+
+    # transformed data
+    dataset_y_transformed = integrate_diff(dataset.y, start_value=dataset.y[0])
+    y_pred_transformed = integrate_diff(y_pred, start_value=dataset_y_transformed[0])
+
+    axs[1].plot(
+        index,
+        dataset_y_transformed,
+        label="Observed",
+        color="grey",
+    )
+    axs[1].plot(index, y_pred_transformed, label="Forecast")
+    axs[1].set_title("Integrated Forecast vs Observed")
+    axs[1].legend()
+
+    plt.close()
 
-    name = "regression_forecast_plot_levels"
-    required_inputs = ["models", "datasets"]
-    default_params = {
-        "transformation": None,
-    }
-    tasks = ["regression"]
-    tags = ["forecasting", "visualization"]
-
-    def run(self):
-        transformation = self.params["transformation"]
-
-        if not self.inputs.models:
-            raise ValueError("List of models must be provided in the models parameter")
-
-        all_models = []
-        for model in self.inputs.models:
-            all_models.append(model)
-
-        figures = self._plot_forecast(all_models, self.inputs.datasets, transformation)
-
-        return self.cache_results(figures=figures)
-
-    def integrate_diff(self, series_diff, start_value):
-        series_diff = np.array(series_diff)
-        series_orig = np.cumsum(series_diff)
-        series_orig += start_value
-        return series_orig
-
-    def _plot_forecast(
-        self,
-        model_list,
-        datasets,
-        transformation=None,
-    ):
-        figures = []
-
-        for i, fitted_model in enumerate(model_list):
-            feature_columns = datasets[0].feature_columns
-
-            train_ds = datasets[0]
-            test_ds = datasets[1]
-
-            y_pred = train_ds.y_pred(fitted_model)
-            y_pred_test = test_ds.y_pred(fitted_model)
-
-            all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])
-
-            if all_dates.empty:
-                raise ValueError(
-                    "No dates in the data. Unable to determine start and end dates."
-                )
-
-            fig, axs = plt.subplots(2, 2)
-
-            # train vs forecast
-            axs[0, 0].plot(
-                train_ds.index, train_ds.y, label="Train Dataset", color="grey"
-            )
-            axs[0, 0].plot(train_ds.index, y_pred, label="Train Forecast")
-            axs[0, 0].set_title(f"Forecast vs Observed for features {feature_columns}")
-            axs[0, 0].legend()
-
-            # test vs forecast
-            axs[0, 1].plot(test_ds.index, test_ds.y, label="Test Dataset", color="grey")
-            axs[0, 1].plot(test_ds.index, y_pred_test, label="Test Forecast")
-            axs[0, 1].set_title(f"Forecast vs Observed for features {feature_columns}")
-            axs[0, 1].legend()
-
-            if transformation == "integrate":
-                train_ds_y_transformed = self.integrate_diff(
-                    train_ds.y_df().values, start_value=train_ds.y[0]
-                )
-
-                test_ds_y_transformed = self.integrate_diff(
-                    test_ds.y_df().values, start_value=test_ds.y[0]
-                )
-
-                # Use the first value of the transformed train dataset as the start_value for predicted datasets
-
-                y_pred_transformed = self.integrate_diff(
-                    y_pred, start_value=train_ds_y_transformed[0]
-                )
-                y_pred_test_transformed = self.integrate_diff(
-                    y_pred_test, start_value=test_ds_y_transformed[0]
-                )
-
-                # Create copies of the original datasets and update them to reflect transformed data
-                train_ds_transformed = train_ds.copy
-                train_ds_transformed["y"] = train_ds_y_transformed
-
-                test_ds_transformed = test_ds.copy
-                test_ds_transformed["y"] = test_ds_y_transformed
-
-                # transformed train vs forecast
-                axs[1, 0].plot(
-                    train_ds.index,
-                    train_ds_y_transformed,
-                    label="Train Dataset",
-                    color="grey",
-                )
-
-                axs[1, 0].plot(
-                    train_ds.index, y_pred_transformed, label="Train Forecast"
-                )
-
-                axs[1, 0].set_title(
-                    f"Integrated Forecast vs Observed for features {feature_columns}"
-                )
-                axs[1, 0].legend()
-
-                # transformed test vs forecast
-                axs[1, 1].plot(
-                    test_ds.index,
-                    test_ds_y_transformed,
-                    label="Test Dataset",
-                    color="grey",
-                )
-
-                axs[1, 1].plot(
-                    test_ds.index, y_pred_test_transformed, label="Test Forecast"
-                )
-                axs[1, 1].set_title(
-                    f"Integrated Forecast vs Observed for features {feature_columns}"
-                )
-                axs[1, 1].legend()
-
-            figures.append(
-                Figure(for_object=self, key=f"{self.key}:{i}", figure=fig, metadata={})
-            )
-
-            # Close the figure to prevent it from displaying
-            plt.close(fig)
-
-        return figures
+    return fig
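The `integrate_diff` helper introduced above inverts first-differencing: cumulatively summing a differenced series and shifting by a start value recovers the levels. A standalone sanity check with toy numbers, assuming the same helper definition:

    import numpy as np

    def integrate_diff(series_diff, start_value):
        # Same logic as the helper added in the hunk above.
        series_orig = np.cumsum(np.array(series_diff))
        series_orig += start_value
        return series_orig

    levels = np.array([10.0, 12.0, 11.0, 15.0])
    diffs = np.diff(levels)                       # [ 2., -1.,  4.]
    recovered = integrate_diff(diffs, levels[0])  # [12., 11., 15.]

    # Differencing then integrating recovers the original levels (after the start).
    assert np.allclose(recovered, levels[1:])

In the test itself, `dataset.y` is treated as the differenced series, which is why the docstring describes the transformed plot as a cumulative sum of the raw data.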
validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py

@@ -2,19 +2,34 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
+from typing import List, Union
 
 import matplotlib.pyplot as plt
 import numpy as np
 
+from validmind import tags, tasks
 from validmind.logging import get_logger
-from validmind.vm_models import Figure, Metric
+from validmind.vm_models import VMDataset, VMModel
 
 logger = get_logger(__name__)
 
 
-@dataclass
-class RegressionModelSensitivityPlot(Metric):
+def integrate_diff(series_diff, start_value):
+    series_diff = np.asarray(series_diff, dtype=np.float64)  # Convert to float64
+    series = np.cumsum(series_diff)
+    series += start_value
+
+    return series
+
+
+@tags("senstivity_analysis", "visualization")
+@tasks("regression")
+def RegressionModelSensitivityPlot(
+    dataset: VMDataset,
+    model: VMModel,
+    shocks: List[float] = [0.1],
+    transformation: Union[str, None] = None,
+):
     """
     Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
     visualizing the impact.
@@ -57,120 +72,43 @@ class RegressionModelSensitivityPlot(Metric):
     - Provides a visual representation without a numerical risk measure, potentially introducing subjectivity in
     interpretation.
     """
-
-    name = "regression_sensitivity_plot"
-    required_inputs = ["models", "datasets"]
-    default_params = {
-        "transformation": None,
-        "shocks": [0.1],
+    features_df = dataset.x_df()
+    target_df = dataset.y_df()
+
+    shocked_dfs = {"Baseline": features_df}
+    for shock in shocks:
+        for col in dataset.feature_columns:
+            temp_df = features_df.copy()
+            temp_df[col] = temp_df[col] * (1 + shock)
+            shocked_dfs[f"Shock of {shock} to {col}"] = temp_df
+
+    predictions = {
+        label: model.predict(shocked_df) for label, shocked_df in shocked_dfs.items()
     }
-    tasks = ["regression"]
-    tags = ["senstivity_analysis", "visualization"]
 
-    def run(self):
-        logger.info(self.params)
-
-        transformation = self.params["transformation"]
-        shocks = self.params["shocks"]
-
-        if not self.inputs.models:
-            raise ValueError("List of models must be provided in the models parameter")
-
-        all_models = []
-        for model in self.inputs.models:
-            all_models.append(model)
-
-        figures = []
-        for i, model in enumerate(all_models):
-            features_df = self.inputs.datasets[1].x_df()
-            target_df = self.inputs.datasets[1].y_df()  # series
-
-            shocked_datasets = self.apply_shock(features_df, shocks)
-
-            predictions = self.predict_shocked_datasets(shocked_datasets, model)
-
-            if transformation == "integrate":
-                transformed_predictions = []
-                start_value = self.inputs.datasets[0].y[0]
-                transformed_target = self.integrate_diff(
-                    self.inputs.datasets[1].y_df().values, start_value
-                )
-
-                predictions = self.predict_shocked_datasets(shocked_datasets, model)
-                transformed_predictions = self.transform_predictions(
-                    predictions, start_value
-                )
-
-            else:
-                transformed_target = target_df.values
-                transformed_predictions = predictions
-
-            fig = self._plot_predictions(
-                target_df.index, transformed_target, transformed_predictions
-            )
-            figures.append(
-                Figure(for_object=self, key=f"{self.key}:{i}", figure=fig, metadata={})
-            )
-        return self.cache_results(figures=figures)
-
-    def transform_predictions(self, predictions, start_value):
-        transformed_predictions = (
-            {}
-        )  # Initialize an empty dictionary to store the transformed predictions
-
-        for (
-            label,
-            pred,
-        ) in predictions.items():  # Here, label is the key, pred is the value
-            transformed_pred = self.integrate_diff(pred, start_value)
-            transformed_predictions[
-                label
-            ] = transformed_pred  # Store transformed dataframe in the new dictionary
-
-        return transformed_predictions
-
-    def predict_shocked_datasets(self, shocked_datasets, model):
-        predictions = {}
-
-        for label, shocked_dataset in shocked_datasets.items():
-            y_pred = model.predict(shocked_dataset)
-            predictions[label] = y_pred
-
-        return predictions
-
-    def _plot_predictions(self, index, target, predictions):
-        fig = plt.figure()
-
-        # Plot the target
-        plt.plot(index, target, label="Observed")
-
-        # Plot each prediction
-        for label, pred in predictions.items():
-            plt.plot(index, pred, label=label)
-
-        plt.legend()
-
-        # Close the figure to prevent it from displaying
-        plt.close(fig)
-        return fig
-
-    def integrate_diff(self, series_diff, start_value):
-        series_diff = np.asarray(series_diff, dtype=np.float64)  # Convert to float64
-        series = np.cumsum(series_diff)
-        series += start_value
-        return series
-
-    def apply_shock(self, df, shocks):
-        shocked_dfs = {"Baseline": df.copy()}  # Start with the original dataset
-        cols_to_shock = df.columns  # All columns
-
-        # Apply shock one variable at a time
-        for shock in shocks:
-            for col in cols_to_shock:
-                temp_df = df.copy()
-                temp_df[col] = temp_df[col] * (1 + shock)
-                shocked_dfs[
-                    f"Shock of {shock} to {col}"
-                ] = temp_df  # Include shock value in the key
-
-        return shocked_dfs
+    if transformation is None:
+        transformed_target = target_df.values
+        transformed_predictions = predictions
+
+    elif transformation == "integrate":
+        transformed_target = integrate_diff(target_df.values, dataset.y[0])
+        transformed_predictions = {
+            label: integrate_diff(pred, dataset.y[0])
+            for label, pred in predictions.items()
+        }
+
+    else:
+        raise ValueError(f"Invalid transformation: {transformation}")
+
+    fig = plt.figure()
+
+    plt.plot(target_df.index, transformed_target, label="Observed")
+
+    for label, pred in transformed_predictions.items():
+        plt.plot(target_df.index, pred, label=label)
+
+    plt.legend()
+
+    plt.close()
+
+    return fig
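The shock scheme above perturbs one feature at a time: each entry in `shocked_dfs` is a full copy of the feature matrix with exactly one column scaled by `(1 + shock)` while the others stay at baseline. A standalone illustration of the resulting keys and values, using made-up column names:

    import pandas as pd

    # One-variable-at-a-time shocks, mirroring the loop in the hunk above.
    features_df = pd.DataFrame({"rate": [1.0, 2.0], "spread": [0.5, 0.6]})
    shocks = [0.1]

    shocked_dfs = {"Baseline": features_df}
    for shock in shocks:
        for col in features_df.columns:
            temp_df = features_df.copy()
            temp_df[col] = temp_df[col] * (1 + shock)
            shocked_dfs[f"Shock of {shock} to {col}"] = temp_df

    print(list(shocked_dfs))
    # ['Baseline', 'Shock of 0.1 to rate', 'Shock of 0.1 to spread']
    print(shocked_dfs["Shock of 0.1 to rate"]["rate"].tolist())  # [1.1, 2.2]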
validmind/tests/model_validation/statsmodels/RegressionModelSummary.py

@@ -2,18 +2,17 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
-import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score
 
-from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 from .statsutils import adj_r2_score
 
 
-@dataclass
-class RegressionModelSummary(Metric):
+@tags("model_performance", "regression")
+@tasks("regression")
+def RegressionModelSummary(dataset: VMDataset, model: VMModel):
     """
     Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.
 
@@ -26,11 +25,8 @@ class RegressionModelSummary(Metric):
 
     ### Test Mechanism
 
-    This test employs the 'train_ds' attribute of the model to gather and analyze the training data. Initially, it
-    fetches the independent variables and uses the model to make predictions on these given features. Subsequently, it
-    calculates several standard regression performance metrics including R-Squared, Adjusted R-Squared, Mean Squared
-    Error (MSE), and Root Mean Squared Error (RMSE), which quantify the approximation of the predicted responses to the
-    actual responses.
+    This test uses the sklearn library to calculate the R-Squared, Adjusted R-Squared, MSE, and RMSE. It outputs a
+    table with the results of these metrics along with the feature columns used by the model.
 
     ### Signs of High Risk
 
@@ -45,54 +41,21 @@ class RegressionModelSummary(Metric):
 
     ### Limitations
 
-    - Applicable exclusively to regression models.
     - RMSE and MSE might be sensitive to outliers.
     - A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
     overfitting.
     """
-
-    name = "regression_model_summary"
-    required_inputs = ["model", "dataset"]
-    tasks = ["regression"]
-    tags = ["model_metadata", "model_comparison"]
-
-    def run(self):
-        X_columns = self.inputs.dataset.feature_columns
-
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model)
-
-        r2 = r2_score(y_true, y_pred)
-        adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
-        mse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=True)
-        rmse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False)
-
-        results = {
-            "Independent Variables": X_columns,
-            "R-Squared": r2,
-            "Adjusted R-Squared": adj_r2,
-            "MSE": mse,
-            "RMSE": rmse,
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    return [
+        {
+            "Independent Variables": dataset.feature_columns,
+            "R-Squared": r2_score(y_true, y_pred),
+            "Adjusted R-Squared": adj_r2_score(
+                y_true, y_pred, len(y_true), len(dataset.feature_columns)
+            ),
+            "MSE": mean_squared_error(y_true=y_true, y_pred=y_pred, squared=True),
+            "RMSE": mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False),
         }
-        summary_regression = pd.DataFrame(results)
-
-        return self.cache_results(
-            {
-                "regression_analysis": summary_regression.to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the regression analysis results
-        """
-        summary_regression = metric_value["regression_analysis"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_regression,
-                    metadata=ResultTableMetadata(title="Regression Analysis Results"),
-                ),
-            ]
-        )
+    ]
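`adj_r2_score` is imported from the package's `.statsutils` module, whose body is not part of this diff; given the call signature `adj_r2_score(y_true, y_pred, n, p)`, it presumably computes the textbook adjusted R-Squared, which penalizes R-Squared for the number of predictors. A sketch under that assumption:

    from sklearn.metrics import r2_score

    # Assumed implementation: the standard adjusted R-squared formula.
    # The actual validmind .statsutils body is not shown in this diff.
    def adj_r2_score(y_true, y_pred, n_obs, n_features):
        r2 = r2_score(y_true, y_pred)
        # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
        return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)

    y_true = [3.0, 5.0, 7.0, 9.0]
    y_pred = [2.8, 5.1, 7.2, 8.9]
    print(adj_r2_score(y_true, y_pred, n_obs=4, n_features=1))

Note also that `mean_squared_error(..., squared=False)` is how this code obtains RMSE; newer scikit-learn releases deprecate the `squared` keyword in favor of a separate `root_mean_squared_error` function.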