validmind-2.2.6-py3-none-any.whl → validmind-2.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. validmind/__init__.py +2 -1
  2. validmind/__version__.py +1 -1
  3. validmind/{ai.py → ai/test_descriptions.py} +74 -82
  4. validmind/ai/utils.py +104 -0
  5. validmind/api_client.py +58 -19
  6. validmind/client.py +5 -5
  7. validmind/models/foundation.py +10 -6
  8. validmind/models/function.py +3 -1
  9. validmind/models/metadata.py +1 -1
  10. validmind/test_suites/__init__.py +1 -9
  11. validmind/test_suites/regression.py +0 -16
  12. validmind/test_suites/statsmodels_timeseries.py +1 -1
  13. validmind/tests/__init__.py +7 -7
  14. validmind/tests/__types__.py +170 -0
  15. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  16. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  17. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  18. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  19. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  20. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  21. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  22. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  23. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  24. validmind/tests/data_validation/ScatterPlot.py +1 -1
  25. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  26. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  27. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  28. validmind/tests/data_validation/WOEBinTable.py +1 -1
  29. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  30. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  31. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  32. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  33. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  34. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  35. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  36. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  37. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  38. validmind/tests/decorator.py +13 -1
  39. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  40. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  41. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  42. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  43. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  44. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  45. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  46. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  47. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  48. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  49. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  50. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  51. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  52. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  53. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  54. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  55. validmind/tests/model_validation/ragas/utils.py +35 -9
  56. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  57. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  58. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  59. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  60. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  61. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  62. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  63. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  64. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  65. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  66. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  67. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  68. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  69. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  70. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  72. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  73. validmind/tests/prompt_validation/Bias.py +14 -11
  74. validmind/tests/prompt_validation/Clarity.py +14 -11
  75. validmind/tests/prompt_validation/Conciseness.py +14 -11
  76. validmind/tests/prompt_validation/Delimitation.py +14 -11
  77. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  78. validmind/tests/prompt_validation/Robustness.py +11 -11
  79. validmind/tests/prompt_validation/Specificity.py +14 -11
  80. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  81. validmind/unit_metrics/composite.py +2 -1
  82. validmind/utils.py +34 -59
  83. validmind/vm_models/dataset/dataset.py +17 -3
  84. validmind/vm_models/dataset/utils.py +2 -2
  85. validmind/vm_models/model.py +1 -1
  86. validmind/vm_models/test/metric.py +1 -8
  87. validmind/vm_models/test/result_wrapper.py +2 -2
  88. validmind/vm_models/test/test.py +3 -0
  89. validmind/vm_models/test/threshold_test.py +1 -1
  90. validmind/vm_models/test_suite/runner.py +7 -4
  91. {validmind-2.2.6.dist-info → validmind-2.3.3.dist-info}/METADATA +1 -1
  92. {validmind-2.2.6.dist-info → validmind-2.3.3.dist-info}/RECORD +95 -103
  93. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  94. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  95. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  96. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  97. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  98. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  99. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  100. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  101. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  102. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  103. {validmind-2.2.6.dist-info → validmind-2.3.3.dist-info}/LICENSE +0 -0
  104. {validmind-2.2.6.dist-info → validmind-2.3.3.dist-info}/WHEEL +0 -0
  105. {validmind-2.2.6.dist-info → validmind-2.3.3.dist-info}/entry_points.txt +0 -0
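Several of the stationarity tests in the list above (ADF, DFGLSArch, KPSS, PhillipsPerronArch, ZivotAndrewsArch) moved from `validmind/tests/model_validation/statsmodels/` to `validmind/tests/data_validation/`, which also changes the test IDs used to invoke them. Below is a minimal sketch of running one relocated test under its new ID, assuming the documented validmind 2.x workflow (`vm.init_dataset` plus `vm.tests.run_test` with an `inputs` dict); the dataset file and `input_id` are hypothetical, and argument names may differ slightly between versions.

```python
import pandas as pd
import validmind as vm

# Hypothetical time-series dataset wrapped as a ValidMind dataset object
df = pd.read_csv("timeseries.csv")
vm_dataset = vm.init_dataset(dataset=df, input_id="timeseries_ds")

# Formerly "validmind.model_validation.statsmodels.ADF"; after this release the
# test lives under data_validation, so the test ID changes accordingly.
vm.tests.run_test(
    "validmind.data_validation.ADF",
    inputs={"dataset": vm_dataset},
)
```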
validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py
@@ -1,146 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- from sklearn.metrics import mean_squared_error, r2_score
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
- from .statsutils import adj_r2_score
-
-
- @dataclass
- class RegressionModelInsampleComparison(Metric):
-     """
-     Evaluates and compares in-sample performance of multiple regression models using R-Squared, Adjusted R-Squared,
-     MSE, and RMSE.
-
-     **Purpose**: The RegressionModelInsampleComparison test metric is utilized to evaluate and compare the performance
-     of multiple regression models trained on the same dataset. Key performance indicators for this comparison include
-     statistics related to the goodness of fit - R-Squared, Adjusted R-Squared, Mean Squared Error (MSE), and Root Mean
-     Squared Error (RMSE).
-
-     **Test Mechanism**: The methodology behind this test is as follows -
-     - Firstly, a verification that the list of models to be tested is indeed not empty occurs.
-     - Once confirmed, the In-Sample performance of the models is calculated by a private function,
-     `_in_sample_performance_ols`, that executes the following steps:
-     - Iterates through each model in the supplied list.
-     - For each model, the function extracts the features (`X`) and the target (`y_true`) from the training dataset
-     and computes the predicted target values (`y_pred`).
-     - The performance metrics for the model are calculated using formulas for R-Squared, Adjusted R-Squared, MSE, and
-     RMSE.
-     - The results, including the computed metrics, variables of the model, and the model's identifier, are stored in
-     a dictionary that is appended to a list.
-     - The collected results are finally returned as a pandas dataframe.
-
-     **Signs of High Risk**:
-     - Significantly low values for R-Squared or Adjusted R-Squared.
-     - Significantly high values for MSE and RMSE.
-     Please note that what constitutes as "low" or "high" will vary based on the specific context or domain in which the
-     model is being utilized.
-
-     **Strengths**:
-     - Enables comparison of in-sample performance across different models on the same dataset, providing insights into
-     which model fits the data the best.
-     - Utilizes multiple evaluation methods (R-Squared, Adjusted R-Squared, MSE, RMSE), offering a comprehensive review
-     of a model's performance.
-
-     **Limitations**:
-     - The test measures only in-sample performance, i.e., how well a model fits the data it was trained on. However, it
-     does not give any information on the performance of the model on new, unseen, or out-of-sample data.
-     - Higher in-sample performance might be a result of overfitting, where the model is just memorizing the training
-     data. This test is sensitive to such cases.
-     - The test does not consider additional key factors such as the temporal dynamics of the data, that is, the pattern
-     of changes in data over time.
-     - The test does not provide an automated mechanism to determine if the reported metrics are within acceptable
-     ranges, necessitating human judgment.
-     """
-
-     name = "regression_insample_performance"
-     required_inputs = ["model", "dataset"]
-     metadata = {
-         "task_types": ["regression"],
-         "tags": ["model_comparison"],
-     }
-
-     def run(self):
-         # Check models list is not empty
-         if not self.inputs.models:
-             raise ValueError("List of models must be provided in the models parameter")
-         all_models = []
-
-         if self.inputs.models is not None:
-             all_models.extend(self.inputs.models)
-
-         in_sample_performance = self._in_sample_performance_ols(
-             all_models, self.inputs.dataset
-         )
-         in_sample_performance_df = pd.DataFrame(in_sample_performance)
-
-         return self.cache_results(
-             {
-                 "in_sample_performance": in_sample_performance_df.to_dict(
-                     orient="records"
-                 ),
-             }
-         )
-
-     def _in_sample_performance_ols(self, models, dataset):
-         """
-         Computes the in-sample performance evaluation metrics for a list of OLS models.
-         Args:
-             models (list): A list of statsmodels OLS models.
-         Returns:
-             list: A list of dictionaries containing the evaluation results for each model.
-             Each dictionary contains the following keys:
-             - 'Model': A string identifying the model.
-             - 'Independent Variables': A list of strings identifying the independent variables used in the model.
-             - 'R-Squared': The R-squared value of the model.
-             - 'Adjusted R-Squared': The adjusted R-squared value of the model.
-             - 'MSE': The mean squared error of the model.
-             - 'RMSE': The root mean squared error of the model.
-         """
-         evaluation_results = []
-
-         for i, model in enumerate(models):
-             X_columns = dataset.feature_columns
-             y_true = dataset.y
-             y_pred = dataset.y_pred(model)
-
-             # Extract R-squared and Adjusted R-squared
-             r2 = r2_score(y_true, y_pred)
-             adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
-             mse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=True)
-             rmse = mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False)
-
-             # Append the results to the evaluation_results list
-             evaluation_results.append(
-                 {
-                     "Model": f"Model {i + 1}",
-                     "Independent Variables": X_columns,
-                     "R-Squared": r2,
-                     "Adjusted R-Squared": adj_r2,
-                     "MSE": mse,
-                     "RMSE": rmse,
-                 }
-             )
-
-         return evaluation_results
-
-     def summary(self, metric_value):
-         """
-         Build one table for summarizing the in-sample performance results
-         """
-         summary_in_sample_performance = metric_value["in_sample_performance"]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=summary_in_sample_performance,
-                     metadata=ResultTableMetadata(title="In-Sample Performance Results"),
-                 ),
-             ]
-         )
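The removed `RegressionModelInsampleComparison` test reported R-Squared, Adjusted R-Squared, MSE, and RMSE per model. Here is a minimal standalone sketch of the same computation, assuming a fitted scikit-learn-style regressor and tabular features/target; the package-internal `adj_r2_score` helper is replaced by the textbook adjusted R-squared formula, and RMSE is taken as the square root of MSE rather than via the deprecated `squared=False` keyword.

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def in_sample_metrics(model, X, y):
    """Compute the four in-sample metrics the removed test reported."""
    y_pred = model.predict(X)
    n, p = len(y), X.shape[1]
    r2 = r2_score(y, y_pred)
    adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)  # textbook adjusted R-squared
    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)  # same as RMSE, without the deprecated squared=False kwarg
    return {"R-Squared": r2, "Adjusted R-Squared": adj_r2, "MSE": mse, "RMSE": rmse}


# Hypothetical usage:
# X, y = df[feature_columns], df[target_column]
# model = LinearRegression().fit(X, y)
# print(in_sample_metrics(model, X, y))
```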
validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py
@@ -1,144 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import numpy as np
- import pandas as pd
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
-
- @dataclass
- class RegressionModelOutsampleComparison(Metric):
-     """
-     Computes MSE and RMSE for multiple regression models using out-of-sample test to assess model's prediction accuracy
-     on unseen data.
-
-     **Purpose**: The RegressionModelOutsampleComparison test is designed to evaluate the predictive performance of
-     multiple regression models by means of an out-of-sample test. The primary aim of this test is to validate the
-     model's ability to generalize to unseen data, a common challenge in the context of overfitting. It does this by
-     computing two critical metrics — Mean Squared Error (MSE) and Root Mean Squared Error (RMSE), which provide a
-     quantifiable measure of the model's prediction accuracy on the testing dataset.
-
-     **Test Mechanism**: This test requires multiple models (specifically Ordinary Least Squares - OLS regression
-     models) and a test dataset as inputs. Each model generates predictions using the test dataset. The residuals are
-     then calculated and used to compute the MSE and RMSE for each model. The test outcomes, which include the model's
-     name, its MSE, and RMSE, are recorded and returned in a structured dataframe format.
-
-     **Signs of High Risk**:
-     - High values of MSE or RMSE indicate significant risk, signifying that the model's predictions considerably
-     deviate from the actual values in the test dataset.
-     - Consistently large discrepancies between training and testing performance across various models may indicate an
-     issue with the input data itself or the model selection strategies employed.
-
-     **Strengths**:
-     - This test offers a comparative evaluation of multiple models' out-of-sample performance, enabling the selection
-     of the best performing model.
-     - The use of both MSE and RMSE provides insights into the model's prediction error. While MSE is sensitive to
-     outliers, emphasizing larger errors, RMSE provides a more interpretable measure of average prediction error given
-     that it's in the same unit as the dependent variable.
-
-     **Limitations**:
-     - The applicability of this test is limited to regression tasks, specifically OLS models.
-     - The test operates under the assumption that the test dataset is a representative sample of the population. This
-     might not always hold true and can result in less accurate insights.
-     - The interpretability and the objectivity of the output (MSE and RMSE) can be influenced when the scale of the
-     dependent variable varies significantly, or the distribution of residuals is heavily skewed or contains outliers.
-     """
-
-     name = "regression_outsample_performance"
-     required_inputs = ["model", "dataset"]
-     metadata = {
-         "task_types": ["regression"],
-         "tags": ["model_comparison"],
-     }
-
-     def run(self):
-         # Check models list is not empty
-         if not self.inputs.models:
-             raise ValueError("List of models must be provided in the models parameter")
-         all_models = []
-         if self.inputs.model is not None:
-             all_models.append(self.inputs.model)
-
-         if self.inputs.models is not None:
-             all_models.extend(self.inputs.models)
-
-         for model in all_models:
-             if model.test_ds is None:
-                 raise ValueError(
-                     "Test dataset is missing in the ValidMind Model object"
-                 )
-
-         results = self._out_sample_performance_ols(all_models, self.inputs.dataset)
-         return self.cache_results(
-             {
-                 "out_sample_performance": results.to_dict(orient="records"),
-             }
-         )
-
-     def _out_sample_performance_ols(self, model_list, dataset):
-         """
-         Returns the out-of-sample performance evaluation metrics of a list of OLS regression models.
-         Args:
-             model_list (list): A list of OLS models to evaluate.
-             test_data (pandas.DataFrame): The test dataset containing the independent and dependent variables.
-             target_col (str): The name of the target variable column in the test dataset.
-         Returns:
-             pandas.DataFrame: A DataFrame containing the evaluation results of the OLS models. The columns are 'Model',
-             'MSE' (Mean Squared Error), and 'RMSE' (Root Mean Squared Error).
-         """
-
-         # Initialize a list to store results
-         results = []
-
-         for fitted_model in model_list:
-             # Extract the column names of the independent variables from the model
-             independent_vars = dataset.feature_columns
-
-             # Separate the target variable and features in the test dataset
-             y_test = dataset.y
-
-             # Predict the test data
-             y_pred = dataset.y_pred(fitted_model)
-
-             # Calculate the residuals
-             residuals = y_test - y_pred
-
-             # Calculate the mean squared error and root mean squared error
-             mse = np.mean(residuals**2)
-             rmse_val = np.sqrt(mse)
-
-             # Store the results
-             model_name_with_vars = f"({', '.join(independent_vars)})"
-             results.append(
-                 {
-                     "Model": model_name_with_vars,
-                     "MSE": mse,
-                     "RMSE": rmse_val,
-                 }
-             )
-
-         # Create a DataFrame to display the results
-         results_df = pd.DataFrame(results)
-
-         return results_df
-
-     def summary(self, metric_value):
-         """
-         Build one table for summarizing the out-of-sample performance results
-         """
-         summary_out_sample_performance = metric_value["out_sample_performance"]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=summary_out_sample_performance,
-                     metadata=ResultTableMetadata(
-                         title="Out-of-Sample Performance Results"
-                     ),
-                 ),
-             ]
-         )
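As the docstring above notes, RMSE is simply the square root of MSE, so it is reported in the units of the dependent variable, while a single large residual inflates both metrics. A quick numeric illustration with hypothetical residuals:

```python
import numpy as np

residuals = np.array([0.5, -0.3, 0.2, -0.4])  # hypothetical residuals
with_outlier = np.append(residuals, 5.0)      # same residuals plus one large outlier

for r in (residuals, with_outlier):
    mse = np.mean(r**2)
    rmse = np.sqrt(mse)
    print(f"n={len(r)}  MSE={mse:.3f}  RMSE={rmse:.3f}")

# n=4  MSE=0.135  RMSE=0.367
# n=5  MSE=5.108  RMSE=2.260
```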
validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py
@@ -1,127 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import mean_squared_error, r2_score
-
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
-
-
- @dataclass
- class RegressionModelsPerformance(Metric):
-     """
-     Evaluates and compares regression models' performance using R-squared, Adjusted R-squared, and MSE metrics.
-
-     **Purpose**: This metric is used to evaluate and compare the performance of various regression models. Through the
-     use of key statistical measures such as R-squared, Adjusted R-squared, and Mean Squared Error (MSE), the
-     performance of different models in predicting dependent variables can be assessed both on the data used for
-     training (in-sample) and new, unseen data (out-of-sample).
-
-     **Test Mechanism**: The test evaluates a list of provided regression models. For each model, it calculates their
-     in-sample and out-of-sample performance by deriving the model predictions for the training and testing datasets
-     respectively, and then comparing these predictions to the actual values. In doing so, it calculates R-squared,
-     Adjusted R-squared, and MSE for each model, stores the results, and returns them for comparison.
-
-     **Signs of High Risk**:
-     - High Mean Squared Error (MSE) values.
-     - Strikingly low values of R-squared and Adjusted R-squared.
-     - A significant drop in performance when transitioning from in-sample to out-of-sample evaluations, signaling a
-     potential overfitting issue.
-
-     **Strengths**:
-     - The test permits comparisons of multiple models simultaneously, providing an objective base for identifying the
-     top-performing model.
-     - It delivers both in-sample and out-of-sample evaluations, presenting performance data on unseen data.
-     - The utilization of R-squared and Adjusted R-squared in conjunction with MSE allows for a detailed view of the
-     model's explainability and error rate.
-
-     **Limitations**:
-     - This test is built around the assumption that the residuals of the regression model are normally distributed,
-     which is a fundamental requirement for Ordinary Least Squares (OLS) regression; thus, it could be not suitable for
-     models where this assumption is broken.
-     - The test does not consider cases where higher R-squared or lower MSE values do not necessarily correlate with
-     better predictive performance, particularly in instances of excessively complex models.
-     """
-
-     name = "regression_models_performance"
-     required_inputs = ["models", "in_sample_datasets", "out_of_sample_datasets"]
-     metadata = {
-         "task_types": ["regression"],
-         "tags": ["model_performance", "model_comparison"],
-     }
-
-     def run(self):
-         # Check models list is not empty
-         if not self.inputs.models:
-             raise ValueError("List of models must be provided in the models parameter")
-
-         all_models = []
-
-         if self.inputs.models is not None:
-             all_models.extend(self.inputs.models)
-
-         in_sample_results = self.sample_performance_ols(
-             self.inputs.models, self.inputs.in_sample_datasets
-         )
-         out_of_sample_results = self.sample_performance_ols(
-             self.inputs.models, self.inputs.out_of_sample_datasets
-         )
-
-         return self.cache_results(
-             {
-                 "in_sample_performance": in_sample_results,
-                 "out_of_sample_performance": out_of_sample_results,
-             }
-         )
-
-     def sample_performance_ols(self, models, datasets):
-         evaluation_results = []
-
-         for model, dataset in zip(models, datasets):
-             X_columns = dataset.feature_columns
-             y_true = dataset.y
-             y_pred = dataset.y_pred(model)
-
-             # Extract R-squared and Adjusted R-squared
-             r2 = r2_score(y_true, y_pred)
-             mse = mean_squared_error(y_true, y_pred)
-             adj_r2 = 1 - ((1 - r2) * (len(y_true) - 1)) / (
-                 len(y_true) - len(X_columns) - 1
-             )
-
-             # Append the results to the evaluation_results list
-             evaluation_results.append(
-                 {
-                     "Model": model.input_id,
-                     "Independent Variables": X_columns,
-                     "R-Squared": r2,
-                     "Adjusted R-Squared": adj_r2,
-                     "MSE": mse,
-                 }
-             )
-
-         return evaluation_results
-
-     def summary(self, metric_value):
-         """
-         Build a table for summarizing the in-sample and out-of-sample performance results
-         """
-         summary_in_sample_performance = metric_value["in_sample_performance"]
-         summary_out_of_sample_performance = metric_value["out_of_sample_performance"]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=summary_in_sample_performance,
-                     metadata=ResultTableMetadata(title="In-Sample Performance Results"),
-                 ),
-                 ResultTable(
-                     data=summary_out_of_sample_performance,
-                     metadata=ResultTableMetadata(
-                         title="Out-of-Sample Performance Results"
-                     ),
-                 ),
-             ]
-         )
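The removed `RegressionModelsPerformance` test's main signal was the gap between in-sample and out-of-sample fit. A sketch of that comparison for fitted scikit-learn-style models with matching train/test splits; the argument names and the 0.1 R-squared-gap threshold are illustrative only, not part of the original test.

```python
from sklearn.metrics import mean_squared_error, r2_score


def compare_models(models, train_sets, test_sets):
    """Return per-model in-sample vs. out-of-sample performance rows."""
    rows = []
    for model, (X_tr, y_tr), (X_te, y_te) in zip(models, train_sets, test_sets):
        r2_in = r2_score(y_tr, model.predict(X_tr))
        r2_out = r2_score(y_te, model.predict(X_te))
        rows.append({
            "Model": type(model).__name__,
            "In-Sample R-Squared": r2_in,
            "Out-of-Sample R-Squared": r2_out,
            "Out-of-Sample MSE": mean_squared_error(y_te, model.predict(X_te)),
            "Possible overfit": (r2_in - r2_out) > 0.1,  # illustrative threshold
        })
    return rows
```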
validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py
@@ -1,130 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import matplotlib.pyplot as plt
- import pandas as pd
- import seaborn as sns
- from scipy import stats
- from statsmodels.graphics.tsaplots import plot_acf
- from statsmodels.tsa.seasonal import seasonal_decompose
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class ResidualsVisualInspection(Metric):
-     """
-     Provides a comprehensive visual analysis of residuals for regression models utilizing various plot types.
-
-     **Purpose**: The main purpose of this metric is to visualize and analyze the residuals (the differences between the
-     observed and predicted values) of a regression problem. It allows for a graphical exploration of the model's
-     errors, helping to identify statistical patterns or anomalies that may indicate a systematic bias in the model's
-     predictions. By inspecting the residuals, we can check how well the model fits the data and meets the assumptions
-     of the model.
-
-     **Test Mechanism**: The metric generates four common types of residual plots which are: a histogram with kernel
-     density estimation, a quantile-quantile (Q-Q) plot, a residuals series dot plot, and an autocorrelation function
-     (ACF) plot.
-
-     - The residuals histogram with kernel density estimation visualizes the distribution of residuals and allows to
-     check if they are normally distributed.
-     - Q-Q plot compares the observed quantiles of the data to the quantiles of a standard normal distribution, helping
-     to assess the normality of residuals.
-     - A residuals dot plot indicates the variation in residuals over time, which helps in identifying any time-related
-     pattern in residuals.
-     - ACF plot visualizes the correlation of an observation with its previous observations, helping to pinpoint any
-     seasonality effect within residuals.
-
-     **Signs of High Risk**:
-
-     - Skewness or asymmetry in the histogram or a significant deviation from the straight line in the Q-Q plot, which
-     indicates that the residuals aren't normally distributed.
-     - Large spikes in the ACF plot, indicating that the residuals are correlated, in violation of the assumption that
-     they are independent.
-     - Non-random patterns in the dot plot of residuals, indicating potential model misspecification.
-
-     **Strengths**:
-
-     - Visual analysis of residuals is a powerful yet simple way to understand a model’s behavior across the data set
-     and to identify problems with the model's assumptions or its fit to the data.
-     - The test is applicable to any regression model, irrespective of complexity.
-     - By exploring residuals, we might uncover relationships that were not captured by the model, revealing
-     opportunities for model improvement.
-
-     **Limitations**:
-
-     - Visual tests are largely subjective and can be open to interpretation. Clear-cut decisions about the model based
-     solely on these plots may not be possible.
-     - The metrics from the test do not directly infer the action based on the results; domain-specific knowledge and
-     expert judgement is often required to interpret the results.
-     - These plots can indicate a problem with the model but they do not necessarily reveal the nature or cause of the
-     problem.
-     - The test assumes that the error terms are identically distributed, which might not always be the case in
-     real-world scenarios.
-     """
-
-     name = "residuals_visual_inspection"
-     required_inputs = ["dataset"]
-     metadata = {
-         "task_types": ["regression"],
-         "tags": ["statsmodels", "visualization"],
-     }
-
-     @staticmethod
-     def residual_analysis(residuals, variable_name, axes):
-         residuals = residuals.dropna().reset_index(
-             drop=True
-         )  # drop NaN values and reset index
-
-         # QQ plot
-         stats.probplot(residuals, dist="norm", plot=axes[0, 1])
-         axes[0, 1].set_title(f"Residuals Q-Q Plot ({variable_name})")
-
-         # Histogram with KDE
-         sns.histplot(residuals, kde=True, ax=axes[0, 0])
-         axes[0, 0].set_xlabel("Residuals")
-         axes[0, 0].set_title(f"Residuals Histogram ({variable_name})")
-
-         # Residual series dot plot
-         sns.lineplot(data=residuals, linewidth=0.5, color="red", ax=axes[1, 0])
-         axes[1, 0].set_title(f"Residual Series Dot Plot ({variable_name})")
-
-         # ACF plot
-         n_lags = min(100, len(residuals) - 1)  # Adjust the number of lags
-         plot_acf(residuals, ax=axes[1, 1], lags=n_lags, zero=False)  # Added zero=False
-         axes[1, 1].set_title(f"ACF Plot of Residuals ({variable_name})")
-
-     def run(self):
-         x_train = self.inputs.dataset.df
-         figures = []
-
-         # TODO: specify which columns to plot via params
-         for col in x_train.columns:
-             sd = seasonal_decompose(x_train[col], model="additive")
-
-             # Remove NaN values from the residuals and reset the index
-             residuals = pd.Series(sd.resid).dropna().reset_index(drop=True)
-
-             # Create subplots
-             fig, axes = plt.subplots(nrows=2, ncols=2)
-             fig.suptitle(f"Residuals Inspection for {col}", fontsize=24)
-
-             self.residual_analysis(residuals, col, axes)
-
-             # Adjust the layout
-             plt.tight_layout()
-
-             # Do this if you want to prevent the figure from being displayed
-             plt.close("all")
-
-             figures.append(
-                 Figure(
-                     for_object=self,
-                     key=self.key,
-                     figure=fig,
-                 )
-             )
-         return self.cache_results(figures=figures)
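The removed `ResidualsVisualInspection` test produced four diagnostic plots per column (histogram with KDE, Q-Q plot, residual series, ACF). A standalone sketch of the same panel for a generic residual series, using simulated white noise as a stand-in for real model residuals:

```python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statsmodels.graphics.tsaplots import plot_acf

# Simulated residuals; in practice use e.g. y_true - y_pred or seasonal_decompose(...).resid
residuals = pd.Series(np.random.default_rng(0).normal(size=200))

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

sns.histplot(residuals, kde=True, ax=axes[0, 0])            # distribution / normality
axes[0, 0].set_title("Residuals Histogram")

stats.probplot(residuals, dist="norm", plot=axes[0, 1])     # Q-Q against a normal
axes[0, 1].set_title("Residuals Q-Q Plot")

sns.lineplot(data=residuals, linewidth=0.5, ax=axes[1, 0])  # residuals over time
axes[1, 0].set_title("Residual Series")

plot_acf(residuals, ax=axes[1, 1], lags=40, zero=False)     # autocorrelation
axes[1, 1].set_title("ACF of Residuals")

plt.tight_layout()
plt.show()
```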