validmind 2.3.1__py3-none-any.whl → 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. validmind/__init__.py +2 -1
  2. validmind/__version__.py +1 -1
  3. validmind/datasets/regression/fred_timeseries.py +272 -0
  4. validmind/test_suites/__init__.py +0 -2
  5. validmind/tests/__init__.py +7 -7
  6. validmind/tests/__types__.py +180 -0
  7. validmind/tests/data_validation/SeasonalDecompose.py +68 -40
  8. validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
  9. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
  10. validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
  11. validmind/tests/data_validation/TimeSeriesOutliers.py +30 -41
  12. validmind/tests/decorator.py +12 -0
  13. validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
  14. validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
  15. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
  16. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
  17. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
  18. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
  19. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +1 -1
  20. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
  21. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
  22. validmind/utils.py +34 -0
  23. {validmind-2.3.1.dist-info → validmind-2.3.5.dist-info}/METADATA +70 -36
  24. {validmind-2.3.1.dist-info → validmind-2.3.5.dist-info}/RECORD +28 -16
  25. /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
  26. {validmind-2.3.1.dist-info → validmind-2.3.5.dist-info}/LICENSE +0 -0
  27. {validmind-2.3.1.dist-info → validmind-2.3.5.dist-info}/WHEEL +0 -0
  28. {validmind-2.3.1.dist-info → validmind-2.3.5.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/ModelPredictionResiduals.py
@@ -0,0 +1,103 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import pandas as pd
+ import plotly.graph_objects as go
+ from scipy.stats import kstest
+
+ from validmind import tags, tasks
+
+
+ @tags("regression")
+ @tasks("residual_analysis", "visualization")
+ def ModelPredictionResiduals(
+     datasets, models, nbins=100, p_value_threshold=0.05, start_date=None, end_date=None
+ ):
+     """
+     Plot the residuals and histograms for each model, and generate a summary table
+     with the Kolmogorov-Smirnov normality test results.
+
+     **Purpose**: The purpose of this function is to visualize the residuals of model predictions and
+     assess the normality of residuals using the Kolmogorov-Smirnov test.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, calculates residuals, and generates
+     two figures for each model: one for the time series of residuals and one for the histogram of residuals.
+     It also calculates the KS test for normality and summarizes the results in a table.
+
+     **Signs of High Risk**:
+     - If the residuals are not normally distributed, it could indicate issues with model assumptions.
+     - High skewness or kurtosis in the residuals may indicate model misspecification.
+
+     **Strengths**:
+     - Provides a clear visualization of residuals over time and their distribution.
+     - Includes statistical tests to assess the normality of residuals.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with a .df attribute to access
+     the pandas DataFrame.
+     - Only generates plots for datasets with a datetime index, and will raise an error for other types of indices.
+     """
+
+     figures = []
+     summary = []
+
+     for dataset, model in zip(datasets, models):
+         df = dataset.df.copy()
+
+         # Filter DataFrame by date range if specified
+         if start_date:
+             df = df[df.index >= pd.to_datetime(start_date)]
+         if end_date:
+             df = df[df.index <= pd.to_datetime(end_date)]
+
+         y_true = dataset.y
+         y_pred = dataset.y_pred(model)
+         residuals = y_true - y_pred
+
+         # Plot residuals
+         residuals_fig = go.Figure()
+         residuals_fig.add_trace(
+             go.Scatter(x=df.index, y=residuals, mode="lines", name="Residuals")
+         )
+         residuals_fig.update_layout(
+             title=f"Residuals for {model.input_id}",
+             xaxis_title="Date",
+             yaxis_title="Residuals",
+             font=dict(size=16),
+             showlegend=False,
+         )
+         figures.append(residuals_fig)
+
+         # Plot histogram of residuals
+         hist_fig = go.Figure()
+         hist_fig.add_trace(go.Histogram(x=residuals, nbinsx=nbins, name="Residuals"))
+         hist_fig.update_layout(
+             title=f"Histogram of Residuals for {model.input_id}",
+             xaxis_title="Residuals",
+             yaxis_title="Frequency",
+             font=dict(size=16),
+             showlegend=False,
+         )
+         figures.append(hist_fig)
+
+         # Perform KS normality test
+         ks_stat, p_value = kstest(
+             residuals, "norm", args=(residuals.mean(), residuals.std())
+         )
+         ks_normality = "Normal" if p_value > p_value_threshold else "Not Normal"
+
+         summary.append(
+             {
+                 "Model": model.input_id,
+                 "KS Statistic": ks_stat,
+                 "p-value": p_value,
+                 "KS Normality": ks_normality,
+                 "p-value Threshold": p_value_threshold,
+             }
+         )
+
+     # Create a summary DataFrame for the KS normality test results
+     summary_df = pd.DataFrame(summary)
+
+     return (summary_df, *figures)
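
A minimal usage sketch for this new test, assuming the standard validmind workflow (init_dataset, init_model, assign_predictions, tests.run_test) and a test ID that mirrors the module path; `df` (a pandas DataFrame with a DatetimeIndex and a "target" column) and `model` (a fitted sklearn regressor) are hypothetical placeholders, and the exact 2.3.x call signatures should be checked against the library docs:

    import validmind as vm

    # Hypothetical inputs: df is a time-indexed DataFrame, model a fitted regressor
    vm_ds = vm.init_dataset(dataset=df, input_id="test_ds", target_column="target")
    vm_model = vm.init_model(model, input_id="my_model")
    vm_ds.assign_predictions(model=vm_model)

    # Run the functional test; the test ID is assumed to follow the module path
    vm.tests.run_test(
        "validmind.model_validation.ModelPredictionResiduals",
        inputs={"datasets": [vm_ds], "models": [vm_model]},
        params={"nbins": 50, "p_value_threshold": 0.05},
    )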
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py
@@ -0,0 +1,131 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+ from scipy.stats import norm
+
+ from validmind import tags, tasks
+
+
+ @tags("model_predictions", "visualization")
+ @tasks("regression", "time_series_forecasting")
+ def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
+     """
+     Plot actual vs predicted values for a time series with confidence intervals and compute breaches.
+
+     **Purpose**: The purpose of this function is to visualize the actual versus predicted values for time series data, including confidence intervals, and to compute and report the number of breaches beyond these intervals.
+
+     **Test Mechanism**: The function calculates the standard deviation of prediction errors, determines the confidence intervals, and counts the number of actual values that fall outside these intervals (breaches). It then generates a plot with the actual values, predicted values, and confidence intervals, and returns a DataFrame summarizing the breach information.
+
+     **Signs of High Risk**:
+     - A high number of breaches indicates that the model's predictions are not reliable within the specified confidence level.
+     - Significant deviations between actual and predicted values may highlight model inadequacies or issues with data quality.
+
+     **Strengths**:
+     - Provides a visual representation of prediction accuracy and the uncertainty around predictions.
+     - Includes a statistical measure of prediction reliability through confidence intervals.
+     - Computes and reports breaches, offering a quantitative assessment of prediction performance.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with a datetime index.
+     - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+     - The calculation of confidence intervals assumes normally distributed errors, which may not hold for all datasets.
+     """
+     dataset_name = dataset.input_id
+     model_name = model.input_id
+     time_index = dataset.df.index  # Assuming the index of the dataset is datetime
+
+     # Get actual and predicted values
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Compute the standard deviation of the errors
+     errors = y_true - y_pred
+     std_error = np.std(errors)
+
+     # Compute z-score for the given confidence level
+     z_score = norm.ppf(1 - (1 - confidence) / 2)
+
+     # Compute confidence intervals
+     lower_conf = y_pred - z_score * std_error
+     upper_conf = y_pred + z_score * std_error
+
+     # Calculate breaches
+     upper_breaches = (y_true > upper_conf).sum()
+     lower_breaches = (y_true < lower_conf).sum()
+     total_breaches = upper_breaches + lower_breaches
+
+     # Create DataFrame
+     breaches_df = pd.DataFrame(
+         {
+             "Confidence Level": [confidence],
+             "Total Breaches": [total_breaches],
+             "Upper Breaches": [upper_breaches],
+             "Lower Breaches": [lower_breaches],
+         }
+     )
+
+     # Plotting
+     fig = go.Figure()
+
+     # Plot actual values
+     fig.add_trace(
+         go.Scatter(
+             x=time_index,
+             y=y_true,
+             mode="lines",
+             name="Actual Values",
+             line=dict(color="blue"),
+         )
+     )
+
+     # Plot predicted values
+     fig.add_trace(
+         go.Scatter(
+             x=time_index,
+             y=y_pred,
+             mode="lines",
+             name=f"Predicted by {model_name}",
+             line=dict(color="red"),
+         )
+     )
+
+     # Add confidence interval lower bound as an invisible line
+     fig.add_trace(
+         go.Scatter(
+             x=time_index,
+             y=lower_conf,
+             mode="lines",
+             line=dict(width=0),
+             showlegend=False,
+             name="CI Lower",
+         )
+     )
+
+     # Add confidence interval upper bound and fill area
+     fig.add_trace(
+         go.Scatter(
+             x=time_index,
+             y=upper_conf,
+             mode="lines",
+             fill="tonexty",
+             fillcolor="rgba(200, 200, 200, 0.5)",
+             line=dict(width=0),
+             showlegend=True,
+             name="Confidence Interval",
+         )
+     )
+
+     # Update layout
+     fig.update_layout(
+         title=f"Time Series Actual vs Predicted Values for {dataset_name} and {model_name}",
+         xaxis_title="Time",
+         yaxis_title="Values",
+         legend_title="Legend",
+         template="plotly_white",
+     )
+
+     return fig, breaches_df
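
Unlike most of the other new tests, TimeSeriesPredictionWithCI takes a single dataset/model pair rather than lists. A minimal sketch of a direct call, reusing the hypothetical vm_ds and vm_model objects from the sketch above and assuming the @tags/@tasks decorators leave the function directly callable:

    from validmind.tests.model_validation.TimeSeriesPredictionWithCI import (
        TimeSeriesPredictionWithCI,
    )

    # A wider 99% band should produce fewer breaches than the default 95%
    fig, breaches_df = TimeSeriesPredictionWithCI(vm_ds, vm_model, confidence=0.99)
    fig.show()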
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py
@@ -0,0 +1,76 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import plotly.express as px
+ import plotly.graph_objects as go
+
+ from validmind import tags, tasks
+
+
+ @tags("model_predictions", "visualization")
+ @tasks("regression", "time_series_forecasting")
+ def TimeSeriesPredictionsPlot(datasets, models):
+     """
+     Plot actual vs predicted values for time series data and generate a visual comparison for each model.
+
+     **Purpose**: The purpose of this function is to visualize the actual versus predicted values for time series data across different models.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, plots the actual values from the dataset, and overlays the predicted values from each model using Plotly for interactive visualization.
+
+     **Signs of High Risk**:
+     - Large discrepancies between actual and predicted values indicate poor model performance.
+     - Systematic deviations in predicted values can highlight model bias or issues with data patterns.
+
+     **Strengths**:
+     - Provides a clear visual comparison of model predictions against actual values.
+     - Uses Plotly for interactive and visually appealing plots.
+     - Can handle multiple models and datasets, displaying them with distinct colors.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with a datetime index.
+     - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+     - Visualization might become cluttered with a large number of models or datasets.
+     """
+     fig = go.Figure()
+
+     # Use Plotly's color sequence for different model predictions
+     colors = px.colors.qualitative.Plotly
+
+     # Plot actual values from the first dataset
+     dataset = datasets[0]
+     time_index = dataset.df.index  # Assuming the index of the dataset is datetime
+     fig.add_trace(
+         go.Scatter(
+             x=time_index,
+             y=dataset.y,
+             mode="lines",
+             name="Actual Values",
+             line=dict(color="blue"),
+         )
+     )
+
+     # Plot predicted values for each dataset-model pair
+     for idx, (dataset, model) in enumerate(zip(datasets, models)):
+         model_name = model.input_id
+         y_pred = dataset.y_pred(model)
+         fig.add_trace(
+             go.Scatter(
+                 x=time_index,
+                 y=y_pred,
+                 mode="lines",
+                 name=f"Predicted by {model_name}",
+                 line=dict(color=colors[idx % len(colors)]),
+             )
+         )
+
+     # Update layout
+     fig.update_layout(
+         title="Time Series Actual vs Predicted Values",
+         xaxis_title="Time",
+         yaxis_title="Values",
+         legend_title="Legend",
+         template="plotly_white",
+     )
+
+     return fig
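
Because TimeSeriesPredictionsPlot zips datasets with models, overlaying two models on the same series means passing the dataset twice. A sketch under the same assumptions as the earlier setup; vm_model_a and vm_model_b are hypothetical initialized models with predictions already assigned to vm_ds:

    from validmind.tests.model_validation.TimeSeriesPredictionsPlot import (
        TimeSeriesPredictionsPlot,
    )

    # Actual values are drawn once (from datasets[0]); one line per model follows
    fig = TimeSeriesPredictionsPlot(
        datasets=[vm_ds, vm_ds], models=[vm_model_a, vm_model_b]
    )
    fig.show()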
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py
@@ -0,0 +1,103 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+
+ import pandas as pd
+ import plotly.express as px
+ from sklearn import metrics
+
+ from validmind import tags, tasks
+
+
+ @tags("model_performance", "sklearn")
+ @tasks("regression", "time_series_forecasting")
+ def TimeSeriesR2SquareBySegments(datasets, models, segments=None):
+     """
+     Plot R-Squared values for each model over specified time segments and generate a bar chart
+     with the results.
+
+     **Purpose**: The purpose of this function is to plot the R-Squared values for different models applied to various segments of the time series data.
+
+     **Parameters**:
+     - datasets: List of datasets to evaluate.
+     - models: List of models to evaluate.
+     - segments: Dictionary with 'start_date' and 'end_date' keys containing lists of start and end dates for each segment. If None, the time series will be segmented into two halves.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared values for specified time segments, and generates a bar chart with these results.
+
+     **Signs of High Risk**:
+     - If the R-Squared values are significantly low for certain segments, it could indicate that the model is not explaining much of the variability in the dataset for those segments.
+
+     **Strengths**:
+     - Provides a visual representation of model performance across different time segments.
+     - Allows for identification of segments where models perform poorly.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+     - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+     - Assumes that `y_true` and `y_pred` are pandas Series with datetime indices.
+     """
+     results_list = []
+
+     for dataset, model in zip(datasets, models):
+         dataset_name = dataset.input_id
+         model_name = model.input_id
+
+         y_true = dataset.y
+         y_pred = dataset.y_pred(model)
+
+         # Ensure y_true and y_pred are pandas Series with the same index
+         if not isinstance(y_true, pd.Series):
+             y_true = pd.Series(y_true, index=dataset.df.index)
+         if not isinstance(y_pred, pd.Series):
+             y_pred = pd.Series(y_pred, index=dataset.df.index)
+
+         index = dataset.df.index
+
+         if segments is None:
+             mid_point = len(index) // 2
+             segments = {
+                 "start_date": [index.min(), index[mid_point]],
+                 "end_date": [index[mid_point - 1], index.max()],
+             }
+
+         for segment_index, (start_date, end_date) in enumerate(
+             zip(segments["start_date"], segments["end_date"])
+         ):
+             mask = (index >= start_date) & (index <= end_date)
+             y_true_segment = y_true.loc[mask]
+             y_pred_segment = y_pred.loc[mask]
+
+             if len(y_true_segment) > 0 and len(y_pred_segment) > 0:
+                 r2s = metrics.r2_score(y_true_segment, y_pred_segment)
+                 results_list.append(
+                     {
+                         "Model": model_name,
+                         "Dataset": dataset_name,
+                         "Segments": f"Segment {segment_index + 1}",
+                         "Start Date": start_date,
+                         "End Date": end_date,
+                         "R-Squared": r2s,
+                     }
+                 )
+
+     # Convert results list to a DataFrame
+     results_df = pd.DataFrame(results_list)
+
+     # Plotting
+     fig = px.bar(
+         results_df,
+         x="Segments",
+         y="R-Squared",
+         color="Model",
+         barmode="group",
+         title="R-Squared Comparison by Segment and Model",
+         labels={
+             "R-Squared": "R-Squared Value",
+             "Segment": "Time Segment",
+             "Model": "Model",
+         },
+     )
+
+     return fig, results_df
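
The segments parameter, when supplied, is a dictionary of parallel date lists; each start/end pair defines one scoring window. A sketch with two illustrative windows, reusing the hypothetical vm_ds and vm_model from above (the dates are placeholders and string dates are assumed to compare cleanly against the dataset's DatetimeIndex):

    from validmind.tests.model_validation.TimeSeriesR2SquareBySegments import (
        TimeSeriesR2SquareBySegments,
    )

    segments = {
        "start_date": ["2018-01-01", "2020-01-01"],
        "end_date": ["2019-12-31", "2021-12-31"],
    }
    fig, results_df = TimeSeriesR2SquareBySegments(
        datasets=[vm_ds], models=[vm_model], segments=segments
    )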
validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py
@@ -0,0 +1,83 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import pandas as pd
+ from sklearn.inspection import permutation_importance
+
+ from validmind import tags, tasks
+
+
+ @tags("model_explainability", "sklearn")
+ @tasks("regression", "time_series_forecasting")
+ def FeatureImportanceComparison(datasets, models, num_features=3):
+     """
+     Compare feature importance scores for each model and generate a summary table
+     with the top important features.
+
+     **Purpose**: The purpose of this function is to compare the feature importance scores for different models applied to various datasets.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, calculates permutation feature importance (PFI) scores, and generates a summary table with the top `num_features` important features for each model.
+
+     **Signs of High Risk**:
+     - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
+     - High variance in feature importance scores across different models may suggest instability in feature selection.
+
+     **Strengths**:
+     - Provides a clear comparison of the most important features for each model.
+     - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
+     - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
+     - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
+
+
+     """
+     results_list = []
+
+     for dataset, model in zip(datasets, models):
+         x = dataset.x_df()
+         y = dataset.y_df()
+
+         pfi_values = permutation_importance(
+             model.model,
+             x,
+             y,
+             random_state=0,
+             n_jobs=-2,
+         )
+
+         # Create a dictionary to store PFI scores
+         pfi = {
+             column: pfi_values["importances_mean"][i]
+             for i, column in enumerate(x.columns)
+         }
+
+         # Sort features by their importance
+         sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
+
+         # Extract the top `num_features` features
+         top_features = sorted_features[:num_features]
+
+         # Prepare the result for the current model and dataset
+         result = {
+             "Model": model.input_id,
+             "Dataset": dataset.input_id,
+         }
+
+         # Dynamically add feature columns to the result
+         for i in range(num_features):
+             if i < len(top_features):
+                 result[
+                     f"Feature {i + 1}"
+                 ] = f"[{top_features[i][0]}; {top_features[i][1]:.4f}]"
+             else:
+                 result[f"Feature {i + 1}"] = None
+
+         # Append the result to the list
+         results_list.append(result)
+
+     # Convert the results list to a DataFrame
+     results_df = pd.DataFrame(results_list)
+     return results_df
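
A sketch of the comparison table this test produces, under the same hypothetical vm_ds/vm_model setup as above; each "Feature n" cell is formatted as "[column name; mean permutation importance]", and num_features widens or narrows the table:

    from validmind.tests.model_validation.sklearn.FeatureImportanceComparison import (
        FeatureImportanceComparison,
    )

    # Report the top five features per dataset-model pair instead of the default three
    results_df = FeatureImportanceComparison(
        datasets=[vm_ds], models=[vm_model], num_features=5
    )
    print(results_df)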
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py
@@ -121,7 +121,7 @@ class PermutationFeatureImportance(Metric):
      figures=[
          Figure(
              for_object=self,
-             key="pfi",
+             key=f"pfi_{self.inputs.dataset.input_id}_{self.inputs.model.input_id}",
              figure=fig,
          ),
      ],
validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py
@@ -0,0 +1,76 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+ import pandas as pd
+ from sklearn import metrics
+
+ from validmind import tags, tasks
+ from validmind.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ @tags("model_performance", "sklearn")
+ @tasks("regression", "time_series_forecasting")
+ def RegressionErrorsComparison(datasets, models):
+     """
+     Compare regression error metrics for each model and generate a summary table
+     with the results.
+
+     **Purpose**: The purpose of this function is to compare the regression errors for different models applied to various datasets.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, calculates various error metrics (MAE, MSE, MAPE, MBD), and generates a summary table with these results.
+
+     **Signs of High Risk**:
+     - High Mean Absolute Error (MAE) or Mean Squared Error (MSE) indicates poor model performance.
+     - High Mean Absolute Percentage Error (MAPE) suggests large percentage errors, especially problematic if the true values are small.
+     - Mean Bias Deviation (MBD) significantly different from zero indicates systematic overestimation or underestimation by the model.
+
+     **Strengths**:
+     - Provides multiple error metrics to assess model performance from different perspectives.
+     - Includes a check to avoid division by zero when calculating MAPE.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+     - The function relies on the `logger` from `validmind.logging` to warn about zero values in `y_true`, which should be correctly implemented and imported.
+     - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+     """
+     results_list = []
+
+     for dataset, model in zip(datasets, models):
+         dataset_name = dataset.input_id
+         model_name = model.input_id
+
+         y_true = dataset.y
+         y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+         y_true = y_true.astype(y_pred.dtype)
+
+         mae = metrics.mean_absolute_error(y_true, y_pred)
+         mse = metrics.mean_squared_error(y_true, y_pred)
+
+         if np.any(y_true == 0):
+             logger.warning(
+                 "y_true contains zero values. Skipping MAPE calculation to avoid division by zero."
+             )
+             mape = None
+         else:
+             mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
+         mbd = np.mean(y_pred - y_true)
+
+         # Append results to the list
+         results_list.append(
+             {
+                 "Model": model_name,
+                 "Dataset": dataset_name,
+                 "Mean Absolute Error (MAE)": mae,
+                 "Mean Squared Error (MSE)": mse,
+                 "Mean Absolute Percentage Error (MAPE)": mape,
+                 "Mean Bias Deviation (MBD)": mbd,
+             }
+         )
+
+     # Convert results list to a DataFrame
+     results_df = pd.DataFrame(results_list)
+     return results_df
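
A sketch comparing the same model on hypothetical train and test splits (vm_train_ds and vm_test_ds are placeholders initialized like vm_ds in the first sketch); note that MAPE is reported as None whenever y_true contains zeros, as the warning branch above indicates:

    from validmind.tests.model_validation.sklearn.RegressionErrorsComparison import (
        RegressionErrorsComparison,
    )

    # One row per dataset-model pair: MAE, MSE, MAPE (or None), and MBD
    results_df = RegressionErrorsComparison(
        datasets=[vm_train_ds, vm_test_ds], models=[vm_model, vm_model]
    )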
validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py
@@ -0,0 +1,63 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import pandas as pd
+ from sklearn import metrics
+
+ from validmind import tags, tasks
+ from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
+
+
+ @tags("model_performance", "sklearn")
+ @tasks("regression", "time_series_forecasting")
+ def RegressionR2SquareComparison(datasets, models):
+     """
+     Compare R-Squared and Adjusted R-Squared values for each model and generate a summary table
+     with the results.
+
+     **Purpose**: The purpose of this function is to compare the R-Squared and Adjusted R-Squared values for different models applied to various datasets.
+
+     **Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared and Adjusted R-Squared values, and generates a summary table with these results.
+
+     **Signs of High Risk**:
+     - If the R-Squared values are significantly low, it could indicate that the model is not explaining much of the variability in the dataset.
+     - A significant difference between R-Squared and Adjusted R-Squared values might indicate that the model includes irrelevant features.
+
+     **Strengths**:
+     - Provides a quantitative measure of model performance in terms of variance explained.
+     - Adjusted R-Squared accounts for the number of predictors, making it a more reliable measure when comparing models with different numbers of features.
+
+     **Limitations**:
+     - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+     - The function relies on `adj_r2_score` from the `statsmodels.statsutils` module, which should be correctly implemented and imported.
+     - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+
+     """
+     results_list = []
+
+     for dataset, model in zip(datasets, models):
+         dataset_name = dataset.input_id
+         model_name = model.input_id
+
+         y_true = dataset.y
+         y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+         y_true = y_true.astype(y_pred.dtype)
+
+         r2s = metrics.r2_score(y_true, y_pred)
+         X_columns = dataset.feature_columns
+         adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
+
+         # Append results to the list
+         results_list.append(
+             {
+                 "Model": model_name,
+                 "Dataset": dataset_name,
+                 "R-Squared": r2s,
+                 "Adjusted R-Squared": adj_r2,
+             }
+         )
+
+     # Convert results list to a DataFrame
+     results_df = pd.DataFrame(results_list)
+     return results_df
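
The Adjusted R-Squared reported here is assumed to follow the usual correction, adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1) with n observations and p features, which is why the test needs dataset.feature_columns. A sketch with the same hypothetical train/test inputs used in the previous sketch:

    from validmind.tests.model_validation.sklearn.RegressionR2SquareComparison import (
        RegressionR2SquareComparison,
    )

    # One row per dataset-model pair: R-Squared and Adjusted R-Squared
    results_df = RegressionR2SquareComparison(
        datasets=[vm_train_ds, vm_test_ds], models=[vm_model, vm_model]
    )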