validmind 2.2.5__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. validmind/__version__.py +1 -1
  2. validmind/{ai.py → ai/test_descriptions.py} +127 -69
  3. validmind/ai/utils.py +104 -0
  4. validmind/api_client.py +70 -31
  5. validmind/client.py +5 -5
  6. validmind/logging.py +38 -32
  7. validmind/models/foundation.py +10 -6
  8. validmind/models/function.py +3 -1
  9. validmind/models/metadata.py +1 -1
  10. validmind/test_suites/__init__.py +1 -7
  11. validmind/test_suites/regression.py +0 -16
  12. validmind/test_suites/statsmodels_timeseries.py +1 -1
  13. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  14. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  15. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  16. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  17. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  18. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  19. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  20. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  21. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  22. validmind/tests/data_validation/ScatterPlot.py +1 -1
  23. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  24. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  25. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  26. validmind/tests/data_validation/WOEBinTable.py +1 -1
  27. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  28. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  29. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  30. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  31. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  32. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  33. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  34. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  35. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  36. validmind/tests/decorator.py +1 -1
  37. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  38. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  39. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  40. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  41. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  42. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  43. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  44. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  45. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  46. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  47. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  48. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  49. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  50. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  51. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  52. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  53. validmind/tests/model_validation/ragas/utils.py +35 -9
  54. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  55. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  56. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  57. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  58. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  59. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  60. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  61. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  62. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  63. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  64. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  65. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  66. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  67. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  68. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  69. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  70. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  71. validmind/tests/prompt_validation/Bias.py +14 -11
  72. validmind/tests/prompt_validation/Clarity.py +14 -11
  73. validmind/tests/prompt_validation/Conciseness.py +14 -11
  74. validmind/tests/prompt_validation/Delimitation.py +14 -11
  75. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  76. validmind/tests/prompt_validation/Robustness.py +11 -11
  77. validmind/tests/prompt_validation/Specificity.py +14 -11
  78. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  79. validmind/unit_metrics/composite.py +2 -1
  80. validmind/utils.py +4 -49
  81. validmind/vm_models/dataset/dataset.py +17 -3
  82. validmind/vm_models/dataset/utils.py +2 -2
  83. validmind/vm_models/model.py +1 -1
  84. validmind/vm_models/test/metric.py +1 -8
  85. validmind/vm_models/test/result_wrapper.py +27 -34
  86. validmind/vm_models/test/test.py +3 -0
  87. validmind/vm_models/test/threshold_test.py +1 -1
  88. validmind/vm_models/test_suite/runner.py +12 -6
  89. validmind/vm_models/test_suite/summary.py +18 -7
  90. validmind/vm_models/test_suite/test.py +13 -20
  91. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
  92. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/RECORD +95 -104
  93. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  94. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  95. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  96. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  97. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  98. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  99. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  100. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  101. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  102. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  103. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
  104. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
  105. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
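
Note on relocated tests: several stationarity tests (ADF, DFGLSArch, KPSS, PhillipsPerronArch, ZivotAndrewsArch) move from `validmind/tests/model_validation/statsmodels/` to `validmind/tests/data_validation/`, so their fully qualified test IDs change accordingly. A minimal sketch of what that means for callers, assuming the 2.x `init_dataset`/`run_test` entry points and a toy DataFrame (the names, signatures, and data here are illustrative only; check the 2.3.1 API reference for the exact interface):

```python
import pandas as pd
import validmind as vm

# Illustrative data only; any numeric, time-series-like frame would do.
df = pd.DataFrame({"y": range(30), "x": range(30)})

# Assumption: vm.init_dataset and vm.tests.run_test follow the documented 2.x API
# and a ValidMind connection has already been initialized.
vm_dataset = vm.init_dataset(dataset=df, target_column="y")

# 2.2.5 test ID: "validmind.model_validation.statsmodels.ADF"
# 2.3.1 test ID: "validmind.data_validation.ADF"
result = vm.tests.run_test(
    "validmind.data_validation.ADF",
    inputs={"dataset": vm_dataset},
)
```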
validmind/tests/data_validation/PiTCreditScoresHistogram.py (deleted)
@@ -1,150 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class PiTCreditScoresHistogram(Metric):
-     """
-     Generates a histogram visualization for observed and predicted credit default scores.
-
-     **Purpose**:
-     The PiT (Point in Time) Credit Scores Histogram metric is used to evaluate the predictive performance of a credit
-     risk assessment model. This metric provides a visual representation of observed versus predicted default scores and
-     enables quick and intuitive comparison for model assessment.
-
-     **Test Mechanism**:
-     This metric generates histograms for both observed and predicted score distributions of defaults and non-defaults.
-     The simultaneous representation of both the observed and predicted scores sheds light on the model's ability to
-     accurately predict credit risk.
-
-     **Signs of High Risk**:
-     - Significant discrepancies between the observed and predicted histograms, suggesting that the model may not be
-     adequately addressing certain risk factors.
-     - Concentration of predicted defaults towards one end of the graph, or uneven distribution in comparison to
-     observed scores, indicating potential issues in the model's interpretation of the data or outcome prediction.
-
-     **Strengths**:
-     - Provides an intuitive visual representation of model performance that's easy to comprehend, even for individuals
-     without a technical background.
-     - Useful for understanding the model's ability to distinguish between defaulting and non-defaulting entities.
-     - Specifically tailored for assessing credit risk models. The Point in Time (PiT) factor considers the evolution of
-     credit risk over time.
-
-     **Limitations**:
-     - As the information is visual, precise and quantitative results for detailed statistical analyses may not be
-     obtained.
-     - The method relies on manual inspection and comparison, introducing subjectivity and potential bias.
-     - Subtle discrepancies might go unnoticed and it could be less reliable for identifying such cues.
-     - Performance may degrade when score distributions overlap significantly or when too many scores are plotted,
-     resulting in cluttered or hard-to-decipher graphs.
-     """
-
-     name = "pit_credit_scores_histogram"
-     required_inputs = ["dataset", "model"]
-     default_params = {"title": "Histogram of Scores"}
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-
-     @staticmethod
-     def plot_score_histogram(
-         df,
-         default_column,
-         predicted_default_column,
-         scores_column,
-         title,
-         point_in_time_date,
-     ):
-         fig = make_subplots(
-             rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
-         )
-
-         observed_data_0 = df[df[default_column] == 0][scores_column]
-         observed_data_1 = df[df[default_column] == 1][scores_column]
-
-         predicted_data_0 = df[df[predicted_default_column] == 0][scores_column]
-         predicted_data_1 = df[df[predicted_default_column] == 1][scores_column]
-
-         fig.add_trace(
-             go.Histogram(x=observed_data_0, opacity=0.75, name="Observed Default = 0"),
-             row=1,
-             col=1,
-         )
-         fig.add_trace(
-             go.Histogram(x=observed_data_1, opacity=0.75, name="Observed Default = 1"),
-             row=1,
-             col=1,
-         )
-
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_0, opacity=0.75, name="Predicted Default = 0"
-             ),
-             row=1,
-             col=2,
-         )
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_1, opacity=0.75, name="Predicted Default = 1"
-             ),
-             row=1,
-             col=2,
-         )
-
-         title += f" (PiT: {point_in_time_date.strftime('%d %b %Y')})"
-         fig.update_layout(barmode="overlay", title_text=title)
-
-         return fig
-
-     def run(self):
-         df = self.inputs.dataset.df
-         default_column = (
-             self.params.get("default_column") or self.inputs.dataset.target_column
-         )
-         predicted_default_column = (
-             self.params.get("predicted_default_column")
-             or self.inputs.dataset.y_pred(self.inputs.model),
-         )
-         scores_column = self.params["scores_column"]
-         point_in_time_column = self.params["point_in_time_column"]
-
-         title = self.params["title"]
-
-         point_in_time_date = pd.to_datetime(df[point_in_time_column].iloc[0])
-
-         fig = self.plot_score_histogram(
-             df,
-             default_column,
-             predicted_default_column,
-             scores_column,
-             title,
-             point_in_time_date,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "score_histogram": {
-                     "observed_scores": list(df[df[default_column] == 1][scores_column]),
-                     "predicted_scores": list(
-                         df[df[predicted_default_column] == 1][scores_column]
-                     ),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="score_histogram",
-                     figure=fig,
-                 )
-             ],
-         )
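
The two deleted PiT histogram metrics (this one and PiTPDHistogram below) both overlay observed versus predicted default distributions in a two-panel Plotly figure. For readers who relied on them, a standalone sketch of the same overlay outside the Metric wrapper, using hypothetical column names (`default`, `pred_default`, `score`) and toy data:

```python
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Hypothetical frame: observed defaults, predicted defaults, and a score column.
df = pd.DataFrame(
    {
        "default": [0, 0, 1, 1, 0, 1],
        "pred_default": [0, 1, 1, 0, 0, 1],
        "score": [620, 580, 540, 555, 700, 510],
    }
)

fig = make_subplots(
    rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
)
for flag_col, label, subplot_col in [("default", "Observed", 1), ("pred_default", "Predicted", 2)]:
    for value in (0, 1):
        # One overlaid histogram per default flag value, per panel.
        fig.add_trace(
            go.Histogram(
                x=df[df[flag_col] == value]["score"],
                opacity=0.75,
                name=f"{label} Default = {value}",
            ),
            row=1,
            col=subplot_col,
        )
fig.update_layout(barmode="overlay", title_text="Histogram of Scores")
# fig.show()
```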
validmind/tests/data_validation/PiTPDHistogram.py (deleted)
@@ -1,152 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class PiTPDHistogram(Metric):
-     """
-     Assesses credit risk prediction accuracy of a model by comparing actual and predicted defaults at a chosen point in
-     time.
-
-     **Purpose**: The PiTPDHistogram metric uses Probability of Default (PD) calculations for individual instances
-     within both training and test data sets in order to assess a model's proficiency in predicting credit risk. A
-     distinctive point in time (PiT) is chosen for these PD calculations, and the results for both actual and predicted
-     defaults are presented in histogram form. This visualization is aimed at simplifying the understanding of model
-     prediction accuracy.
-
-     **Test Mechanism**: Instances are categorized into two groups - those for actual defaults and those for predicted
-     defaults, with '1' indicating a default and '0' indicating non-default. PD is calculated for each instance, and
-     based on these calculations, two histograms are created, one for actual defaults and one for predicted defaults. If
-     the predicted default frequency matches that of the actual defaults, the model's performance is deemed effective.
-
-     **Signs of High Risk**:
-     - Discrepancies between the actual and predicted default histograms may suggest model inefficiency.
-     - Variations in histogram shapes or divergences in default probability distributions could be concerning.
-     - Significant mismatches in peak default probabilities could also be red flags.
-
-     **Strengths**:
-     - Provides a visual comparison between actual and predicted defaults, aiding in the understanding of model
-     performance.
-     - Helps reveal model bias and areas where the model's performance could be improved.
-     - Easier to understand than purely numerical evaluations or other complicated visualization measures.
-
-     **Limitations**:
-     - The metric remains largely interpretive and subjective, as the extent and relevance of visual discrepancies often
-     need to be evaluated manually, leading to potentially inconsistent results across different analyses.
-     - This metric alone may not capture all the complexities and nuances of model performance.
-     - The information provided is limited to a specific point in time, potentially neglecting the model's performance
-     under various circumstances or different time periods.
-     """
-
-     name = "pit_pd_histogram"
-     required_context = ["dataset"]
-     default_params = {"title": "Histogram of PiT Probability of Default"}
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-
-     @staticmethod
-     def plot_pit_pd_histogram(
-         df,
-         default_column,
-         predicted_default_column,
-         default_probabilities_column,
-         title,
-         point_in_time_date,
-     ):
-         fig = make_subplots(
-             rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
-         )
-
-         observed_data_0 = df[df[default_column] == 0][default_probabilities_column]
-         observed_data_1 = df[df[default_column] == 1][default_probabilities_column]
-
-         predicted_data_0 = df[df[predicted_default_column] == 0][
-             default_probabilities_column
-         ]
-         predicted_data_1 = df[df[predicted_default_column] == 1][
-             default_probabilities_column
-         ]
-
-         fig.add_trace(
-             go.Histogram(x=observed_data_0, opacity=0.75, name="Observed Default = 0"),
-             row=1,
-             col=1,
-         )
-         fig.add_trace(
-             go.Histogram(x=observed_data_1, opacity=0.75, name="Observed Default = 1"),
-             row=1,
-             col=1,
-         )
-
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_0, opacity=0.75, name="Predicted Default = 0"
-             ),
-             row=1,
-             col=2,
-         )
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_1, opacity=0.75, name="Predicted Default = 1"
-             ),
-             row=1,
-             col=2,
-         )
-
-         title += f" (PiT: {point_in_time_date.strftime('%d %b %Y')})"
-         fig.update_layout(barmode="overlay", title_text=title)
-
-         return fig
-
-     def run(self):
-         df = self.inputs.dataset.df
-         default_column = self.params["default_column"]
-         predicted_default_column = self.params["predicted_default_column"]
-         default_probabilities_column = self.params["default_probabilities_column"]
-         point_in_time_column = self.params["point_in_time_column"]
-
-         title = self.params["title"]
-
-         point_in_time_date = pd.to_datetime(df[point_in_time_column].iloc[0])
-
-         fig = self.plot_pit_pd_histogram(
-             df,
-             default_column,
-             predicted_default_column,
-             default_probabilities_column,
-             title,
-             point_in_time_date,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "prob_histogram": {
-                     "observed_probs": list(
-                         df[df[default_column] == 1][default_probabilities_column]
-                     ),
-                     "predicted_probs": list(
-                         df[df[predicted_default_column] == 1][
-                             default_probabilities_column
-                         ]
-                     ),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="prob_histogram",
-                     figure=fig,
-                 )
-             ],
-         )
validmind/tests/model_validation/statsmodels/ADFTest.py (deleted)
@@ -1,88 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from statsmodels.tsa.stattools import adfuller
-
- from validmind.vm_models import ThresholdTest, ThresholdTestResult
-
-
- @dataclass
- class ADFTest(ThresholdTest):
-     """
-     Assesses the stationarity of time series data using the Augmented Dickey-Fuller (ADF) test.
-
-     **Purpose**: The Augmented Dickey-Fuller (ADF) metric test is designed to evaluate the presence of a unit root in a
-     time series. This essentially translates to assessing the stationarity of a time series dataset. This is vital in
-     time series analysis, regression tasks, and forecasting, as these often need the data to be stationary.
-
-     **Test Mechanism**: This test application utilizes the "adfuller" function from Python's “statsmodels” library. It
-     applies this function to each column of the training dataset, subsequently calculating the ADF statistic, p-value,
-     the number of lags used, and the number of observations in the sample for each column. If a column's p-value is
-     lower than the predetermined threshold (usually 0.05), the series is considered stationary, and the test is deemed
-     passed for that column.
-
-     **Signs of High Risk**:
-     - A p-value that surpasses the threshold value indicates a high risk or potential model performance issue.
-     - A high p-value suggests that the null hypothesis (of a unit root being present) cannot be rejected. This in turn
-     suggests that the series is non-stationary which could potentially yield unreliable and falsified results for the
-     model's performance and forecast.
-
-     **Strengths**:
-     - Archetypal Test for Stationarity: The ADF test is a comprehensive approach towards testing the stationarity of
-     time series data. Such testing is vital for many machine learning and statistical models.
-     - Detailed Output: The function generates detailed output, including the number of lags used and the number of
-     observations, which adds to understanding a series’ behaviour.
-
-     **Limitations**:
-     - Dependence on Threshold: The result of this test freights heavily on the threshold chosen. Hence, an imprudent
-     threshold value might lead to false acceptance or rejection of the null hypothesis.
-     - Not Effective for Trending Data: The test suffers when it operates under the assumption that the data does not
-     encapsulate any deterministic trend. In the presence of such a trend, it might falsely identify a series as
-     non-stationary.
-     - Potential for False Positives: The ADF test especially in the case of larger datasets, tends to reject the null
-     hypothesis, escalating the chances of false positives.
-     """
-
-     name = "adf_test"
-     required_inputs = ["dataset"]
-     default_params = {"threshold": 0.05}
-     metadata = {
-         "task_types": ["regression"],
-         "tags": [
-             "time_series_data",
-             "statsmodels",
-             "forecasting",
-             "statistical_test",
-             "stationarity",
-         ],
-     }
-
-     def run(self):
-         x_train = self.inputs.dataset.df
-
-         results = []
-         for col in x_train.columns:
-             # adf_values[col] = adfuller(x_train[col].values)
-             adf, pvalue, usedlag, nobs, critical_values, icbest = adfuller(
-                 x_train[col].values
-             )
-
-             col_passed = pvalue < self.params["threshold"]
-             results.append(
-                 ThresholdTestResult(
-                     column=col,
-                     passed=col_passed,
-                     values={
-                         "adf": adf,
-                         "pvalue": pvalue,
-                         "usedlag": usedlag,
-                         "nobs": nobs,
-                         "icbest": icbest,
-                     },
-                 )
-             )
-
-         return self.cache_results(results, passed=all([r.passed for r in results]))
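
The removed ADFTest was a thin wrapper around `statsmodels.tsa.stattools.adfuller`, applied column by column with a 0.05 p-value threshold. A minimal standalone sketch of the same per-column stationarity check, using a hypothetical DataFrame in place of the test's training dataset:

```python
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Hypothetical numeric frame; in the removed test this was self.inputs.dataset.df.
x_train = pd.DataFrame({"a": range(50), "b": [v % 7 for v in range(50)]})
threshold = 0.05  # same default the removed ADFTest used

for col in x_train.columns:
    # adfuller returns the ADF statistic, p-value, lags used, number of
    # observations, critical values, and the information criterion.
    adf_stat, pvalue, usedlag, nobs, critical_values, icbest = adfuller(
        x_train[col].values
    )
    passed = pvalue < threshold  # reject the unit-root null => treat as stationary
    print(f"{col}: ADF={adf_stat:.3f}, p={pvalue:.3f}, lags={usedlag}, passed={passed}")
```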
validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py (deleted)
@@ -1,198 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from sklearn.inspection import permutation_importance
-
- from validmind.logging import get_logger
- from validmind.vm_models import Figure, Metric
-
- logger = get_logger(__name__)
-
-
- @dataclass
- class FeatureImportanceAndSignificance(Metric):
-     """
-     Evaluates and visualizes the statistical significance and feature importance using regression and decision tree
-     models.
-
-     **Purpose**: The 'FeatureImportanceAndSignificance' test evaluates the statistical significance and the importance
-     of features in the context of the machine learning model. By comparing the p-values from a regression model and the
-     feature importances from a decision tree model, this test aids in determining the most significant variables from a
-     statistical and a machine learning perspective, assisting in feature selection during the model development process.
-
-     **Test Mechanism**: The test first compares the p-values from a regression model and the feature importances from a
-     decision tree model. These values are normalized to ensure a uniform comparison. The 'p_threshold' parameter is
-     used to determine what p-value is considered statistically significant and if the 'significant_only' parameter is
-     true, only features with p-values below this threshold are included in the final output. The output from this test
-     includes an interactive visualization displaying normalized p-values and the associated feature importances. The
-     test throws an error if it does not receive both a regression model and a decision tree model.
-
-     **Signs of High Risk**:
-     - Exceptionally high or low p-values, which suggest that a feature may not be significant or meaningful in the
-     context of the model.
-     - If many variables with small feature importance values have significant p-values, this could indicate that the
-     model might be overfitting.
-
-     **Strengths**:
-     - Combines two perspectives statistical significance (p-values) and feature importance (decision tree model),
-     making it a robust feature selection test.
-     - Provides an interactive visualization making it easy to interpret and understand the results.
-
-     **Limitations**:
-     - The test only works with a regression model and a decision tree model which may limit its applicability.
-     - The test does not take into account potential correlations or causative relationships between features which may
-     lead to misinterpretations of significance and importance.
-     - Over-reliance on the p-value as a cut-off for feature significance can be seen as arbitrary and may not truly
-     reflect the real-world importance of the feature.
-     """
-
-     name = "feature_importance_and_significance"
-     required_inputs = ["models"]
-     default_params = {
-         "fontsize": 10,
-         "p_threshold": 0.05,
-         "significant_only": False,
-         "figure_height": 800,
-         "bar_width": 0.3,
-     }
-     metadata = {
-         "task_types": ["regression"],
-         "tags": [
-             "statsmodels",
-             "feature_importance",
-             "statistical_test",
-             "visualization",
-         ],
-     }
-
-     def compute_p_values_and_feature_importances(
-         self, regression_model, decision_tree_model
-     ):
-         p_values = regression_model.model.pvalues
-         feature_importances = permutation_importance(
-             decision_tree_model.model,
-             decision_tree_model.train_ds.x,
-             decision_tree_model.train_ds.y,
-             random_state=0,
-             n_jobs=-2,
-         ).importances_mean
-
-         p_values = p_values / max(p_values)
-         feature_importances = feature_importances / max(feature_importances)
-
-         return p_values, feature_importances
-
-     def create_dataframe(
-         self,
-         p_values,
-         feature_importances,
-         regression_model,
-         significant_only,
-         p_threshold,
-     ):
-         df = pd.DataFrame(
-             {
-                 "Normalized p-value": p_values,
-                 "Normalized Feature Importance": feature_importances,
-             },
-             index=regression_model.train_ds.x_df().columns,
-         )
-
-         if significant_only:
-             df = df[df["Normalized p-value"] <= p_threshold]
-
-         df = df.sort_values(by="Normalized Feature Importance", ascending=True)
-
-         return df
-
-     def create_figure(self, df, fontsize, figure_height, bar_width):
-         fig = go.Figure()
-
-         title_text = (
-             "Significant Features (p-value <= {0})".format(self.params["p_threshold"])
-             if self.params["significant_only"]
-             else "All Features"
-         )
-
-         fig.update_layout(
-             title=title_text,
-             barmode="group",
-             height=figure_height,
-             yaxis=dict(tickfont=dict(size=fontsize)),
-             xaxis=dict(title="Normalized Value", titlefont=dict(size=fontsize)),
-         )
-
-         fig.add_trace(
-             go.Bar(
-                 y=df.index,
-                 x=df["Normalized p-value"],
-                 name="Normalized p-value",
-                 orientation="h",
-                 marker=dict(color="skyblue"),
-                 width=bar_width,
-             )
-         )
-
-         fig.add_trace(
-             go.Bar(
-                 y=df.index,
-                 x=df["Normalized Feature Importance"],
-                 name="Normalized Feature Importance",
-                 orientation="h",
-                 marker=dict(color="orange"),
-                 width=bar_width,
-             )
-         )
-
-         return fig
-
-     def run(self):
-         fontsize = self.params["fontsize"]
-         significant_only = self.params["significant_only"]
-         p_threshold = self.params["p_threshold"]
-         figure_height = self.params["figure_height"]
-         bar_width = self.params["bar_width"]
-
-         all_models = []
-
-         if self.inputs.models is not None:
-             all_models.extend(self.inputs.models)
-
-         if len(self.inputs.models) != 2:
-             raise ValueError("Two models must be provided")
-
-         regression_model = self.inputs.models[0]
-         decision_tree_model = self.inputs.models[1]
-
-         p_values, feature_importances = self.compute_p_values_and_feature_importances(
-             regression_model, decision_tree_model
-         )
-
-         df = self.create_dataframe(
-             p_values,
-             feature_importances,
-             regression_model,
-             significant_only,
-             p_threshold,
-         )
-
-         fig = self.create_figure(df, fontsize, figure_height, bar_width)
-
-         return self.cache_results(
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key=self.key,
-                     figure=fig,
-                     metadata={
-                         "model_regression": str(regression_model.model),
-                         "model_decision_tree": str(decision_tree_model.model),
-                     },
-                 )
-             ]
-         )
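
The deleted FeatureImportanceAndSignificance test compared normalized regression p-values against normalized permutation importances from a tree model. A compact sketch of the same comparison without the Metric wrapper, on synthetic data (all names and data here are hypothetical):

```python
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.inspection import permutation_importance
from sklearn.tree import DecisionTreeRegressor

# Hypothetical data: two informative features plus one noise feature.
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(200, 3)), columns=["x1", "x2", "noise"])
y = 2 * X["x1"] - 1.5 * X["x2"] + rng.normal(scale=0.5, size=200)

# Statistical significance: per-coefficient p-values from an OLS fit.
ols = sm.OLS(y, sm.add_constant(X)).fit()
p_values = ols.pvalues.drop("const")

# Model-based importance: permutation importance of a fitted decision tree.
tree = DecisionTreeRegressor(random_state=0).fit(X, y)
importances = permutation_importance(tree, X, y, random_state=0).importances_mean

# Normalize both measures to [0, 1] so they can sit side by side, as the
# removed test did before plotting.
comparison = pd.DataFrame(
    {
        "Normalized p-value": p_values / p_values.max(),
        "Normalized Feature Importance": importances / importances.max(),
    },
    index=X.columns,
)
print(comparison.sort_values("Normalized Feature Importance"))
```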