validmind 2.5.8__py3-none-any.whl → 2.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +26 -7
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +3 -13
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +27 -20
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +36 -35
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +35 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/result_wrapper.py +93 -132
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
In the hunks reconstructed below, `…` marks deleted or truncated lines that the package-diff view did not fully render.

```diff
--- a/validmind/tests/data_validation/BivariateScatterPlots.py
+++ b/validmind/tests/data_validation/BivariateScatterPlots.py
@@ -3,109 +3,80 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
-from dataclasses import dataclass
 
 import plotly.express as px
 
-from validmind …
+from validmind import tags, tasks
 
 
-@…
+@tags("tabular_data", "numerical_data", "visualization")
+@tasks("classification")
+def BivariateScatterPlots(dataset):
     """
-    Generates bivariate scatterplots to visually inspect relationships between pairs of predictor variables …
-    … learning classification tasks.
-    …
-    - Scatterplots are …
+    Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables
+    in machine learning classification tasks.
+
+    ### Purpose
+
+    This function is intended for visual inspection and monitoring of relationships between pairs of numerical
+    variables in a machine learning model targeting classification tasks. It helps in understanding how predictor
+    variables (features) interact with each other, which can inform feature selection, model-building strategies, and
+    identify potential biases or irregularities in the data.
+
+    ### Test Mechanism
+
+    The function creates scatter plots for each pair of numerical features in the dataset. It first filters out
+    non-numerical and binary features, ensuring the plots focus on meaningful numerical relationships. The resulting
+    scatterplots are color-coded uniformly to avoid visual distraction, and the function returns a tuple of Plotly
+    figure objects, each representing a scatter plot for a pair of features.
+
+    ### Signs of High Risk
+
+    - Visual patterns suggesting non-linear relationships, multicollinearity, clustering, or outlier points in the
+    scatter plots.
+    - Such issues could affect the assumptions and performance of certain models, especially those assuming linearity,
+    like logistic regression.
+
+    ### Strengths
+
+    - Scatterplots provide an intuitive and visual tool to explore relationships between two variables.
+    - They are useful for identifying outliers, variable associations, and trends, including non-linear patterns.
+    - Supports visualization of binary or multi-class classification datasets, focusing on numerical features.
+
+    ### Limitations
+
+    - Scatterplots are limited to bivariate analysis, showing relationships between only two variables at a time.
+    - Not ideal for very large datasets where overlapping points can reduce the clarity of the visualization.
+    - Scatterplots are exploratory tools and do not provide quantitative measures of model quality or performance.
+    - Interpretation is subjective and relies on the domain knowledge and judgment of the viewer.
     """
+    figures = []
 
-    …
-        "categorical_data",
-        "binary_classification",
-        "multiclass_classification",
-        "visualization",
+    # Select numerical features
+    features = dataset.feature_columns_numeric
+
+    # Select non-binary features
+    features = [
+        feature for feature in features if len(dataset.df[feature].unique()) > 2
     ]
 
-    …
-                Figure(for_object=self, key=f"{self.key}:{x}_{y}", figure=fig)
-            )
-
-        return figures
-
-    def run(self):
-        selected_columns = self.params["selected_columns"]
-
-        if selected_columns is None:
-            # Use all columns if selected_columns is not provided
-            selected_columns = self.inputs.dataset.df.columns.tolist()
-        else:
-            # Check if all selected columns exist in the dataframe
-            missing_columns = [
-                col
-                for col in selected_columns
-                if col not in self.inputs.dataset.df.columns
-            ]
-            if missing_columns:
-                raise ValueError(
-                    f"The following selected columns are not in the dataframe: {missing_columns}"
-                )
-
-        figures = self.plot_bivariate_scatter(selected_columns)
-
-        return self.cache_results(figures=figures)
+    df = dataset.df[features]
+
+    # Generate all pairs of columns
+    features_pairs = list(itertools.combinations(df.columns, 2))
+
+    for x, y in features_pairs:
+        fig = px.scatter(
+            df,
+            x=x,
+            y=y,
+            title=f"{x} and {y}",
+            labels={x: x, y: y},
+            opacity=0.7,
+            color_discrete_sequence=["blue"],  # Use the same color for all points
+        )
+        fig.update_traces(marker=dict(color="blue"))
+
+        figures.append(fig)
+
+    return tuple(figures)
```
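The refactor turns a class-based `Metric` into a plain decorated function, so the test can be exercised with any object that exposes the two attributes it reads, `feature_columns_numeric` and `df`. A minimal sketch of calling it directly, assuming the `tags`/`tasks` decorators leave the function directly callable; the `FakeDataset` stand-in below is hypothetical, not part of the library:

```python
from dataclasses import dataclass

import pandas as pd

from validmind.tests.data_validation.BivariateScatterPlots import BivariateScatterPlots


@dataclass
class FakeDataset:
    """Hypothetical stand-in exposing the two attributes the test reads."""

    df: pd.DataFrame
    feature_columns_numeric: list


data = pd.DataFrame(
    {
        "age": [23, 35, 46, 51],
        "income": [30_000, 52_000, 61_000, 80_000],
        "is_active": [0, 1, 1, 0],  # binary column, filtered out by the test
    }
)

figures = BivariateScatterPlots(
    FakeDataset(df=data, feature_columns_numeric=["age", "income", "is_active"])
)
print(len(figures))  # 1: only the ("age", "income") pair survives the binary filter
```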
```diff
--- a/validmind/tests/data_validation/ChiSquaredFeaturesTable.py
+++ b/validmind/tests/data_validation/ChiSquaredFeaturesTable.py
@@ -2,140 +2,98 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
 
 import pandas as pd
 from scipy.stats import chi2_contingency
 
-from validmind …
+from validmind import tags, tasks
 
 
-@…
+@tags("tabular_data", "categorical_data", "statistical_test")
+@tasks("classification")
+def ChiSquaredFeaturesTable(dataset, p_threshold=0.05):
     """
-    …
-    The …
+    Assesses the statistical association between categorical features and a target variable using the Chi-Squared test.
+
+    ### Purpose
+
+    The `ChiSquaredFeaturesTable` function is designed to evaluate the relationship between categorical features and a
+    target variable in a dataset. It performs a Chi-Squared test of independence for each categorical feature to
+    determine whether a statistically significant association exists with the target variable. This is particularly
+    useful in Model Risk Management for understanding the relevance of features and identifying potential biases in a
+    classification model.
+
+    ### Test Mechanism
+
+    The function creates a contingency table for each categorical feature and the target variable, then applies the
+    Chi-Squared test to compute the Chi-squared statistic and the p-value. The results for each feature include the
+    variable name, Chi-squared statistic, p-value, p-value threshold, and a pass/fail status based on whether the
+    p-value is below the specified threshold. The output is a DataFrame summarizing these results, sorted by p-value to
+    highlight the most statistically significant associations.
+
+    ### Signs of High Risk
+
+    - High p-values (greater than the set threshold) indicate a lack of significant association between a feature and
+    the target variable, resulting in a 'Fail' status.
+    - Features with a 'Fail' status might not be relevant for the model, which could negatively impact model
+    performance.
+
+    ### Strengths
+
+    - Provides a clear, statistical assessment of the relationship between categorical features and the target variable.
+    - Produces an easily interpretable summary with a 'Pass/Fail' outcome for each feature, helping in feature
+    selection.
+    - The p-value threshold is adjustable, allowing for flexibility in statistical rigor.
+
+    ### Limitations
+
+    - Assumes the dataset is tabular and consists of categorical variables, which may not be suitable for all datasets.
+    - The test is designed for classification tasks and is not applicable to regression problems.
+    - As with all hypothesis tests, the Chi-Squared test can only detect associations, not causal relationships.
+    - The choice of p-value threshold can affect the interpretation of feature relevance, and different thresholds may
+    lead to different conclusions.
     """
 
-    required_inputs = ["dataset"]
-    default_params = {"cat_features": None, "p_threshold": 0.05}
-    tasks = ["classification"]
-    tags = [
-        "tabular_data",
-        "categorical_data",
-        "statistical_test",
-        "binary_classification",
-        "multiclass_classification",
-    ]
-
-    def run(self):
-        target_column = self.inputs.dataset.target_column
-        cat_features = self.params["cat_features"]
-        p_threshold = self.params["p_threshold"]
-
-        # Ensure cat_features is provided
-        if not cat_features:
-            cat_features = self.inputs.dataset.feature_columns_categorical
-
-        df = self.inputs.dataset.df
-
-        chi_squared_results = self.chi_squared_categorical_feature_selection(
-            df, cat_features, target_column, p_threshold
-        )
+    target_column = dataset.target_column
 
-    …
+    features = dataset.feature_columns_categorical
+
+    results_df = _chi_squared_categorical_feature_selection(
+        dataset.df, features, target_column, p_threshold
+    )
+
+    return results_df
 
-    def chi_squared_categorical_feature_selection(
-        self, df, cat_features, target, p_threshold
-    ):
-        # Ensure the columns exist in the dataframe
-        for var in cat_features:
-            if var not in df.columns:
-                raise ValueError(f"The column '{var}' does not exist in the dataframe.")
-        if target not in df.columns:
-            raise ValueError(
-                f"The target column '{target}' does not exist in the dataframe."
-            )
-
-        results = []
-
-        for var in cat_features:
-            # Create a contingency table
-            contingency_table = pd.crosstab(df[var], df[target])
-
-            # Perform the Chi-Square test
-            chi2, p, _, _ = chi2_contingency(contingency_table)
-
-            # Add the result to the list of results
-            results.append(
-                [var, chi2, p, p_threshold, "Pass" if p <= p_threshold else "Fail"]
-            )
-
-        # Convert results to a DataFrame and return
-        results_df = pd.DataFrame(
-            results,
-            columns=[
-                "Variable",
-                "Chi-squared statistic",
-                "p-value",
-                "Threshold",
-                "Pass/Fail",
-            ],
-        )
-        …
-        )
-    ]
+def _chi_squared_categorical_feature_selection(df, features, target, p_threshold):
+
+    results = []
+
+    for var in features:
+        # Create a contingency table
+        contingency_table = pd.crosstab(df[var], df[target])
+
+        # Perform the Chi-Square test
+        chi2, p, _, _ = chi2_contingency(contingency_table)
+
+        # Add the result to the list of results
+        results.append(
+            [var, chi2, p, p_threshold, "Pass" if p <= p_threshold else "Fail"]
+        )
+
+    # Convert results to a DataFrame and return
+    results_df = pd.DataFrame(
+        results,
+        columns=[
+            "Variable",
+            "Chi-squared statistic",
+            "p-value",
+            "Threshold",
+            "Pass/Fail",
+        ],
+    )
+
+    # Sort by p-value in ascending order
+    results_df = results_df.sort_values(by="p-value")
+
+    return results_df
```
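The mechanism in the new docstring can be reproduced on toy data with pandas and SciPy alone; this worked sketch mirrors what `_chi_squared_categorical_feature_selection` does for a single feature:

```python
import pandas as pd
from scipy.stats import chi2_contingency

df = pd.DataFrame(
    {
        "color": ["red", "red", "blue", "blue", "red", "blue"],
        "target": [1, 1, 0, 0, 1, 0],
    }
)

# One contingency table per categorical feature vs. the target column
contingency_table = pd.crosstab(df["color"], df["target"])

# Chi-Squared test of independence: a low p-value suggests a real association
chi2, p, _, _ = chi2_contingency(contingency_table)
print(f"chi2={chi2:.3f}, p={p:.3f} ->", "Pass" if p <= 0.05 else "Fail")
# With only six rows the test is underpowered, so even a perfect split "Fails"
```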
```diff
--- a/validmind/tests/data_validation/ClassImbalance.py
+++ b/validmind/tests/data_validation/ClassImbalance.py
@@ -28,17 +28,20 @@ class ClassImbalance(ThresholdTest):
     """
     Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.
 
-    …
-    … utilized by a machine learning model. Specifically, it aims to ensure that the classes aren't overly skewed, which
-    could lead to bias in the model's predictions. It's crucial to have a balanced training dataset to avoid creating a
-    model that's biased with high accuracy for the majority class and low accuracy for the minority class.
+    ### Purpose
 
-    …
+    The Class Imbalance test is designed to evaluate the distribution of target classes in a dataset that's utilized by
+    a machine learning model. Specifically, it aims to ensure that the classes aren't overly skewed, which could lead
+    to bias in the model's predictions. It's crucial to have a balanced training dataset to avoid creating a model
+    that's biased with high accuracy for the majority class and low accuracy for the minority class.
 
-    …
+    ### Test Mechanism
+
+    This Class Imbalance test operates by calculating the frequency (expressed as a percentage) of each class in the
+    target column of the dataset. It then checks whether each class appears in at least a set minimum percentage of the
+    total records. This minimum percentage is a modifiable parameter, but the default value is set to 10%.
+
+    ### Signs of High Risk
 
     - Any class that represents less than the pre-set minimum percentage threshold is marked as high risk, implying a
     potential class imbalance.
@@ -46,7 +49,7 @@ class ClassImbalance(ThresholdTest):
     - Fundamentally, if any class fails this test, it's highly likely that the dataset possesses imbalanced class
     distribution.
 
-    …
+    ### Strengths
 
     - The test can spot under-represented classes that could affect the efficiency of a machine learning model.
     - The calculation is straightforward and swift.
@@ -56,7 +59,7 @@ class ClassImbalance(ThresholdTest):
     - The test creates a visually insightful plot showing the classes and their corresponding proportions, enhancing
     interpretability and comprehension of the data.
 
-    …
+    ### Limitations
 
     - The test might struggle to perform well or provide vital insights for datasets with a high number of classes. In
     such cases, the imbalance could be inevitable due to the inherent class distribution.
@@ -66,7 +69,7 @@ class ClassImbalance(ThresholdTest):
     different classes, which might fluctuate based on specific applications or domains.
     - While it can identify imbalances in class distribution, it doesn't provide direct methods to address or correct
    these imbalances.
-    - The test is only applicable for classification …
+    - The test is only applicable for classification operations and unsuitable for regression or clustering tasks.
     """
 
     # Changing the name test to avoid a name clash
```
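The documented mechanism (per-class frequency against a minimum-percentage threshold, 10% by default) reduces to a few lines of pandas; a sketch on synthetic labels:

```python
import pandas as pd

y = pd.Series(["A"] * 90 + ["B"] * 8 + ["C"] * 2)  # synthetic target column
min_percent_threshold = 10  # the modifiable parameter described above

rates = y.value_counts(normalize=True) * 100
for cls, pct in rates.items():
    print(f"{cls}: {pct:.1f}% ->", "Pass" if pct >= min_percent_threshold else "Fail")
# B (8.0%) and C (2.0%) fall below 10% and would be flagged as potential imbalance
```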
```diff
--- a/validmind/tests/data_validation/DFGLSArch.py
+++ b/validmind/tests/data_validation/DFGLSArch.py
@@ -17,32 +17,38 @@ logger = get_logger(__name__)
 @dataclass
 class DFGLSArch(Metric):
     """
-    …
+    Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration.
 
-    …
-    … series data. For machine learning models dealing with time series and forecasting, this metric evaluates the
-    existence of a unit root, thereby checking whether a time series is non-stationary. This analysis is a crucial
-    initial step when dealing with time series data.
+    ### Purpose
 
-    …
+    The Dickey-Fuller GLS (DFGLS) test is utilized to determine the order of integration in time series data. For
+    machine learning models dealing with time series and forecasting, this metric evaluates the existence of a unit
+    root, thereby checking whether a time series is non-stationary. This analysis is a crucial initial step when
+    dealing with time series data.
+
+    ### Test Mechanism
+
+    This code implements the Dickey-Fuller GLS unit root test on each attribute of the dataset. This process involves
+    iterating through every column of the dataset and applying the DFGLS test to assess the presence of a unit root.
+    The resulting information, including the test statistic ('stat'), the p-value ('pvalue'), the quantity of lagged
+    differences utilized in the regression ('usedlag'), and the number of observations ('nobs'), is subsequently stored.
+
+    ### Signs of High Risk
 
-    **Signs of High Risk**:
     - A high p-value for the DFGLS test represents a high risk. Specifically, a p-value above a typical threshold of
     0.05 suggests that the time series data is quite likely to be non-stationary, thus presenting a high risk for
     generating unreliable forecasts.
 
-    …
+    ### Strengths
+
     - The Dickey-Fuller GLS test is a potent tool for checking the stationarity of time series data.
     - It helps to verify the assumptions of the models before the actual construction of the machine learning models
     proceeds.
     - The results produced by this metric offer a clear insight into whether the data is appropriate for specific
     machine learning models, especially those demanding the stationarity of time series data.
 
-    …
+    ### Limitations
+
     - Despite its benefits, the DFGLS test does present some drawbacks. It can potentially lead to inaccurate
     conclusions if the time series data incorporates a structural break.
     - If the time series tends to follow a trend while still being stationary, the test might misinterpret it,
```
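The per-column procedure the docstring describes can be sketched with the `arch` package, which the test's name suggests it builds on. The attribute names below (`stat`, `pvalue`, `lags`, `nobs`) come from arch's unit-root API, with `lags` corresponding to the docstring's 'usedlag':

```python
import numpy as np
import pandas as pd
from arch.unitroot import DFGLS

rng = np.random.default_rng(0)
df = pd.DataFrame(
    {
        "random_walk": rng.normal(size=200).cumsum(),  # unit root, non-stationary
        "white_noise": rng.normal(size=200),  # stationary
    }
)

# Apply the DFGLS test to every column and collect the reported quantities
for col in df.columns:
    result = DFGLS(df[col].dropna())
    print(col, round(result.stat, 3), round(result.pvalue, 3), result.lags, result.nobs)
```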
```diff
--- a/validmind/tests/data_validation/DatasetDescription.py
+++ b/validmind/tests/data_validation/DatasetDescription.py
@@ -25,42 +25,48 @@ class DatasetDescription(Metric):
     """
     Provides comprehensive analysis and statistical summaries of each field in a machine learning model's dataset.
 
-    …
+    ### Purpose
+
     The test depicted in the script is meant to run a comprehensive analysis on a Machine Learning model's datasets.
     The test or metric is implemented to obtain a complete summary of the fields in the dataset, including vital
     statistics of each field such as count, distinct values, missing values, histograms for numerical, categorical,
     boolean, and text fields. This summary gives a comprehensive overview of the dataset to better understand the
     characteristics of the data that the model is trained on or evaluates.
 
-    …
+    ### Test Mechanism
+
     The DatasetDescription class accomplishes the purpose as follows: firstly, the test method "run" infers the data
-    type of each column in the dataset and stores the details (id, column type). For each field,
+    type of each column in the dataset and stores the details (id, column type). For each field, the
     "describe_dataset_field" method is invoked to collect statistical information about the field, including count,
     missing value count and its proportion to the total, unique value count, and its proportion to the total. Depending
     on the data type of a field, histograms are generated that reflect the distribution of data within the field.
-    Numerical fields use "get_numerical_histograms" method to calculate histogram distribution, whereas for
+    Numerical fields use the "get_numerical_histograms" method to calculate histogram distribution, whereas for
     categorical, boolean and text fields, a histogram is computed with frequencies of each unique value in the
     datasets. For unsupported types, an error is raised. Lastly, a summary table is built to aggregate all the
     statistical insights and histograms of the fields in a dataset.
 
-    …
+    ### Signs of High Risk
+
+    - High ratio of missing values to total values in one or more fields which may impact the quality of the
+    predictions.
     - Unsupported data types in dataset fields.
     - Large number of unique values in the dataset's fields which might make it harder for the model to establish
     patterns.
     - Extreme skewness or irregular distribution of data as reflected in the histograms.
 
-    …
+    ### Strengths
+
+    - Provides a detailed analysis of the dataset with versatile summaries like count, unique values, histograms, etc.
     - Flexibility in handling different types of data: numerical, categorical, boolean, and text.
     - Useful in detecting problems in the dataset like missing values, unsupported data types, irregular data
-    distribution etc.
+    distribution, etc.
     - The summary gives a comprehensive understanding of dataset features allowing developers to make informed
     decisions.
 
-    …
+    ### Limitations
+
     - The computation can be expensive from a resource standpoint, particularly for large datasets with numerous fields.
-    - The histograms use arbitrary number of bins which may not be the optimal number of bins for specific data
+    - The histograms use an arbitrary number of bins which may not be the optimal number of bins for specific data
     distribution.
     - Unsupported data types for columns will raise an error which may limit evaluating the dataset.
     - Fields with all null or missing values are not included in histogram computation.
```
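A rough stand-in for the per-field statistics the metric collects (count, missing values and their proportion, unique values and their proportion), written with plain pandas:

```python
import pandas as pd

df = pd.DataFrame({"age": [23, None, 46, 51], "city": ["NY", "NY", "SF", None]})

for col in df.columns:
    n = len(df)
    missing = int(df[col].isna().sum())
    distinct = df[col].nunique(dropna=True)
    print(
        col,
        {
            "count": n - missing,
            "missing": missing,
            "missing_pct": missing / n,
            "distinct": distinct,
            "distinct_pct": distinct / n,
        },
    )
```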
```diff
--- a/validmind/tests/data_validation/DatasetSplit.py
+++ b/validmind/tests/data_validation/DatasetSplit.py
@@ -10,26 +10,28 @@ class DatasetSplit(Metric):
     Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML
     model.
 
-    …
+    ### Purpose
+
     The DatasetSplit test is designed to evaluate and visualize the distribution of data among training, testing, and
     validation datasets, if available, within a given machine learning model. The main purpose is to assess whether the
     model's datasets are split appropriately, as an imbalanced split might affect the model's ability to learn from the
     data and generalize to unseen data.
 
-    …
+    ### Test Mechanism
+
     The DatasetSplit test first calculates the total size of all available datasets in the model. Then, for each
     individual dataset, the methodology involves determining the size of the dataset and its proportion relative to the
     total size. The results are then conveniently summarized in a table that shows dataset names, sizes, and
     proportions. Absolute size and proportion of the total dataset size are displayed for each individual dataset.
 
-    …
+    ### Signs of High Risk
 
     - A very small training dataset, which may result in the model not learning enough from the data.
     - A very large training dataset and a small test dataset, which may lead to model overfitting and poor
     generalization to unseen data.
     - A small or non-existent validation dataset, which might complicate the model's performance assessment.
 
-    …
+    ### Strengths
 
     - The DatasetSplit test provides a clear, understandable visualization of dataset split proportions, which can
     highlight any potential imbalance in dataset splits quickly.
@@ -37,7 +39,7 @@ class DatasetSplit(Metric):
     - The metric is not tied to any specific data type and is applicable to tabular data, time series data, or text
     data.
 
-    …
+    ### Limitations
 
     - The DatasetSplit test does not provide any insight into the quality or diversity of the data within each split,
     just the size and proportion.
```
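The test mechanism amounts to size-and-proportion bookkeeping over whatever splits the model was initialized with; the counts below are illustrative, not taken from the library:

```python
# Hypothetical split sizes, used only to illustrate the proportion table
sizes = {"training": 8_000, "test": 1_500, "validation": 500}
total = sum(sizes.values())

for name, size in sizes.items():
    print(f"{name}: {size} rows ({size / total:.1%} of total)")
```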