validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +80 -119
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +14 -15
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/JarqueBera.py +70 -0
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LJungBox.py +66 -0
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/RunsTest.py +72 -0
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +42 -40
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +39 -36
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +38 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +143 -158
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
- validmind-2.5.18.dist-info/RECORD +324 -0
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
- validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
- validmind-2.5.8.dist-info/RECORD +0 -318
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -2,38 +2,48 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
import pandas as pd
|
5
6
|
from scipy import stats
|
6
7
|
|
7
|
-
from validmind
|
8
|
+
from validmind import tags, tasks
|
8
9
|
|
9
10
|
|
10
|
-
|
11
|
+
@tasks("classification", "regression")
|
12
|
+
@tags("tabular_data", "data_distribution", "statistical_test")
|
13
|
+
def ShapiroWilk(dataset):
|
11
14
|
"""
|
12
15
|
Evaluates feature-wise normality of training data using the Shapiro-Wilk test.
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
### Purpose
|
18
|
+
|
19
|
+
The Shapiro-Wilk test is utilized to investigate whether a particular dataset conforms to the standard normal
|
20
|
+
distribution. This analysis is crucial in machine learning modeling because the normality of the data can
|
16
21
|
profoundly impact the performance of the model. This metric is especially useful in evaluating various features of
|
17
22
|
the dataset in both classification and regression tasks.
|
18
23
|
|
19
|
-
|
20
|
-
|
21
|
-
|
24
|
+
### Test Mechanism
|
25
|
+
|
26
|
+
The Shapiro-Wilk test is conducted on each feature column of the training dataset to determine if the data
|
27
|
+
contained fall within the normal distribution. The test presents a statistic and a p-value, with the p-value
|
28
|
+
serving to validate or repudiate the null hypothesis, which is that the tested data is normally distributed.
|
29
|
+
|
30
|
+
### Signs of High Risk
|
22
31
|
|
23
|
-
**Signs of High Risk**:
|
24
32
|
- A p-value that falls below 0.05 signifies a high risk as it discards the null hypothesis, indicating that the
|
25
33
|
data does not adhere to the normal distribution.
|
26
34
|
- For machine learning models built on the presumption of data normality, such an outcome could result in subpar
|
27
35
|
performance or incorrect predictions.
|
28
36
|
|
29
|
-
|
37
|
+
### Strengths
|
38
|
+
|
30
39
|
- The Shapiro-Wilk test is esteemed for its level of accuracy, thereby making it particularly well-suited to
|
31
40
|
datasets of small to moderate sizes.
|
32
41
|
- It proves its versatility through its efficient functioning in both classification and regression tasks.
|
33
42
|
- By separately testing each feature column, the Shapiro-Wilk test can raise an alarm if a specific feature does
|
34
43
|
not comply with the normality.
|
35
44
|
|
36
|
-
|
45
|
+
### Limitations
|
46
|
+
|
37
47
|
- The Shapiro-Wilk test's sensitivity can be a disadvantage as it often rejects the null hypothesis (i.e., data is
|
38
48
|
normally distributed), even for minor deviations, especially in large datasets. This may lead to unwarranted 'false
|
39
49
|
alarms' of high risk by deeming the data as not normally distributed even if it approximates normal distribution.
|
@@ -42,23 +52,18 @@ class ShapiroWilk(Metric):
|
|
42
52
|
- Lastly, the Shapiro-Wilk test is not optimally suited for processing data with pronounced skewness or kurtosis.
|
43
53
|
"""
|
44
54
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
"stat": sw_stat,
|
61
|
-
"pvalue": sw_pvalue,
|
62
|
-
}
|
63
|
-
|
64
|
-
return self.cache_results(sw_values)
|
55
|
+
df = dataset.df[dataset.feature_columns_numeric]
|
56
|
+
|
57
|
+
sw_values = {}
|
58
|
+
for col in df.columns:
|
59
|
+
sw_stat, sw_pvalue = stats.shapiro(df[col].values)
|
60
|
+
sw_values[col] = {
|
61
|
+
"stat": sw_stat,
|
62
|
+
"pvalue": sw_pvalue,
|
63
|
+
}
|
64
|
+
|
65
|
+
sw_df = pd.DataFrame.from_dict(sw_values, orient="index")
|
66
|
+
sw_df.reset_index(inplace=True)
|
67
|
+
sw_df.columns = ["column", "stat", "pvalue"]
|
68
|
+
|
69
|
+
return sw_df
|
@@ -20,43 +20,41 @@ from validmind.vm_models import (
|
|
20
20
|
@dataclass
|
21
21
|
class Skewness(ThresholdTest):
|
22
22
|
"""
|
23
|
-
Evaluates the skewness of numerical data in a
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
test
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
-
|
57
|
-
|
58
|
-
- The risk grading is largely dependent on a subjective threshold, which may result in excessive strictness or
|
59
|
-
leniency depending upon selection. This factor might require expert input and recurrent iterations for refinement.
|
23
|
+
Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data
|
24
|
+
quality and optimize model performance.
|
25
|
+
|
26
|
+
### Purpose
|
27
|
+
|
28
|
+
The purpose of the Skewness test is to measure the asymmetry in the distribution of data within a predictive
|
29
|
+
machine learning model. Specifically, it evaluates the divergence of said distribution from a normal distribution.
|
30
|
+
Understanding the level of skewness helps identify data quality issues, which are crucial for optimizing the
|
31
|
+
performance of traditional machine learning models in both classification and regression settings.
|
32
|
+
|
33
|
+
### Test Mechanism
|
34
|
+
|
35
|
+
This test calculates the skewness of numerical columns in the dataset, focusing specifically on numerical data
|
36
|
+
types. The calculated skewness value is then compared against a predetermined maximum threshold, which is set by
|
37
|
+
default to 1. If the skewness value is less than this maximum threshold, the test passes; otherwise, it fails. The
|
38
|
+
test results, along with the skewness values and column names, are then recorded for further analysis.
|
39
|
+
|
40
|
+
### Signs of High Risk
|
41
|
+
|
42
|
+
- Substantial skewness levels that significantly exceed the maximum threshold.
|
43
|
+
- Persistent skewness in the data, indicating potential issues with the foundational assumptions of the machine
|
44
|
+
learning model.
|
45
|
+
- Subpar model performance, erroneous predictions, or biased inferences due to skewed data distributions.
|
46
|
+
|
47
|
+
### Strengths
|
48
|
+
|
49
|
+
- Fast and efficient identification of unequal data distributions within a machine learning model.
|
50
|
+
- Adjustable maximum threshold parameter, allowing for customization based on user needs.
|
51
|
+
- Provides a clear quantitative measure to mitigate model risks related to data skewness.
|
52
|
+
|
53
|
+
### Limitations
|
54
|
+
|
55
|
+
- Only evaluates numeric columns, potentially missing skewness or bias in non-numeric data.
|
56
|
+
- Assumes that data should follow a normal distribution, which may not always be applicable to real-world data.
|
57
|
+
- Subjective threshold for risk grading, requiring expert input and recurrent iterations for refinement.
|
60
58
|
"""
|
61
59
|
|
62
60
|
name = "skewness"
|
@@ -10,46 +10,46 @@ from validmind.vm_models import Figure, Metric
|
|
10
10
|
|
11
11
|
class SpreadPlot(Metric):
|
12
12
|
"""
|
13
|
-
|
14
|
-
|
13
|
+
Assesses potential correlations between pairs of time series variables through visualization to enhance
|
14
|
+
understanding of their relationships.
|
15
15
|
|
16
|
-
|
17
|
-
The SpreadPlot metric is intended to graphically illustrate and analyse the relationships between pairs of time
|
18
|
-
series variables within a given dataset. This facilitated understanding helps in identifying and assessing
|
19
|
-
potential time series correlations, like cointegration, between the variables.
|
16
|
+
### Purpose
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
This method is iterated for each unique pair of variables in the dataset.
|
18
|
+
The SpreadPlot test aims to graphically illustrate and analyze the relationships between pairs of time series
|
19
|
+
variables within a given dataset. This facilitated understanding helps in identifying and assessing potential time
|
20
|
+
series correlations, such as cointegration, between the variables.
|
25
21
|
|
26
|
-
|
27
|
-
Potential indicators of high risk related to the SpreadPlot metric might include:
|
22
|
+
### Test Mechanism
|
28
23
|
|
29
|
-
|
30
|
-
|
31
|
-
variables
|
24
|
+
The SpreadPlot test computes and represents the spread between each pair of time series variables in the dataset.
|
25
|
+
Specifically, the difference between two variables is calculated and presented as a line graph. This process is
|
26
|
+
iterated for each unique pair of variables in the dataset, allowing for comprehensive visualization of their
|
27
|
+
relationships.
|
28
|
+
|
29
|
+
### Signs of High Risk
|
30
|
+
|
31
|
+
- Large fluctuations in the spread over a given timespan.
|
32
|
+
- Unexpected patterns or trends that may signal potential risks in the underlying correlations between the
|
33
|
+
variables.
|
32
34
|
- Presence of significant missing data or extreme outlier values, which could potentially skew the spread and
|
33
|
-
indicate high risk
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
-
|
39
|
-
-
|
40
|
-
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
-
|
47
|
-
|
48
|
-
-
|
49
|
-
|
50
|
-
-
|
51
|
-
plots
|
52
|
-
- It might not completely capture intricate non-linear relationships between the variables
|
35
|
+
indicate high risk.
|
36
|
+
|
37
|
+
### Strengths
|
38
|
+
|
39
|
+
- Allows for thorough visual examination and interpretation of the correlations between time-series pairs.
|
40
|
+
- Aids in revealing complex relationships like cointegration.
|
41
|
+
- Enhances interpretability by visualizing the relationships, thereby helping in spotting outliers and trends.
|
42
|
+
- Capable of handling numerous variable pairs from the dataset through a versatile and adaptable process.
|
43
|
+
|
44
|
+
### Limitations
|
45
|
+
|
46
|
+
- Primarily serves as a visualization tool and does not offer quantitative measurements or statistics to
|
47
|
+
objectively determine relationships.
|
48
|
+
- Heavily relies on the quality and granularity of the data—missing data or outliers can notably disturb the
|
49
|
+
interpretation of relationships.
|
50
|
+
- Can become inefficient or difficult to interpret with a high number of variables due to the profuse number of
|
51
|
+
plots.
|
52
|
+
- Might not completely capture intricate non-linear relationships between the variables.
|
53
53
|
"""
|
54
54
|
|
55
55
|
name = "spread_plot"
|
@@ -10,17 +10,21 @@ from validmind.vm_models import Figure, Metric
|
|
10
10
|
|
11
11
|
class TabularCategoricalBarPlots(Metric):
|
12
12
|
"""
|
13
|
-
Generates and visualizes bar plots for each category in categorical features to evaluate dataset's composition.
|
13
|
+
Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.
|
14
14
|
|
15
|
-
|
16
|
-
evaluate the dataset's composition by displaying the counts of each category in each categorical feature.
|
15
|
+
### Purpose
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
separate bar plot is generated. The number of occurrences for each category is calculated and displayed on the
|
21
|
-
plot. If a dataset contains multiple categorical columns, multiple bar plots are produced.
|
17
|
+
The purpose of this metric is to visually analyze categorical data using bar plots. It is intended to evaluate the
|
18
|
+
dataset's composition by displaying the counts of each category in each categorical feature.
|
22
19
|
|
23
|
-
|
20
|
+
### Test Mechanism
|
21
|
+
|
22
|
+
The provided dataset is first checked to determine if it contains any categorical variables. If no categorical
|
23
|
+
columns are found, the tool raises a ValueError. For each categorical variable in the dataset, a separate bar plot
|
24
|
+
is generated. The number of occurrences for each category is calculated and displayed on the plot. If a dataset
|
25
|
+
contains multiple categorical columns, multiple bar plots are produced.
|
26
|
+
|
27
|
+
### Signs of High Risk
|
24
28
|
|
25
29
|
- High risk could occur if the categorical variables exhibit an extreme imbalance, with categories having very few
|
26
30
|
instances possibly being underrepresented in the model, which could affect the model's performance and its ability
|
@@ -28,17 +32,19 @@ class TabularCategoricalBarPlots(Metric):
|
|
28
32
|
- Another sign of risk is if there are too many categories in a single variable, which could lead to overfitting
|
29
33
|
and make the model complex.
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
|
35
|
+
### Strengths
|
36
|
+
|
37
|
+
- Provides a visual and intuitively understandable representation of categorical data.
|
38
|
+
- Aids in the analysis of variable distributions.
|
39
|
+
- Helps in easily identifying imbalances or rare categories that could affect the model's performance.
|
34
40
|
|
35
|
-
|
41
|
+
### Limitations
|
36
42
|
|
37
|
-
- This method only works with categorical data
|
38
|
-
-
|
39
|
-
|
40
|
-
-
|
41
|
-
|
43
|
+
- This method only works with categorical data and won't apply to numerical variables.
|
44
|
+
- It does not provide informative value when there are too many categories, as the bar chart could become cluttered
|
45
|
+
and hard to interpret.
|
46
|
+
- Offers no insights into the model's performance or precision, but rather provides a descriptive analysis of the
|
47
|
+
input.
|
42
48
|
"""
|
43
49
|
|
44
50
|
name = "tabular_categorical_bar_plots"
|
@@ -10,26 +10,33 @@ from validmind.vm_models import Figure, Metric
|
|
10
10
|
|
11
11
|
class TabularDateTimeHistograms(Metric):
|
12
12
|
"""
|
13
|
-
Generates histograms to provide graphical insight into the distribution of time intervals in model's datetime
|
13
|
+
Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime
|
14
|
+
data.
|
14
15
|
|
15
|
-
|
16
|
-
of time intervals in a machine learning model's datetime data. By plotting histograms of differences between
|
17
|
-
consecutive date entries in all datetime variables, it enables an examination of the underlying pattern of time
|
18
|
-
series data and identification of anomalies.
|
16
|
+
### Purpose
|
19
17
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
The `TabularDateTimeHistograms` metric is designed to provide graphical insight into the distribution of time
|
19
|
+
intervals in a machine learning model's datetime data. By plotting histograms of differences between consecutive
|
20
|
+
date entries in all datetime variables, it enables an examination of the underlying pattern of time series data and
|
21
|
+
identification of anomalies.
|
22
|
+
|
23
|
+
### Test Mechanism
|
24
|
+
|
25
|
+
This test operates by first identifying all datetime columns and extracting them from the dataset. For each
|
26
|
+
datetime column, it next computes the differences (in days) between consecutive dates, excluding zero values, and
|
27
|
+
visualizes these differences in a histogram. The Plotly library's histogram function is used to generate
|
28
|
+
histograms, which are labeled appropriately and provide a graphical representation of the frequency of different
|
29
|
+
day intervals in the dataset.
|
30
|
+
|
31
|
+
### Signs of High Risk
|
25
32
|
|
26
|
-
**Signs of High Risk**:
|
27
33
|
- If no datetime columns are detected in the dataset, this would lead to a ValueError. Hence, the absence of
|
28
34
|
datetime columns signifies a high risk.
|
29
35
|
- A severely skewed or irregular distribution depicted in the histogram may indicate possible complications with
|
30
36
|
the data, such as faulty timestamps or abnormalities.
|
31
37
|
|
32
|
-
|
38
|
+
### Strengths
|
39
|
+
|
33
40
|
- The metric offers a visual overview of time interval frequencies within the dataset, supporting the recognition
|
34
41
|
of inherent patterns.
|
35
42
|
- Histogram plots can aid in the detection of potential outliers and data anomalies, contributing to an assessment
|
@@ -37,7 +44,8 @@ class TabularDateTimeHistograms(Metric):
|
|
37
44
|
- The metric is versatile, compatible with a range of task types, including classification and regression, and can
|
38
45
|
work with multiple datetime variables if present.
|
39
46
|
|
40
|
-
|
47
|
+
### Limitations
|
48
|
+
|
41
49
|
- A major weakness of this metric is its dependence on the visual examination of data, as it does not provide a
|
42
50
|
measurable evaluation of the model.
|
43
51
|
- The metric might overlook complex or multi-dimensional trends in the data.
|
@@ -13,14 +13,17 @@ def TabularDescriptionTables(dataset):
|
|
13
13
|
"""
|
14
14
|
Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.
|
15
15
|
|
16
|
-
|
17
|
-
categorical, and datetime variables present in a dataset. The attributes it measures include the count, mean,
|
18
|
-
minimum and maximum values, percentage of missing values, data types of fields, and unique values for categorical
|
19
|
-
fields, among others.
|
16
|
+
### Purpose
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
The main purpose of this metric is to gather and present the descriptive statistics of numerical, categorical, and
|
19
|
+
datetime variables present in a dataset. The attributes it measures include the count, mean, minimum and maximum
|
20
|
+
values, percentage of missing values, data types of fields, and unique values for categorical fields, among others.
|
21
|
+
|
22
|
+
### Test Mechanism
|
23
|
+
|
24
|
+
The test first segregates the variables in the dataset according to their data types (numerical, categorical, or
|
25
|
+
datetime). Then, it compiles summary statistics for each type of variable. The specifics of these statistics vary
|
26
|
+
depending on the type of variable:
|
24
27
|
|
25
28
|
- For numerical variables, the metric extracts descriptors like count, mean, minimum and maximum values, count of
|
26
29
|
missing values, and data types.
|
@@ -29,14 +32,16 @@ def TabularDescriptionTables(dataset):
|
|
29
32
|
- For datetime variables, it counts the number of unique values, identifies the earliest and latest dates, counts
|
30
33
|
missing values, and identifies data types.
|
31
34
|
|
32
|
-
|
35
|
+
### Signs of High Risk
|
36
|
+
|
33
37
|
- A large number of missing values in the descriptive statistics results could hint at high risk or failure, indicating
|
34
38
|
potential data collection, integrity, and quality issues.
|
35
39
|
- Detection of inappropriate distributions for numerical variables, like having negative values for variables that
|
36
40
|
are always supposed to be positive.
|
37
41
|
- Identifying inappropriate data types, like a continuous variable being encoded as a categorical type.
|
38
42
|
|
39
|
-
|
43
|
+
### Strengths
|
44
|
+
|
40
45
|
- Provides a comprehensive overview of the dataset.
|
41
46
|
- Gives a snapshot into the essence of the numerical, categorical, and datetime fields.
|
42
47
|
- Identifies potential data quality issues such as missing values or inconsistencies crucial for building credible
|
@@ -44,7 +49,8 @@ def TabularDescriptionTables(dataset):
|
|
44
49
|
- The metadata, including the data type and missing value information, are vital for anyone including data
|
45
50
|
scientists dealing with the dataset before the modeling process.
|
46
51
|
|
47
|
-
|
52
|
+
### Limitations
|
53
|
+
|
48
54
|
- It does not perform any deeper statistical analysis or tests on the data.
|
49
55
|
- It does not handle issues such as outliers or relationships between variables.
|
50
56
|
- It offers no insights into potential correlations or possible interactions between variables.
|
@@ -57,15 +63,44 @@ def TabularDescriptionTables(dataset):
|
|
57
63
|
categorical_fields = get_categorical_columns(dataset)
|
58
64
|
datetime_fields = get_datetime_columns(dataset)
|
59
65
|
|
60
|
-
summary_stats_numerical =
|
61
|
-
dataset, numerical_fields
|
66
|
+
summary_stats_numerical = (
|
67
|
+
get_summary_statistics_numerical(dataset, numerical_fields)
|
68
|
+
if numerical_fields
|
69
|
+
else pd.DataFrame()
|
70
|
+
)
|
71
|
+
summary_stats_categorical = (
|
72
|
+
get_summary_statistics_categorical(dataset, categorical_fields)
|
73
|
+
if categorical_fields
|
74
|
+
else pd.DataFrame()
|
75
|
+
)
|
76
|
+
summary_stats_datetime = (
|
77
|
+
get_summary_statistics_datetime(dataset, datetime_fields)
|
78
|
+
if datetime_fields
|
79
|
+
else pd.DataFrame()
|
80
|
+
)
|
81
|
+
|
82
|
+
# Replace empty DataFrames with None
|
83
|
+
summary_stats_numerical = (
|
84
|
+
summary_stats_numerical if not summary_stats_numerical.empty else None
|
62
85
|
)
|
63
|
-
summary_stats_categorical =
|
64
|
-
|
86
|
+
summary_stats_categorical = (
|
87
|
+
summary_stats_categorical if not summary_stats_categorical.empty else None
|
88
|
+
)
|
89
|
+
summary_stats_datetime = (
|
90
|
+
summary_stats_datetime if not summary_stats_datetime.empty else None
|
65
91
|
)
|
66
|
-
summary_stats_datetime = get_summary_statistics_datetime(dataset, datetime_fields)
|
67
92
|
|
68
|
-
|
93
|
+
# Return a tuple with only non-None values (tables with data)
|
94
|
+
return tuple(
|
95
|
+
filter(
|
96
|
+
lambda x: x is not None,
|
97
|
+
(
|
98
|
+
summary_stats_numerical,
|
99
|
+
summary_stats_categorical,
|
100
|
+
summary_stats_datetime,
|
101
|
+
),
|
102
|
+
)
|
103
|
+
)
|
69
104
|
|
70
105
|
|
71
106
|
def get_summary_statistics_numerical(dataset, numerical_fields):
|
@@ -13,39 +13,42 @@ class TabularNumericalHistograms(Metric):
|
|
13
13
|
Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and
|
14
14
|
detect potential issues.
|
15
15
|
|
16
|
-
|
17
|
-
histograms for each numerical feature in the dataset. Histograms aid in the exploratory analysis of data, offering
|
18
|
-
insight into the distribution of the data, skewness, presence of outliers, and central tendencies. It helps in
|
19
|
-
understanding if the inputs to the model are normally distributed which is a common assumption in many machine
|
20
|
-
learning algorithms.
|
16
|
+
### Purpose
|
21
17
|
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
The purpose of this test is to provide visual analysis of numerical data through the generation of histograms for
|
19
|
+
each numerical feature in the dataset. Histograms aid in the exploratory analysis of data, offering insight into
|
20
|
+
the distribution of the data, skewness, presence of outliers, and central tendencies. It helps in understanding if
|
21
|
+
the inputs to the model are normally distributed, which is a common assumption in many machine learning algorithms.
|
25
22
|
|
26
|
-
|
23
|
+
### Test Mechanism
|
24
|
+
|
25
|
+
This test scans the provided dataset and extracts all the numerical columns. For each numerical column, it
|
26
|
+
constructs a histogram using plotly, with 50 bins. The deployment of histograms offers a robust visual aid,
|
27
|
+
ensuring straightforward identification and understanding of numerical data distribution patterns.
|
28
|
+
|
29
|
+
### Signs of High Risk
|
27
30
|
|
28
31
|
- A high degree of skewness
|
29
32
|
- Unexpected data distributions
|
30
33
|
- Existence of extreme outliers in the histograms
|
34
|
+
|
31
35
|
These may indicate issues with the data that the model is receiving. If data for a numerical feature is expected to
|
32
|
-
follow a certain distribution (like normal distribution) but does not, it could lead to sub-par performance by
|
33
|
-
model. As such these instances should be treated as high-risk indicators.
|
36
|
+
follow a certain distribution (like a normal distribution) but does not, it could lead to sub-par performance by
|
37
|
+
the model. As such, these instances should be treated as high-risk indicators.
|
34
38
|
|
35
|
-
|
39
|
+
### Strengths
|
36
40
|
|
37
|
-
-
|
38
|
-
|
39
|
-
-
|
40
|
-
- It can be applied to large datasets and multiple numerical variables conveniently.
|
41
|
+
- Provides a simple, easy-to-interpret visualization of how data for each numerical attribute is distributed.
|
42
|
+
- Helps detect skewed values and outliers that could potentially harm the AI model's performance.
|
43
|
+
- Can be applied to large datasets and multiple numerical variables conveniently.
|
41
44
|
|
42
|
-
|
45
|
+
### Limitations
|
43
46
|
|
44
|
-
-
|
45
|
-
-
|
46
|
-
-
|
47
|
-
|
48
|
-
-
|
47
|
+
- Only works with numerical data, thus ignoring non-numerical or categorical data.
|
48
|
+
- Does not analyze relationships between different features, only the individual feature distributions.
|
49
|
+
- Is a univariate analysis and may miss patterns or anomalies that only appear when considering multiple variables
|
50
|
+
together.
|
51
|
+
- Does not provide any insight into how these features affect the output of the model; it is purely an input
|
49
52
|
analysis tool.
|
50
53
|
"""
|
51
54
|
|
@@ -13,29 +13,36 @@ class TargetRateBarPlots(Metric):
|
|
13
13
|
Generates bar plots visualizing the default rates of categorical features for a classification machine learning
|
14
14
|
model.
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
16
|
+
### Purpose
|
17
|
+
|
18
|
+
This test, implemented as a metric, is designed to provide an intuitive, graphical summary of the decision-making
|
19
|
+
patterns exhibited by a categorical classification machine learning model. The model's performance is evaluated
|
20
|
+
using bar plots depicting the ratio of target rates—meaning the proportion of positive classes—for different
|
21
|
+
categorical inputs. This allows for an easy, at-a-glance understanding of the model's accuracy.
|
22
|
+
|
23
|
+
### Test Mechanism
|
24
|
+
|
25
|
+
The test involves creating a pair of bar plots for each categorical feature in the dataset. The first plot depicts
|
26
|
+
the frequency of each category in the dataset, with each category visually distinguished by its unique color. The
|
27
|
+
second plot shows the mean target rate of each category (sourced from the "default_column"). Plotly, a Python
|
28
|
+
library, is used to generate these plots, with distinct plots created for each feature. If no specific columns are
|
29
|
+
selected, the test will generate plots for each categorical column in the dataset.
|
30
|
+
|
31
|
+
### Signs of High Risk
|
32
|
+
|
28
33
|
- Inconsistent or non-binary values in the "default_column" could complicate or render impossible the calculation
|
29
34
|
of average target rates.
|
30
35
|
- Particularly low or high target rates for a specific category might suggest that the model is misclassifying
|
31
36
|
instances of that category.
|
32
37
|
|
33
|
-
|
38
|
+
### Strengths
|
39
|
+
|
34
40
|
- This test offers a visually interpretable breakdown of the model's decisions, providing an easy way to spot
|
35
41
|
irregularities, inconsistencies, or patterns.
|
36
42
|
- Its flexibility allows for the inspection of one or multiple columns, as needed.
|
37
43
|
|
38
|
-
|
44
|
+
### Limitations
|
45
|
+
|
39
46
|
- The test is less useful when dealing with numeric or continuous data, as it's designed specifically for
|
40
47
|
categorical features.
|
41
48
|
- If the model in question is dealing with a multi-class problem rather than binary classification, the test's
|