validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +4 -24
- validmind/api_client.py +6 -17
- validmind/logging.py +48 -0
- validmind/models/function.py +11 -3
- validmind/tests/__init__.py +2 -0
- validmind/tests/__types__.py +18 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/output.py +9 -2
- validmind/tests/plots/BoxPlot.py +260 -0
- validmind/tests/plots/CorrelationHeatmap.py +235 -0
- validmind/tests/plots/HistogramPlot.py +233 -0
- validmind/tests/plots/ViolinPlot.py +125 -0
- validmind/tests/plots/__init__.py +0 -0
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/tests/stats/CorrelationAnalysis.py +251 -0
- validmind/tests/stats/DescriptiveStats.py +197 -0
- validmind/tests/stats/NormalityTests.py +147 -0
- validmind/tests/stats/OutlierDetection.py +173 -0
- validmind/tests/stats/__init__.py +0 -0
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
- validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
- validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
- validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
- validmind/unit_metrics/classification/individual/Confidence.py +52 -0
- validmind/unit_metrics/classification/individual/Correctness.py +41 -0
- validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
- validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
- validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
- validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
- validmind/unit_metrics/classification/individual/__init__.py +0 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +291 -38
- validmind/vm_models/result/result.py +26 -4
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.stats.diagnostic import acorr_ljungbox
|
7
9
|
|
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
|
|
10
12
|
|
11
13
|
@tasks("regression")
|
12
14
|
@tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
|
13
|
-
def LJungBox(dataset):
|
15
|
+
def LJungBox(dataset) -> Tuple[pd.DataFrame, RawData]:
|
14
16
|
"""
|
15
17
|
Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.
|
16
18
|
|
@@ -2,9 +2,12 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
import plotly.figure_factory as ff
|
10
|
+
import plotly.graph_objects as go
|
8
11
|
|
9
12
|
from validmind import RawData, tags, tasks
|
10
13
|
from validmind.vm_models import VMDataset
|
@@ -15,7 +18,9 @@ COOLWARM = [[0, "rgb(95,5,255)"], [0.5, "rgb(255,255,255)"], [1, "rgb(255,5,0)"]
|
|
15
18
|
|
16
19
|
@tags("time_series_data", "visualization")
|
17
20
|
@tasks("regression")
|
18
|
-
def LaggedCorrelationHeatmap(
|
21
|
+
def LaggedCorrelationHeatmap(
|
22
|
+
dataset: VMDataset, num_lags: int = 10
|
23
|
+
) -> Tuple[go.Figure, RawData]:
|
19
24
|
"""
|
20
25
|
Assesses and visualizes correlation between target variable and lagged independent variables in a time-series
|
21
26
|
dataset.
|
@@ -2,13 +2,17 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.vm_models import VMDataset
|
7
9
|
|
8
10
|
|
9
11
|
@tags("tabular_data", "data_quality")
|
10
12
|
@tasks("classification", "regression")
|
11
|
-
def MissingValues(
|
13
|
+
def MissingValues(
|
14
|
+
dataset: VMDataset, min_threshold: int = 1
|
15
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
12
16
|
"""
|
13
17
|
Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.
|
14
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import plotly.graph_objects as go
|
6
8
|
|
7
9
|
from validmind import RawData, tags, tasks
|
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
@tasks("classification", "regression")
|
13
15
|
def MissingValuesBarPlot(
|
14
16
|
dataset: VMDataset, threshold: int = 80, fig_height: int = 600
|
15
|
-
):
|
17
|
+
) -> Tuple[go.Figure, RawData]:
|
16
18
|
"""
|
17
19
|
Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on
|
18
20
|
identifying high-risk columns based on a user-defined threshold.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import plotly.graph_objects as go
|
6
9
|
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
|
7
10
|
|
@@ -14,7 +17,7 @@ from validmind.vm_models.result import RawData
|
|
14
17
|
@tasks("classification", "regression")
|
15
18
|
def MutualInformation(
|
16
19
|
dataset: VMDataset, min_threshold: float = 0.01, task: str = "classification"
|
17
|
-
):
|
20
|
+
) -> Tuple[go.Figure, RawData]:
|
18
21
|
"""
|
19
22
|
Calculates mutual information scores between features and target variable to evaluate feature relevance.
|
20
23
|
|
@@ -3,6 +3,8 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
|
6
|
+
from typing import Tuple
|
7
|
+
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
8
10
|
from validmind import RawData, tags, tasks
|
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
|
|
10
12
|
|
11
13
|
@tags("tabular_data", "numerical_data", "correlation")
|
12
14
|
@tasks("classification", "regression")
|
13
|
-
def PearsonCorrelationMatrix(dataset):
|
15
|
+
def PearsonCorrelationMatrix(dataset) -> Tuple[go.Figure, RawData]:
|
14
16
|
"""
|
15
17
|
Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.
|
16
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
from arch.unitroot import PhillipsPerron
|
@@ -17,7 +19,7 @@ logger = get_logger(__name__)
|
|
17
19
|
|
18
20
|
@tags("time_series_data", "forecasting", "statistical_test", "unit_root_test")
|
19
21
|
@tasks("regression")
|
20
|
-
def PhillipsPerronArch(dataset: VMDataset):
|
22
|
+
def PhillipsPerronArch(dataset: VMDataset) -> Dict[str, Any]:
|
21
23
|
"""
|
22
24
|
Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.
|
23
25
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import sys
|
6
|
+
from typing import Dict, Tuple
|
6
7
|
|
7
8
|
import pandas as pd
|
8
9
|
import plotly.graph_objects as go
|
@@ -11,6 +12,7 @@ import plotly.subplots as sp
|
|
11
12
|
from validmind import RawData, tags, tasks
|
12
13
|
from validmind.errors import MissingDependencyError
|
13
14
|
from validmind.logging import get_logger
|
15
|
+
from validmind.vm_models import VMDataset, VMModel
|
14
16
|
|
15
17
|
try:
|
16
18
|
from fairlearn.metrics import (
|
@@ -33,7 +35,9 @@ logger = get_logger(__name__)
|
|
33
35
|
|
34
36
|
@tags("bias_and_fairness")
|
35
37
|
@tasks("classification", "regression")
|
36
|
-
def ProtectedClassesCombination(
|
38
|
+
def ProtectedClassesCombination(
|
39
|
+
dataset: VMDataset, model: VMModel, protected_classes=None
|
40
|
+
) -> Tuple[Dict[str, pd.DataFrame], Dict[str, pd.DataFrame], go.Figure, RawData]:
|
37
41
|
"""
|
38
42
|
Visualizes combinations of protected classes and their corresponding error metric differences.
|
39
43
|
|
@@ -3,6 +3,8 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
|
6
|
+
from typing import List, Tuple, Union
|
7
|
+
|
6
8
|
import pandas as pd
|
7
9
|
import plotly.graph_objects as go
|
8
10
|
|
@@ -14,7 +16,9 @@ logger = get_logger(__name__)
|
|
14
16
|
|
15
17
|
@tags("bias_and_fairness", "descriptive_statistics")
|
16
18
|
@tasks("classification", "regression")
|
17
|
-
def ProtectedClassesDescription(
|
19
|
+
def ProtectedClassesDescription(
|
20
|
+
dataset, protected_classes=None
|
21
|
+
) -> Tuple[pd.DataFrame, Union[go.Figure, List[go.Figure]], RawData]:
|
18
22
|
"""
|
19
23
|
Visualizes the distribution of protected classes in the dataset relative to the target variable
|
20
24
|
and provides descriptive statistics.
|
@@ -4,12 +4,14 @@
|
|
4
4
|
|
5
5
|
import io
|
6
6
|
import sys
|
7
|
+
from typing import Any, List, Tuple
|
7
8
|
|
8
9
|
import pandas as pd
|
9
10
|
|
10
11
|
from validmind import RawData, tags, tasks
|
11
12
|
from validmind.errors import MissingDependencyError
|
12
13
|
from validmind.logging import get_logger
|
14
|
+
from validmind.vm_models import VMDataset, VMModel
|
13
15
|
|
14
16
|
try:
|
15
17
|
import aequitas.plot as ap
|
@@ -28,12 +30,12 @@ logger = get_logger(__name__)
|
|
28
30
|
@tags("bias_and_fairness")
|
29
31
|
@tasks("classification", "regression")
|
30
32
|
def ProtectedClassesDisparity(
|
31
|
-
dataset,
|
32
|
-
model,
|
33
|
+
dataset: VMDataset,
|
34
|
+
model: VMModel,
|
33
35
|
protected_classes=None,
|
34
36
|
disparity_tolerance=1.25,
|
35
37
|
metrics=["fnr", "fpr", "tpr"],
|
36
|
-
):
|
38
|
+
) -> Tuple[pd.DataFrame, List[bytes], Any, RawData]:
|
37
39
|
"""
|
38
40
|
Investigates disparities in model performance across different protected class segments.
|
39
41
|
|
@@ -4,13 +4,16 @@
|
|
4
4
|
|
5
5
|
import json
|
6
6
|
import sys
|
7
|
+
from typing import Any, Dict, Tuple
|
7
8
|
|
9
|
+
import matplotlib.figure
|
8
10
|
import matplotlib.pyplot as plt
|
9
11
|
import pandas as pd
|
10
12
|
|
11
13
|
from validmind import RawData, tags, tasks
|
12
14
|
from validmind.errors import MissingDependencyError
|
13
15
|
from validmind.logging import get_logger
|
16
|
+
from validmind.vm_models import VMDataset
|
14
17
|
|
15
18
|
try:
|
16
19
|
from fairlearn.metrics import (
|
@@ -35,8 +38,12 @@ logger = get_logger(__name__)
|
|
35
38
|
@tags("bias_and_fairness")
|
36
39
|
@tasks("classification", "regression")
|
37
40
|
def ProtectedClassesThresholdOptimizer(
|
38
|
-
dataset
|
39
|
-
|
41
|
+
dataset: VMDataset,
|
42
|
+
pipeline=None,
|
43
|
+
protected_classes=None,
|
44
|
+
X_train=None,
|
45
|
+
y_train=None,
|
46
|
+
) -> Tuple[Dict[str, Any], matplotlib.figure.Figure, RawData]:
|
40
47
|
"""
|
41
48
|
Obtains a classifier by applying group-specific thresholds to the provided estimator.
|
42
49
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import matplotlib.pyplot as plt
|
6
8
|
import pandas as pd
|
7
9
|
|
@@ -42,7 +44,9 @@ def plot_rolling_statistics(df, col, window_size):
|
|
42
44
|
|
43
45
|
@tags("time_series_data", "visualization", "stationarity")
|
44
46
|
@tasks("regression")
|
45
|
-
def RollingStatsPlot(
|
47
|
+
def RollingStatsPlot(
|
48
|
+
dataset: VMDataset, window_size: int = 12
|
49
|
+
) -> Tuple[plt.Figure, RawData]:
|
46
50
|
"""
|
47
51
|
Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified
|
48
52
|
window.
|
@@ -10,7 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
|
11
11
|
@tasks("classification", "regression")
|
12
12
|
@tags("tabular_data", "statistical_test", "statsmodels")
|
13
|
-
def RunsTest(dataset):
|
13
|
+
def RunsTest(dataset) -> pd.DataFrame:
|
14
14
|
"""
|
15
15
|
Executes Runs Test on ML model to detect non-random patterns in output data sequence.
|
16
16
|
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
import matplotlib.figure
|
5
6
|
import matplotlib.pyplot as plt
|
6
7
|
import seaborn as sns
|
7
8
|
|
@@ -10,7 +11,7 @@ from validmind import tags, tasks
|
|
10
11
|
|
11
12
|
@tags("tabular_data", "visualization")
|
12
13
|
@tasks("classification", "regression")
|
13
|
-
def ScatterPlot(dataset):
|
14
|
+
def ScatterPlot(dataset) -> matplotlib.figure.Figure:
|
14
15
|
"""
|
15
16
|
Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices.
|
16
17
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
|
@@ -16,7 +18,7 @@ def ScoreBandDefaultRates(
|
|
16
18
|
model: VMModel,
|
17
19
|
score_column: str = "score",
|
18
20
|
score_bands: list = None,
|
19
|
-
):
|
21
|
+
) -> Tuple[pd.DataFrame, RawData]:
|
20
22
|
"""
|
21
23
|
Analyzes default rates and population distribution across credit score bands.
|
22
24
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import numpy as np
|
6
9
|
import pandas as pd
|
7
10
|
import plotly.graph_objects as go
|
@@ -19,7 +22,9 @@ logger = get_logger(__name__)
|
|
19
22
|
|
20
23
|
@tags("time_series_data", "seasonality", "statsmodels")
|
21
24
|
@tasks("regression")
|
22
|
-
def SeasonalDecompose(
|
25
|
+
def SeasonalDecompose(
|
26
|
+
dataset: VMDataset, seasonal_model: str = "additive"
|
27
|
+
) -> Tuple[go.Figure, RawData]:
|
23
28
|
"""
|
24
29
|
Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components.
|
25
30
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import pandas as pd
|
6
9
|
from scipy import stats
|
7
10
|
|
@@ -10,7 +13,7 @@ from validmind import RawData, tags, tasks
|
|
10
13
|
|
11
14
|
@tasks("classification", "regression")
|
12
15
|
@tags("tabular_data", "data_distribution", "statistical_test")
|
13
|
-
def ShapiroWilk(dataset):
|
16
|
+
def ShapiroWilk(dataset) -> Tuple[pd.DataFrame, RawData]:
|
14
17
|
"""
|
15
18
|
Evaluates feature-wise normality of training data using the Shapiro-Wilk test.
|
16
19
|
|
@@ -2,13 +2,15 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import tags, tasks
|
6
8
|
from validmind.utils import infer_datatypes
|
7
9
|
|
8
10
|
|
9
11
|
@tags("data_quality", "tabular_data")
|
10
12
|
@tasks("classification", "regression")
|
11
|
-
def Skewness(dataset, max_threshold=1):
|
13
|
+
def Skewness(dataset, max_threshold=1) -> Tuple[Dict[str, List[Dict[str, Any]]], bool]:
|
12
14
|
"""
|
13
15
|
Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data
|
14
16
|
quality and optimize model performance.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import matplotlib.pyplot as plt
|
6
8
|
import pandas as pd
|
7
9
|
import seaborn as sns
|
@@ -13,7 +15,7 @@ from validmind.vm_models import VMDataset
|
|
13
15
|
|
14
16
|
@tags("time_series_data", "visualization")
|
15
17
|
@tasks("regression")
|
16
|
-
def SpreadPlot(dataset: VMDataset):
|
18
|
+
def SpreadPlot(dataset: VMDataset) -> Tuple[plt.Figure, RawData]:
|
17
19
|
"""
|
18
20
|
Assesses potential correlations between pairs of time series variables through visualization to enhance
|
19
21
|
understanding of their relationships.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import plotly.graph_objs as go
|
6
9
|
|
7
10
|
from validmind import RawData, tags, tasks
|
@@ -11,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
11
14
|
|
12
15
|
@tags("tabular_data", "visualization")
|
13
16
|
@tasks("classification", "regression")
|
14
|
-
def TabularCategoricalBarPlots(dataset: VMDataset):
|
17
|
+
def TabularCategoricalBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
|
15
18
|
"""
|
16
19
|
Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.
|
17
20
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data", "visualization")
|
14
16
|
@tasks("classification", "regression")
|
15
|
-
def TabularDateTimeHistograms(dataset: VMDataset):
|
17
|
+
def TabularDateTimeHistograms(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
|
16
18
|
"""
|
17
19
|
Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime
|
18
20
|
data.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import pandas as pd
|
6
9
|
|
7
10
|
from validmind import tags, tasks
|
@@ -9,7 +12,7 @@ from validmind import tags, tasks
|
|
9
12
|
|
10
13
|
@tags("tabular_data")
|
11
14
|
@tasks("classification", "regression")
|
12
|
-
def TabularDescriptionTables(dataset):
|
15
|
+
def TabularDescriptionTables(dataset) -> Tuple[pd.DataFrame]:
|
13
16
|
"""
|
14
17
|
Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.
|
15
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import plotly.graph_objs as go
|
6
8
|
|
7
9
|
from validmind import tags, tasks
|
@@ -10,7 +12,7 @@ from validmind.vm_models import VMDataset
|
|
10
12
|
|
11
13
|
@tags("tabular_data", "visualization")
|
12
14
|
@tasks("classification", "regression")
|
13
|
-
def TabularNumericalHistograms(dataset: VMDataset):
|
15
|
+
def TabularNumericalHistograms(dataset: VMDataset) -> Tuple[go.Figure]:
|
14
16
|
"""
|
15
17
|
Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and
|
16
18
|
detect potential issues.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import numpy as np
|
6
9
|
import plotly.graph_objs as go
|
7
10
|
from plotly.subplots import make_subplots
|
@@ -13,7 +16,7 @@ from validmind.vm_models import VMDataset
|
|
13
16
|
|
14
17
|
@tags("tabular_data", "visualization", "categorical_data")
|
15
18
|
@tasks("classification")
|
16
|
-
def TargetRateBarPlots(dataset: VMDataset):
|
19
|
+
def TargetRateBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
|
17
20
|
"""
|
18
21
|
Generates bar plots visualizing the default rates of categorical features for a classification machine learning
|
19
22
|
model.
|
@@ -9,7 +9,7 @@ from validmind import tags, tasks
|
|
9
9
|
|
10
10
|
@tags("time_series_data", "analysis")
|
11
11
|
@tasks("regression")
|
12
|
-
def TimeSeriesDescription(dataset):
|
12
|
+
def TimeSeriesDescription(dataset) -> pd.DataFrame:
|
13
13
|
"""
|
14
14
|
Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends,
|
15
15
|
patterns, and data quality issues.
|
@@ -10,7 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
|
11
11
|
@tags("time_series_data", "analysis")
|
12
12
|
@tasks("regression")
|
13
|
-
def TimeSeriesDescriptiveStatistics(dataset):
|
13
|
+
def TimeSeriesDescriptiveStatistics(dataset) -> pd.DataFrame:
|
14
14
|
"""
|
15
15
|
Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues.
|
16
16
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data")
|
14
16
|
@tasks("regression")
|
15
|
-
def TimeSeriesFrequency(
|
17
|
+
def TimeSeriesFrequency(
|
18
|
+
dataset: VMDataset,
|
19
|
+
) -> Tuple[List[Dict[str, Any]], go.Figure, bool, RawData]:
|
16
20
|
"""
|
17
21
|
Evaluates consistency of time series data frequency and generates a frequency plot.
|
18
22
|
|
@@ -2,8 +2,11 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.express as px
|
9
|
+
import plotly.graph_objects as go
|
7
10
|
|
8
11
|
from validmind import tags, tasks
|
9
12
|
from validmind.logging import get_logger
|
@@ -13,7 +16,7 @@ logger = get_logger(__name__)
|
|
13
16
|
|
14
17
|
@tags("data_validation", "visualization", "time_series_data")
|
15
18
|
@tasks("regression", "time_series_forecasting")
|
16
|
-
def TimeSeriesHistogram(dataset, nbins=30):
|
19
|
+
def TimeSeriesHistogram(dataset, nbins=30) -> Tuple[go.Figure]:
|
17
20
|
"""
|
18
21
|
Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.
|
19
22
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data", "visualization")
|
14
16
|
@tasks("regression")
|
15
|
-
def TimeSeriesLinePlot(dataset: VMDataset):
|
17
|
+
def TimeSeriesLinePlot(dataset: VMDataset) -> Tuple[go.Figure]:
|
16
18
|
"""
|
17
19
|
Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.
|
18
20
|
|
@@ -2,9 +2,12 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.express as px
|
7
9
|
import plotly.figure_factory as ff
|
10
|
+
import plotly.graph_objects as go
|
8
11
|
|
9
12
|
from validmind import RawData, tags, tasks
|
10
13
|
from validmind.errors import SkipTestError
|
@@ -13,7 +16,9 @@ from validmind.vm_models import VMDataset
|
|
13
16
|
|
14
17
|
@tags("time_series_data")
|
15
18
|
@tasks("regression")
|
16
|
-
def TimeSeriesMissingValues(
|
19
|
+
def TimeSeriesMissingValues(
|
20
|
+
dataset: VMDataset, min_threshold: int = 1
|
21
|
+
) -> Tuple[List[Dict[str, Any]], go.Figure, go.Figure, bool, RawData]:
|
17
22
|
"""
|
18
23
|
Validates time-series data quality by confirming the count of missing values is below a certain threshold.
|
19
24
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import List, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data")
|
14
16
|
@tasks("regression")
|
15
|
-
def TimeSeriesOutliers(
|
17
|
+
def TimeSeriesOutliers(
|
18
|
+
dataset: VMDataset, zscore_threshold: int = 3
|
19
|
+
) -> Tuple[pd.DataFrame, List[go.Figure], bool, RawData]:
|
16
20
|
"""
|
17
21
|
Identifies and visualizes outliers in time-series data using the z-score method.
|
18
22
|
|
@@ -2,13 +2,18 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Any, Dict, List, Tuple
|
7
|
+
|
5
8
|
from validmind.tests import tags, tasks
|
6
9
|
from validmind.vm_models import VMDataset
|
7
10
|
|
8
11
|
|
9
12
|
@tags("tabular_data")
|
10
13
|
@tasks("regression", "classification")
|
11
|
-
def TooManyZeroValues(
|
14
|
+
def TooManyZeroValues(
|
15
|
+
dataset: VMDataset, max_percent_threshold: float = 0.03
|
16
|
+
) -> Tuple[List[Dict[str, Any]], bool]:
|
12
17
|
"""
|
13
18
|
Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold
|
14
19
|
percentage.
|
@@ -2,13 +2,17 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import tags, tasks
|
6
8
|
from validmind.vm_models import VMDataset
|
7
9
|
|
8
10
|
|
9
11
|
@tags("tabular_data")
|
10
12
|
@tasks("regression", "classification")
|
11
|
-
def UniqueRows(
|
13
|
+
def UniqueRows(
|
14
|
+
dataset: VMDataset, min_percent_threshold: float = 1
|
15
|
+
) -> Tuple[List[Dict[str, Any]], bool]:
|
12
16
|
"""
|
13
17
|
Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.
|
14
18
|
|