validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +1 -1
- validmind/models/function.py +11 -3
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +145 -38
- validmind/vm_models/result/result.py +14 -12
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.8.
|
1
|
+
__version__ = "2.8.28"
|
validmind/ai/utils.py
CHANGED
@@ -130,7 +130,7 @@ def get_judge_config(judge_llm=None, judge_embeddings=None):
|
|
130
130
|
"ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
|
131
131
|
)
|
132
132
|
if isinstance(judge_embeddings, FunctionModel) and judge_embeddings is not None:
|
133
|
-
if isinstance(
|
133
|
+
if isinstance(judge_embeddings.model, Embeddings):
|
134
134
|
judge_embeddings = judge_embeddings.model
|
135
135
|
else:
|
136
136
|
raise ValueError(
|
validmind/models/function.py
CHANGED
@@ -35,7 +35,8 @@ class FunctionModel(VMModel):
|
|
35
35
|
|
36
36
|
Attributes:
|
37
37
|
predict_fn (callable): The predict function that should take a dictionary of
|
38
|
-
input features and return a prediction.
|
38
|
+
input features and return a prediction. Can return simple values or
|
39
|
+
dictionary objects.
|
39
40
|
input_id (str, optional): The input ID for the model. Defaults to None.
|
40
41
|
name (str, optional): The name of the model. Defaults to the name of the predict_fn.
|
41
42
|
prompt (Prompt, optional): If using a prompt, the prompt object that defines the template
|
@@ -55,6 +56,13 @@ class FunctionModel(VMModel):
|
|
55
56
|
X (pandas.DataFrame): The input features to predict on
|
56
57
|
|
57
58
|
Returns:
|
58
|
-
List[Any]: The predictions
|
59
|
+
List[Any]: The predictions. Can contain simple values or dictionary objects
|
60
|
+
depending on what the predict_fn returns.
|
59
61
|
"""
|
60
|
-
|
62
|
+
predictions = []
|
63
|
+
for x in X.to_dict(orient="records"):
|
64
|
+
result = self.predict_fn(x)
|
65
|
+
# Handle both simple values and complex dictionary returns
|
66
|
+
predictions.append(result)
|
67
|
+
|
68
|
+
return predictions
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
from statsmodels.tsa.stattools import acf, pacf
|
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data", "forecasting", "statistical_test", "visualization")
|
14
16
|
@tasks("regression")
|
15
|
-
def ACFandPACFPlot(dataset: VMDataset):
|
17
|
+
def ACFandPACFPlot(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
|
16
18
|
"""
|
17
19
|
Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to
|
18
20
|
reveal trends and correlations.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Dict
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.tsa.stattools import adfuller
|
7
9
|
|
@@ -16,7 +18,7 @@ logger = get_logger(__name__)
|
|
16
18
|
"time_series_data", "statsmodels", "forecasting", "statistical_test", "stationarity"
|
17
19
|
)
|
18
20
|
@tasks("regression")
|
19
|
-
def ADF(dataset: VMDataset):
|
21
|
+
def ADF(dataset: VMDataset) -> Dict[str, pd.DataFrame]:
|
20
22
|
"""
|
21
23
|
Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.
|
22
24
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Dict
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.tsa.ar_model import AutoReg
|
7
9
|
from statsmodels.tsa.stattools import adfuller
|
@@ -15,7 +17,7 @@ logger = get_logger(__name__)
|
|
15
17
|
|
16
18
|
@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
|
17
19
|
@tasks("regression")
|
18
|
-
def AutoAR(dataset: VMDataset, max_ar_order: int = 3):
|
20
|
+
def AutoAR(dataset: VMDataset, max_ar_order: int = 3) -> Dict[str, pd.DataFrame]:
|
19
21
|
"""
|
20
22
|
Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.
|
21
23
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Dict, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.tsa.arima.model import ARIMA
|
7
9
|
from statsmodels.tsa.stattools import adfuller
|
@@ -15,7 +17,9 @@ logger = get_logger(__name__)
|
|
15
17
|
|
16
18
|
@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
|
17
19
|
@tasks("regression")
|
18
|
-
def AutoMA(
|
20
|
+
def AutoMA(
|
21
|
+
dataset: VMDataset, max_ma_order: int = 3
|
22
|
+
) -> Tuple[Dict[str, pd.DataFrame], RawData]:
|
19
23
|
"""
|
20
24
|
Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
|
21
25
|
minimal BIC and AIC values.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Dict
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
from statsmodels.tsa.stattools import adfuller
|
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
|
13
15
|
@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
|
14
16
|
@tasks("regression")
|
15
|
-
def AutoStationarity(
|
17
|
+
def AutoStationarity(
|
18
|
+
dataset: VMDataset, max_order: int = 5, threshold: float = 0.05
|
19
|
+
) -> Dict[str, pd.DataFrame]:
|
16
20
|
"""
|
17
21
|
Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.
|
18
22
|
|
@@ -3,15 +3,17 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import itertools
|
6
|
+
from typing import Tuple
|
6
7
|
|
7
8
|
import plotly.express as px
|
9
|
+
import plotly.graph_objects as go
|
8
10
|
|
9
11
|
from validmind import RawData, tags, tasks
|
10
12
|
|
11
13
|
|
12
14
|
@tags("tabular_data", "numerical_data", "visualization")
|
13
15
|
@tasks("classification")
|
14
|
-
def BivariateScatterPlots(dataset):
|
16
|
+
def BivariateScatterPlots(dataset) -> Tuple[go.Figure, RawData]:
|
15
17
|
"""
|
16
18
|
Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables
|
17
19
|
in machine learning classification tasks.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import pandas as pd
|
6
9
|
from statsmodels.stats.diagnostic import acorr_ljungbox
|
7
10
|
|
@@ -10,7 +13,7 @@ from validmind import RawData, tags, tasks
|
|
10
13
|
|
11
14
|
@tasks("regression")
|
12
15
|
@tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
|
13
|
-
def BoxPierce(dataset):
|
16
|
+
def BoxPierce(dataset) -> Tuple[pd.DataFrame, RawData]:
|
14
17
|
"""
|
15
18
|
Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.
|
16
19
|
|
@@ -12,7 +12,7 @@ from validmind.errors import SkipTestError
|
|
12
12
|
|
13
13
|
@tags("tabular_data", "categorical_data", "statistical_test")
|
14
14
|
@tasks("classification")
|
15
|
-
def ChiSquaredFeaturesTable(dataset, p_threshold=0.05):
|
15
|
+
def ChiSquaredFeaturesTable(dataset, p_threshold=0.05) -> pd.DataFrame:
|
16
16
|
"""
|
17
17
|
Assesses the statistical association between categorical features and a target variable using the Chi-Squared test.
|
18
18
|
|
@@ -20,7 +20,7 @@ from validmind.vm_models import VMDataset
|
|
20
20
|
@tasks("classification")
|
21
21
|
def ClassImbalance(
|
22
22
|
dataset: VMDataset, min_percent_threshold: int = 10
|
23
|
-
) -> Tuple[Dict[str, Any], go.Figure, bool]:
|
23
|
+
) -> Tuple[Dict[str, Any], go.Figure, bool, RawData]:
|
24
24
|
"""
|
25
25
|
Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.
|
26
26
|
|
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
import re
|
6
6
|
from collections import Counter
|
7
|
+
from typing import Any, Dict, List, Tuple
|
7
8
|
|
8
9
|
import numpy as np
|
9
10
|
|
@@ -142,7 +143,9 @@ def describe_column(df, column):
|
|
142
143
|
|
143
144
|
@tags("tabular_data", "time_series_data", "text_data")
|
144
145
|
@tasks("classification", "regression", "text_classification", "text_summarization")
|
145
|
-
def DatasetDescription(
|
146
|
+
def DatasetDescription(
|
147
|
+
dataset: VMDataset,
|
148
|
+
) -> Tuple[Dict[str, List[Dict[str, Any]]], RawData]:
|
146
149
|
"""
|
147
150
|
Provides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset.
|
148
151
|
|
@@ -2,7 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
|
5
|
+
|
6
|
+
from typing import Any, Dict, List, Tuple
|
6
7
|
|
7
8
|
from validmind import RawData, tags, tasks
|
8
9
|
from validmind.vm_models import VMDataset
|
@@ -17,7 +18,7 @@ DATASET_LABELS = {
|
|
17
18
|
|
18
19
|
@tags("tabular_data", "time_series_data", "text_data")
|
19
20
|
@tasks("classification", "regression", "text_classification", "text_summarization")
|
20
|
-
def DatasetSplit(datasets: List[VMDataset]):
|
21
|
+
def DatasetSplit(datasets: List[VMDataset]) -> Tuple[List[Dict[str, Any]], RawData]:
|
21
22
|
"""
|
22
23
|
Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML
|
23
24
|
model.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
|
7
9
|
from validmind import tags, tasks
|
@@ -46,7 +48,7 @@ def get_summary_statistics_categorical(df, categorical_fields):
|
|
46
48
|
|
47
49
|
@tags("tabular_data", "time_series_data", "data_quality")
|
48
50
|
@tasks("classification", "regression")
|
49
|
-
def DescriptiveStatistics(dataset: VMDataset):
|
51
|
+
def DescriptiveStatistics(dataset: VMDataset) -> Dict[str, Any]:
|
50
52
|
"""
|
51
53
|
Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's
|
52
54
|
dataset.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from arch.unitroot import DFGLS
|
7
9
|
from numpy.linalg import LinAlgError
|
@@ -16,7 +18,7 @@ logger = get_logger(__name__)
|
|
16
18
|
|
17
19
|
@tags("time_series_data", "forecasting", "unit_root_test")
|
18
20
|
@tasks("regression")
|
19
|
-
def DickeyFullerGLS(dataset: VMDataset):
|
21
|
+
def DickeyFullerGLS(dataset: VMDataset) -> Tuple[Dict[str, Any], RawData]:
|
20
22
|
"""
|
21
23
|
Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration.
|
22
24
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
|
7
9
|
from validmind import tags, tasks
|
@@ -9,7 +11,7 @@ from validmind import tags, tasks
|
|
9
11
|
|
10
12
|
@tags("tabular_data", "data_quality", "text_data")
|
11
13
|
@tasks("classification", "regression")
|
12
|
-
def Duplicates(dataset, min_threshold=1):
|
14
|
+
def Duplicates(dataset, min_threshold=1) -> Tuple[Dict[str, Any], bool]:
|
13
15
|
"""
|
14
16
|
Tests dataset for duplicate entries, ensuring model reliability via data quality verification.
|
15
17
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Dict
|
7
|
+
|
5
8
|
import pandas as pd
|
6
9
|
from statsmodels.tsa.stattools import coint
|
7
10
|
|
@@ -12,7 +15,9 @@ from validmind.vm_models import VMDataset
|
|
12
15
|
|
13
16
|
@tags("time_series_data", "statistical_test", "forecasting")
|
14
17
|
@tasks("regression")
|
15
|
-
def EngleGrangerCoint(
|
18
|
+
def EngleGrangerCoint(
|
19
|
+
dataset: VMDataset, threshold: float = 0.05
|
20
|
+
) -> Dict[str, pd.DataFrame]:
|
16
21
|
"""
|
17
22
|
Assesses the degree of co-movement between pairs of time series data using the Engle-Granger cointegration test.
|
18
23
|
|
@@ -3,6 +3,8 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
|
6
|
+
from typing import Tuple
|
7
|
+
|
6
8
|
import numpy as np
|
7
9
|
import plotly.graph_objects as go
|
8
10
|
|
@@ -11,7 +13,7 @@ from validmind import RawData, tags, tasks
|
|
11
13
|
|
12
14
|
@tags("tabular_data", "visualization", "correlation")
|
13
15
|
@tasks("classification", "regression")
|
14
|
-
def FeatureTargetCorrelationPlot(dataset, fig_height=600):
|
16
|
+
def FeatureTargetCorrelationPlot(dataset, fig_height=600) -> Tuple[go.Figure, RawData]:
|
15
17
|
"""
|
16
18
|
Visualizes the correlation between input features and the model's target output in a color-coded horizontal bar
|
17
19
|
plot.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.vm_models import VMDataset
|
7
9
|
|
@@ -13,7 +15,7 @@ def HighCardinality(
|
|
13
15
|
num_threshold: int = 100,
|
14
16
|
percent_threshold: float = 0.1,
|
15
17
|
threshold_type: str = "percent",
|
16
|
-
):
|
18
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
17
19
|
"""
|
18
20
|
Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting.
|
19
21
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Any, Dict, List, Tuple
|
7
|
+
|
5
8
|
from validmind import RawData, tags, tasks
|
6
9
|
from validmind.vm_models import VMDataset
|
7
10
|
|
@@ -13,7 +16,7 @@ def HighPearsonCorrelation(
|
|
13
16
|
max_threshold: float = 0.3,
|
14
17
|
top_n_correlations: int = 10,
|
15
18
|
feature_columns: list = None,
|
16
|
-
):
|
19
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
17
20
|
"""
|
18
21
|
Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.
|
19
22
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import plotly.graph_objects as go
|
6
9
|
|
7
10
|
from validmind import RawData, tags, tasks
|
@@ -22,7 +25,7 @@ def compute_outliers(series, threshold):
|
|
22
25
|
@tasks("classification", "regression")
|
23
26
|
def IQROutliersBarPlot(
|
24
27
|
dataset: VMDataset, threshold: float = 1.5, fig_width: int = 800
|
25
|
-
):
|
28
|
+
) -> Tuple[go.Figure, RawData]:
|
26
29
|
"""
|
27
30
|
Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method.
|
28
31
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Any, Dict, Tuple
|
7
|
+
|
5
8
|
from validmind import RawData, tags, tasks
|
6
9
|
from validmind.vm_models import VMDataset
|
7
10
|
|
@@ -18,7 +21,9 @@ def compute_outliers(series, threshold=1.5):
|
|
18
21
|
|
19
22
|
@tags("tabular_data", "numerical_data")
|
20
23
|
@tasks("classification", "regression")
|
21
|
-
def IQROutliersTable(
|
24
|
+
def IQROutliersTable(
|
25
|
+
dataset: VMDataset, threshold: float = 1.5
|
26
|
+
) -> Tuple[Dict[str, Any], RawData]:
|
22
27
|
"""
|
23
28
|
Determines and summarizes outliers in numerical features using the Interquartile Range method.
|
24
29
|
|
@@ -3,7 +3,9 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import itertools
|
6
|
+
from typing import Tuple
|
6
7
|
|
8
|
+
import matplotlib.figure
|
7
9
|
import matplotlib.pyplot as plt
|
8
10
|
import seaborn as sns
|
9
11
|
from sklearn.ensemble import IsolationForest
|
@@ -19,7 +21,7 @@ def IsolationForestOutliers(
|
|
19
21
|
random_state: int = 0,
|
20
22
|
contamination: float = 0.1,
|
21
23
|
feature_columns: list = None,
|
22
|
-
):
|
24
|
+
) -> Tuple[matplotlib.figure.Figure, RawData]:
|
23
25
|
"""
|
24
26
|
Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots.
|
25
27
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.stats.stattools import jarque_bera
|
7
9
|
|
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
|
|
10
12
|
|
11
13
|
@tasks("classification", "regression")
|
12
14
|
@tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
|
13
|
-
def JarqueBera(dataset):
|
15
|
+
def JarqueBera(dataset) -> Tuple[pd.DataFrame, RawData]:
|
14
16
|
"""
|
15
17
|
Assesses normality of dataset features in an ML model using the Jarque-Bera test.
|
16
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.tsa.stattools import kpss
|
7
9
|
|
@@ -15,7 +17,7 @@ logger = get_logger(__name__)
|
|
15
17
|
|
16
18
|
@tags("time_series_data", "stationarity", "unit_root_test", "statsmodels")
|
17
19
|
@tasks("data_validation")
|
18
|
-
def KPSS(dataset: VMDataset):
|
20
|
+
def KPSS(dataset: VMDataset) -> Dict[str, Any]:
|
19
21
|
"""
|
20
22
|
Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test.
|
21
23
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
from statsmodels.stats.diagnostic import acorr_ljungbox
|
7
9
|
|
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
|
|
10
12
|
|
11
13
|
@tasks("regression")
|
12
14
|
@tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
|
13
|
-
def LJungBox(dataset):
|
15
|
+
def LJungBox(dataset) -> Tuple[pd.DataFrame, RawData]:
|
14
16
|
"""
|
15
17
|
Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.
|
16
18
|
|
@@ -2,9 +2,12 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
import plotly.figure_factory as ff
|
10
|
+
import plotly.graph_objects as go
|
8
11
|
|
9
12
|
from validmind import RawData, tags, tasks
|
10
13
|
from validmind.vm_models import VMDataset
|
@@ -15,7 +18,9 @@ COOLWARM = [[0, "rgb(95,5,255)"], [0.5, "rgb(255,255,255)"], [1, "rgb(255,5,0)"]
|
|
15
18
|
|
16
19
|
@tags("time_series_data", "visualization")
|
17
20
|
@tasks("regression")
|
18
|
-
def LaggedCorrelationHeatmap(
|
21
|
+
def LaggedCorrelationHeatmap(
|
22
|
+
dataset: VMDataset, num_lags: int = 10
|
23
|
+
) -> Tuple[go.Figure, RawData]:
|
19
24
|
"""
|
20
25
|
Assesses and visualizes correlation between target variable and lagged independent variables in a time-series
|
21
26
|
dataset.
|
@@ -2,13 +2,17 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.vm_models import VMDataset
|
7
9
|
|
8
10
|
|
9
11
|
@tags("tabular_data", "data_quality")
|
10
12
|
@tasks("classification", "regression")
|
11
|
-
def MissingValues(
|
13
|
+
def MissingValues(
|
14
|
+
dataset: VMDataset, min_threshold: int = 1
|
15
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
12
16
|
"""
|
13
17
|
Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.
|
14
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Tuple
|
6
|
+
|
5
7
|
import plotly.graph_objects as go
|
6
8
|
|
7
9
|
from validmind import RawData, tags, tasks
|
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
|
|
12
14
|
@tasks("classification", "regression")
|
13
15
|
def MissingValuesBarPlot(
|
14
16
|
dataset: VMDataset, threshold: int = 80, fig_height: int = 600
|
15
|
-
):
|
17
|
+
) -> Tuple[go.Figure, RawData]:
|
16
18
|
"""
|
17
19
|
Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on
|
18
20
|
identifying high-risk columns based on a user-defined threshold.
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
|
6
|
+
from typing import Tuple
|
7
|
+
|
5
8
|
import plotly.graph_objects as go
|
6
9
|
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
|
7
10
|
|
@@ -14,7 +17,7 @@ from validmind.vm_models.result import RawData
|
|
14
17
|
@tasks("classification", "regression")
|
15
18
|
def MutualInformation(
|
16
19
|
dataset: VMDataset, min_threshold: float = 0.01, task: str = "classification"
|
17
|
-
):
|
20
|
+
) -> Tuple[go.Figure, RawData]:
|
18
21
|
"""
|
19
22
|
Calculates mutual information scores between features and target variable to evaluate feature relevance.
|
20
23
|
|
@@ -3,6 +3,8 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
|
6
|
+
from typing import Tuple
|
7
|
+
|
6
8
|
import plotly.graph_objects as go
|
7
9
|
|
8
10
|
from validmind import RawData, tags, tasks
|
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
|
|
10
12
|
|
11
13
|
@tags("tabular_data", "numerical_data", "correlation")
|
12
14
|
@tasks("classification", "regression")
|
13
|
-
def PearsonCorrelationMatrix(dataset):
|
15
|
+
def PearsonCorrelationMatrix(dataset) -> Tuple[go.Figure, RawData]:
|
14
16
|
"""
|
15
17
|
Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.
|
16
18
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict
|
6
|
+
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
9
|
from arch.unitroot import PhillipsPerron
|
@@ -17,7 +19,7 @@ logger = get_logger(__name__)
|
|
17
19
|
|
18
20
|
@tags("time_series_data", "forecasting", "statistical_test", "unit_root_test")
|
19
21
|
@tasks("regression")
|
20
|
-
def PhillipsPerronArch(dataset: VMDataset):
|
22
|
+
def PhillipsPerronArch(dataset: VMDataset) -> Dict[str, Any]:
|
21
23
|
"""
|
22
24
|
Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.
|
23
25
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import sys
|
6
|
+
from typing import Dict, Tuple
|
6
7
|
|
7
8
|
import pandas as pd
|
8
9
|
import plotly.graph_objects as go
|
@@ -11,6 +12,7 @@ import plotly.subplots as sp
|
|
11
12
|
from validmind import RawData, tags, tasks
|
12
13
|
from validmind.errors import MissingDependencyError
|
13
14
|
from validmind.logging import get_logger
|
15
|
+
from validmind.vm_models import VMDataset, VMModel
|
14
16
|
|
15
17
|
try:
|
16
18
|
from fairlearn.metrics import (
|
@@ -33,7 +35,9 @@ logger = get_logger(__name__)
|
|
33
35
|
|
34
36
|
@tags("bias_and_fairness")
|
35
37
|
@tasks("classification", "regression")
|
36
|
-
def ProtectedClassesCombination(
|
38
|
+
def ProtectedClassesCombination(
|
39
|
+
dataset: VMDataset, model: VMModel, protected_classes=None
|
40
|
+
) -> Tuple[Dict[str, pd.DataFrame], Dict[str, pd.DataFrame], go.Figure, RawData]:
|
37
41
|
"""
|
38
42
|
Visualizes combinations of protected classes and their corresponding error metric differences.
|
39
43
|
|
@@ -3,6 +3,8 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
|
6
|
+
from typing import List, Tuple, Union
|
7
|
+
|
6
8
|
import pandas as pd
|
7
9
|
import plotly.graph_objects as go
|
8
10
|
|
@@ -14,7 +16,9 @@ logger = get_logger(__name__)
|
|
14
16
|
|
15
17
|
@tags("bias_and_fairness", "descriptive_statistics")
|
16
18
|
@tasks("classification", "regression")
|
17
|
-
def ProtectedClassesDescription(
|
19
|
+
def ProtectedClassesDescription(
|
20
|
+
dataset, protected_classes=None
|
21
|
+
) -> Tuple[pd.DataFrame, Union[go.Figure, List[go.Figure]], RawData]:
|
18
22
|
"""
|
19
23
|
Visualizes the distribution of protected classes in the dataset relative to the target variable
|
20
24
|
and provides descriptive statistics.
|