validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +1 -1
- validmind/models/function.py +11 -3
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +145 -38
- validmind/vm_models/result/result.py +14 -12
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
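Most of the per-file changes above follow a single pattern: each test module gains `typing` imports and an explicit return type annotation on its test function (for example `Tuple[pd.DataFrame, go.Figure, RawData]`), together with whatever `validmind.vm_models` imports the annotation needs. The sketch below is illustrative only; the function name and body are hypothetical and not part of the package.

```python
from typing import Tuple

import pandas as pd
import plotly.graph_objects as go

from validmind import tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("model_performance")
@tasks("classification")
def ExampleAnnotatedTest(
    dataset: VMDataset, model: VMModel, n_bins: int = 10
) -> Tuple[pd.DataFrame, go.Figure]:
    """Hypothetical test illustrating the annotated signature style added in 2.8.29."""
    # A real ValidMind test would compute metrics from `dataset` and `model`
    # (and typically also return a RawData object); this body only builds
    # placeholder outputs so the typed signature is concrete and runnable.
    table = pd.DataFrame({"bin": list(range(n_bins)), "value": [0.0] * n_bins})
    figure = go.Figure(go.Bar(x=table["bin"], y=table["value"]))
    return table, figure
```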
validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
@@ -13,7 +15,7 @@ from validmind.vm_models import VMDataset, VMModel
 @tasks("classification")
 def ScoreProbabilityAlignment(
     model: VMModel, dataset: VMDataset, score_column: str = "score", n_bins: int = 10
-):
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Analyzes the alignment between credit scores and predicted probabilities.
 
validmind/tests/model_validation/sklearn/SilhouettePlot.py

@@ -2,8 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, Tuple
+
 import matplotlib.pyplot as plt
 import numpy as np
+import plotly.graph_objects as go
 from sklearn.metrics import silhouette_samples, silhouette_score
 
 from validmind import RawData, tags, tasks
@@ -12,7 +15,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("sklearn", "model_performance")
 @tasks("clustering")
-def SilhouettePlot(
+def SilhouettePlot(
+    model: VMModel, dataset: VMDataset
+) -> Tuple[Dict[str, float], go.Figure, RawData]:
     """
     Calculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML
     models.
validmind/tests/model_validation/sklearn/TrainingTestDegradation.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 from numpy import unique
 from sklearn.metrics import classification_report
@@ -22,7 +22,7 @@ from validmind.vm_models import VMDataset, VMModel
 @tasks("classification", "text_classification")
 def TrainingTestDegradation(
     datasets: List[VMDataset], model: VMModel, max_threshold: float = 0.10
-):
+) -> Tuple[List[Dict[str, float]], bool, RawData]:
     """
     Tests if model performance degradation between training and test datasets exceeds a predefined threshold.
 
validmind/tests/model_validation/sklearn/VMeasure.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 from sklearn import metrics
 
 from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("sklearn", "model_performance")
 @tasks("clustering")
-def VMeasure(
+def VMeasure(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[List[Dict[str, float]], RawData]:
     """
     Evaluates homogeneity and completeness of a clustering model using the V Measure Score.
 
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py

@@ -2,10 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import Callable, Dict, List,
+from typing import Callable, Dict, List, Optional, Tuple
 
 import matplotlib.pyplot as plt
 import pandas as pd
+import plotly.graph_objects as go
 import seaborn as sns
 from sklearn import metrics
 
@@ -157,10 +158,10 @@ def _plot_weak_spots(
 def WeakspotsDiagnosis(
     datasets: List[VMDataset],
     model: VMModel,
-    features_columns:
-    metrics:
-    thresholds:
-):
+    features_columns: Optional[List[str]] = None,
+    metrics: Optional[Dict[str, Callable]] = None,
+    thresholds: Optional[Dict[str, float]] = None,
+) -> Tuple[pd.DataFrame, go.Figure, bool]:
     """
     Identifies and visualizes weak spots in a machine learning model's performance across various sections of the
     feature space.
validmind/tests/model_validation/statsmodels/AutoARIMA.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List
+
 from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.tsa.stattools import adfuller
 
@@ -14,7 +16,7 @@ logger = get_logger(__name__)
 
 @tags("time_series_data", "forecasting", "model_selection", "statsmodels")
 @tasks("regression")
-def AutoARIMA(model: VMModel, dataset: VMDataset):
+def AutoARIMA(model: VMModel, dataset: VMDataset) -> List[Dict[str, float]]:
     """
     Evaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria.
 
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py

@@ -2,16 +2,21 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.graph_objects as go
 from matplotlib import cm
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "credit_risk")
 @tasks("classification")
-def CumulativePredictionProbabilities(
+def CumulativePredictionProbabilities(
+    dataset: VMDataset, model: VMModel, title: str = "Cumulative Probabilities"
+) -> Tuple[go.Figure, RawData]:
     """
     Visualizes cumulative probabilities of positive and negative classes for both training and testing in classification models.
 
validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py

@@ -2,15 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import List, Tuple
+
 import pandas as pd
 from statsmodels.stats.stattools import durbin_watson
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("regression")
 @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
-def DurbinWatsonTest(
+def DurbinWatsonTest(
+    dataset: VMDataset, model: VMModel, threshold: List[float] = [1.5, 2.5]
+) -> Tuple[pd.DataFrame, RawData]:
     """
     Assesses autocorrelation in time series data features using the Durbin-Watson statistic.
 
validmind/tests/model_validation/statsmodels/GINITable.py

@@ -2,16 +2,19 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 from sklearn.metrics import roc_auc_score, roc_curve
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_performance")
 @tasks("classification")
-def GINITable(dataset, model):
+def GINITable(dataset: VMDataset, model: VMModel) -> Tuple[pd.DataFrame, RawData]:
     """
     Evaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets.
 
validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 from statsmodels.stats.diagnostic import kstest_normal
 
 from validmind import RawData, tags, tasks
@@ -11,7 +13,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
 @tasks("classification", "regression")
-def KolmogorovSmirnov(
+def KolmogorovSmirnov(
+    model: VMModel, dataset: VMDataset, dist: str = "norm"
+) -> Tuple[List[Dict[str, float]], RawData]:
     """
     Assesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test.
 
validmind/tests/model_validation/statsmodels/Lilliefors.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List
+
 from statsmodels.stats.diagnostic import lilliefors
 
 from validmind import tags, tasks
@@ -10,7 +12,7 @@ from validmind.vm_models import VMDataset
 
 @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
 @tasks("classification", "regression")
-def Lilliefors(dataset: VMDataset):
+def Lilliefors(dataset: VMDataset) -> List[Dict[str, float]]:
     """
     Assesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test.
 
validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py

@@ -2,18 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
 
 import plotly.graph_objects as go
 from matplotlib import cm
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "credit_risk")
 @tasks("classification")
 def PredictionProbabilitiesHistogram(
-    dataset
-
+    dataset: VMDataset,
+    model: VMModel,
+    title: str = "Histogram of Predictive Probabilities",
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the predictive probability distribution for binary classification to evaluate model performance and
     potential overfitting or bias.
validmind/tests/model_validation/statsmodels/RegressionCoeffs.py

@@ -3,17 +3,20 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 from scipy import stats
 
 from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
+from validmind.vm_models import VMModel
 
 
 @tags("tabular_data", "visualization", "model_training")
 @tasks("regression")
-def RegressionCoeffs(model):
+def RegressionCoeffs(model: VMModel) -> Tuple[go.Figure, RawData, pd.DataFrame]:
     """
     Assesses the significance and uncertainty of predictor variables in a regression model through visualization of
     coefficients and their 95% confidence intervals.
validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py

@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import matplotlib.pyplot as plt
+import plotly.graph_objects as go
 import seaborn as sns
 
 from validmind import RawData, tags, tasks
@@ -16,8 +19,10 @@ logger = get_logger(__name__)
 @tags("statistical_test", "model_interpretation", "visualization", "feature_importance")
 @tasks("regression")
 def RegressionFeatureSignificance(
-    model: VMModel,
-
+    model: VMModel,
+    fontsize: int = 10,
+    p_threshold: float = 0.05,
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses and visualizes the statistical significance of features in a regression model.
 
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py

@@ -2,10 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import
+from typing import Optional, Tuple
 
 import matplotlib.pyplot as plt
 import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
@@ -19,9 +20,9 @@ logger = get_logger(__name__)
 def RegressionModelForecastPlot(
     model: VMModel,
     dataset: VMDataset,
-    start_date:
-    end_date:
-):
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+) -> Tuple[go.Figure, RawData]:
     """
     Generates plots to visually compare the forecasted outcomes of a regression model against actual observed values over
     a specified date range.
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py

@@ -2,9 +2,12 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -23,7 +26,7 @@ def integrate_diff(series_diff, start_value):
 def RegressionModelForecastPlotLevels(
     model: VMModel,
     dataset: VMDataset,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the alignment between forecasted and observed values in regression models through visual plots
 
validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py

@@ -2,10 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List, Union
+from typing import List, Tuple, Union
 
 import matplotlib.pyplot as plt
 import numpy as np
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
@@ -29,7 +30,7 @@ def RegressionModelSensitivityPlot(
     model: VMModel,
     shocks: List[float] = [0.1],
     transformation: Union[str, None] = None,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
     visualizing the impact.
validmind/tests/model_validation/statsmodels/RegressionModelSummary.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 from sklearn.metrics import mean_squared_error, r2_score
 
 from validmind import RawData, tags, tasks
@@ -12,7 +14,9 @@ from .statsutils import adj_r2_score
 
 @tags("model_performance", "regression")
 @tasks("regression")
-def RegressionModelSummary(
+def RegressionModelSummary(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[List[Dict[str, float]], RawData]:
     """
     Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.
 
validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
@@ -19,7 +21,7 @@ logger = get_logger(__name__)
 @tasks("regression")
 def RegressionPermutationFeatureImportance(
     dataset: VMDataset, model: VMModel, fontsize: int = 12, figure_height: int = 500
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the significance of each feature in a model by evaluating the impact on model performance when feature
     values are randomly rearranged.
validmind/tests/model_validation/statsmodels/ScorecardHistogram.py

@@ -6,11 +6,16 @@ import plotly.graph_objects as go
 from matplotlib import cm
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset
 
 
 @tags("visualization", "credit_risk", "logistic_regression")
 @tasks("classification")
-def ScorecardHistogram(
+def ScorecardHistogram(
+    dataset: VMDataset,
+    title: str = "Histogram of Scores",
+    score_column: str = "score",
+) -> go.Figure:
     """
     The Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,
     providing critical insights into the performance and generalizability of credit-risk models.
validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -26,7 +26,7 @@ def CalibrationCurveDrift(
     model: VMModel,
     n_bins: int = 10,
     drift_pct_threshold: float = 20,
-):
+) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]:
     """
     Evaluates changes in probability calibration between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -38,7 +38,7 @@ def calculate_ks_statistic(y_true, y_prob):
 @tasks("classification", "text_classification")
 def ClassDiscriminationDrift(
     datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
-):
+) -> Tuple[Dict[str, pd.DataFrame], bool]:
     """
     Compares classification discrimination metrics between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import pandas as pd
 import plotly.graph_objs as go
@@ -18,7 +18,7 @@ def ClassImbalanceDrift(
     datasets: List[VMDataset],
     drift_pct_threshold: float = 5.0,
     title: str = "Class Distribution Drift",
-):
+) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]:
     """
     Evaluates drift in class distribution between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -18,7 +18,7 @@ from validmind.vm_models import VMDataset, VMModel
 @tasks("classification", "text_classification")
 def ClassificationAccuracyDrift(
     datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
-):
+) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
     """
     Compares classification accuracy metrics between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -18,7 +18,7 @@ from validmind.vm_models import VMDataset, VMModel
 @tasks("classification", "text_classification")
 def ConfusionMatrixDrift(
     datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
-):
+) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
     """
     Compares confusion matrix metrics between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 import plotly.graph_objects as go
@@ -17,7 +17,7 @@ from validmind.vm_models import VMDataset, VMModel
 def CumulativePredictionProbabilitiesDrift(
     datasets: List[VMDataset],
     model: VMModel,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Compares cumulative prediction probability distributions between reference and monitoring datasets.
 
validmind/tests/ongoing_monitoring/FeatureDrift.py

@@ -2,11 +2,14 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset
 
 
 def calculate_psi_score(actual, expected):
@@ -92,11 +95,11 @@ def create_distribution_plot(feature_name, reference_dist, monitoring_dist, bins
 @tags("visualization")
 @tasks("monitoring")
 def FeatureDrift(
-    datasets,
+    datasets: List[VMDataset],
     bins=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
     feature_columns=None,
     psi_threshold=0.2,
-):
+) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
     """
     Evaluates changes in feature distribution over time to identify potential model drift.
 
validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py

@@ -3,14 +3,19 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 
+from typing import List, Tuple
+
 import matplotlib.pyplot as plt
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization")
 @tasks("monitoring")
-def PredictionAcrossEachFeature(
+def PredictionAcrossEachFeature(
+    datasets: List[VMDataset], model: VMModel
+) -> Tuple[plt.Figure, RawData]:
     """
     Assesses differences in model predictions across individual features between reference and monitoring datasets
     through visual analysis.
validmind/tests/ongoing_monitoring/PredictionCorrelation.py

@@ -2,15 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization")
 @tasks("monitoring")
-def PredictionCorrelation(
+def PredictionCorrelation(
+    datasets: List[VMDataset],
+    model: VMModel,
+    drift_pct_threshold: float = 20,
+) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
     """
     Assesses correlation changes between model predictions from reference and monitoring datasets to detect potential
     target drift.
validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py

@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -21,7 +21,7 @@ def PredictionProbabilitiesHistogramDrift(
     model: VMModel,
     title="Prediction Probabilities Histogram Drift",
     drift_pct_threshold: float = 20.0,
-):
+) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]:
     """
     Compares prediction probability distributions between reference and monitoring datasets.
 