validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +4 -24
- validmind/api_client.py +6 -17
- validmind/logging.py +48 -0
- validmind/models/function.py +11 -3
- validmind/tests/__init__.py +2 -0
- validmind/tests/__types__.py +18 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/output.py +9 -2
- validmind/tests/plots/BoxPlot.py +260 -0
- validmind/tests/plots/CorrelationHeatmap.py +235 -0
- validmind/tests/plots/HistogramPlot.py +233 -0
- validmind/tests/plots/ViolinPlot.py +125 -0
- validmind/tests/plots/__init__.py +0 -0
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/tests/stats/CorrelationAnalysis.py +251 -0
- validmind/tests/stats/DescriptiveStats.py +197 -0
- validmind/tests/stats/NormalityTests.py +147 -0
- validmind/tests/stats/OutlierDetection.py +173 -0
- validmind/tests/stats/__init__.py +0 -0
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
- validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
- validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
- validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
- validmind/unit_metrics/classification/individual/Confidence.py +52 -0
- validmind/unit_metrics/classification/individual/Correctness.py +41 -0
- validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
- validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
- validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
- validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
- validmind/unit_metrics/classification/individual/__init__.py +0 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +291 -38
- validmind/vm_models/result/result.py +26 -4
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
validmind/tests/stats/OutlierDetection.py (new file)
@@ -0,0 +1,173 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+from sklearn.ensemble import IsolationForest
+
+from validmind import tags, tasks
+from validmind.errors import SkipTestError
+from validmind.utils import format_records
+from validmind.vm_models import VMDataset
+
+
+def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
+    """Validate and return numerical columns."""
+    if columns is None:
+        columns = dataset.feature_columns_numeric
+    else:
+        available_columns = set(dataset.feature_columns_numeric)
+        columns = [col for col in columns if col in available_columns]
+
+    # Filter out boolean columns as they can't be used for outlier detection
+    numeric_columns = []
+    for col in columns:
+        if col in dataset.df.columns:
+            col_dtype = dataset.df[col].dtype
+            # Exclude boolean and object types, keep only true numeric types
+            if pd.api.types.is_numeric_dtype(col_dtype) and col_dtype != bool:
+                numeric_columns.append(col)
+
+    columns = numeric_columns
+
+    if not columns:
+        raise SkipTestError("No suitable numerical columns found for outlier detection")
+
+    return columns
+
+
+def _detect_iqr_outliers(data, iqr_threshold: float):
+    """Detect outliers using IQR method."""
+    q1, q3 = data.quantile(0.25), data.quantile(0.75)
+    iqr = q3 - q1
+    lower_bound = q1 - iqr_threshold * iqr
+    upper_bound = q3 + iqr_threshold * iqr
+    # Fix numpy boolean operation error by using pandas boolean indexing properly
+    outlier_mask = (data < lower_bound) | (data > upper_bound)
+    iqr_outliers = data[outlier_mask]
+    return len(iqr_outliers), (len(iqr_outliers) / len(data)) * 100
+
+
+def _detect_zscore_outliers(data, zscore_threshold: float):
+    """Detect outliers using Z-score method."""
+    z_scores = np.abs(stats.zscore(data))
+    # Fix potential numpy boolean operation error
+    outlier_mask = z_scores > zscore_threshold
+    zscore_outliers = data[outlier_mask]
+    return len(zscore_outliers), (len(zscore_outliers) / len(data)) * 100
+
+
+def _detect_isolation_forest_outliers(data, contamination: float):
+    """Detect outliers using Isolation Forest method."""
+    if len(data) <= 10:
+        return 0, 0
+
+    try:
+        iso_forest = IsolationForest(contamination=contamination, random_state=42)
+        outlier_pred = iso_forest.fit_predict(data.values.reshape(-1, 1))
+        iso_outliers = data[outlier_pred == -1]
+        return len(iso_outliers), (len(iso_outliers) / len(data)) * 100
+    except Exception:
+        return 0, 0
+
+
+def _process_column_outliers(
+    column: str,
+    data,
+    methods: List[str],
+    iqr_threshold: float,
+    zscore_threshold: float,
+    contamination: float,
+):
+    """Process outlier detection for a single column."""
+    outliers_dict = {"Feature": column, "Total Count": len(data)}
+
+    # IQR method
+    if "iqr" in methods:
+        count, percentage = _detect_iqr_outliers(data, iqr_threshold)
+        outliers_dict["IQR Outliers"] = count
+        outliers_dict["IQR %"] = percentage
+
+    # Z-score method
+    if "zscore" in methods:
+        count, percentage = _detect_zscore_outliers(data, zscore_threshold)
+        outliers_dict["Z-Score Outliers"] = count
+        outliers_dict["Z-Score %"] = percentage
+
+    # Isolation Forest method
+    if "isolation_forest" in methods:
+        count, percentage = _detect_isolation_forest_outliers(data, contamination)
+        outliers_dict["Isolation Forest Outliers"] = count
+        outliers_dict["Isolation Forest %"] = percentage
+
+    return outliers_dict
+
+
+@tags("tabular_data", "statistics", "outliers")
+@tasks("classification", "regression", "clustering")
+def OutlierDetection(
+    dataset: VMDataset,
+    columns: Optional[List[str]] = None,
+    methods: List[str] = ["iqr", "zscore", "isolation_forest"],
+    iqr_threshold: float = 1.5,
+    zscore_threshold: float = 3.0,
+    contamination: float = 0.1,
+) -> Dict[str, Any]:
+    """
+    Detects outliers in numerical features using multiple statistical methods.
+
+    ### Purpose
+
+    This test identifies outliers in numerical features using various statistical
+    methods including IQR, Z-score, and Isolation Forest. It provides comprehensive
+    outlier detection to help identify data quality issues and potential anomalies.
+
+    ### Test Mechanism
+
+    The test applies multiple outlier detection methods:
+    - IQR method: Values beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR
+    - Z-score method: Values with |z-score| > threshold
+    - Isolation Forest: ML-based anomaly detection
+
+    ### Signs of High Risk
+
+    - High percentage of outliers indicating data quality issues
+    - Inconsistent outlier detection across methods
+    - Extreme outliers that significantly deviate from normal patterns
+
+    ### Strengths
+
+    - Multiple detection methods for robust outlier identification
+    - Customizable thresholds for different sensitivity levels
+    - Clear summary of outlier patterns across features
+
+    ### Limitations
+
+    - Limited to numerical features only
+    - Some methods assume normal distributions
+    - Threshold selection can be subjective
+    """
+    # Validate inputs
+    columns = _validate_columns(dataset, columns)
+
+    # Process each column
+    outlier_summary = []
+    for column in columns:
+        data = dataset._df[column].dropna()
+
+        if len(data) >= 3:
+            outliers_dict = _process_column_outliers(
+                column, data, methods, iqr_threshold, zscore_threshold, contamination
+            )
+            outlier_summary.append(outliers_dict)
+
+    # Format results
+    results = {}
+    if outlier_summary:
+        results["Outlier Summary"] = format_records(pd.DataFrame(outlier_summary))
+
+    return results
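The IQR and Z-score branches above are plain pandas/scipy arithmetic, so they are easy to sanity-check outside the test harness. The sketch below uses made-up data and mirrors only the logic of _detect_iqr_outliers and _detect_zscore_outliers; it does not go through OutlierDetection or any ValidMind dataset object.

import numpy as np
import pandas as pd
from scipy import stats

# Toy column with two obvious outliers
data = pd.Series([10, 11, 12, 11, 10, 12, 11, 95, -40])

# IQR rule, as in _detect_iqr_outliers with iqr_threshold=1.5
q1, q3 = data.quantile(0.25), data.quantile(0.75)
iqr = q3 - q1
iqr_mask = (data < q1 - 1.5 * iqr) | (data > q3 + 1.5 * iqr)
print("IQR outliers:", data[iqr_mask].tolist())    # [95, -40]

# Z-score rule, as in _detect_zscore_outliers with zscore_threshold=3.0
z_mask = np.abs(stats.zscore(data)) > 3.0
print("Z-score outliers:", data[z_mask].tolist())  # [] (a 3-sigma cut is strict on 9 points)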
validmind/tests/stats/__init__.py
File without changes
validmind/unit_metrics/classification/Accuracy.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import accuracy_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Accuracy(dataset, model):
+def Accuracy(dataset: VMDataset, model: VMModel) -> float:
     """Calculates the accuracy of a model"""
     return accuracy_score(dataset.y, dataset.y_pred(model))
validmind/unit_metrics/classification/F1.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import f1_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def F1(model, dataset, **kwargs):
+def F1(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the F1 score for a classification model."""
     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/Precision.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import precision_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Precision(model, dataset, **kwargs):
+def Precision(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the precision for a classification model."""
     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/ROC_AUC.py
@@ -7,11 +7,12 @@ from sklearn.metrics import roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def ROC_AUC(model, dataset, **kwargs):
+def ROC_AUC(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the ROC AUC for a classification model."""
     y_true = dataset.y
 
validmind/unit_metrics/classification/Recall.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import recall_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Recall(model, dataset, **kwargs):
+def Recall(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the recall for a classification model."""
     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
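All five hunks above make the same change: the classification unit metrics gain VMDataset/VMModel type hints and an explicit float return annotation, while the bodies still rely only on dataset.y and dataset.y_pred(model). Below is a minimal sketch of that contract using a hypothetical duck-typed stand-in; FakeDataset and its values are invented for illustration and are not part of the ValidMind API.

import numpy as np
from sklearn.metrics import f1_score


class FakeDataset:
    """Hypothetical stand-in exposing only the attributes the unit metrics use."""

    def __init__(self, y, preds):
        self.y = np.asarray(y)
        self._preds = np.asarray(preds)

    def y_pred(self, model):
        # The real VMDataset presumably resolves the prediction column for the
        # given model; this toy version ignores the model argument entirely.
        return self._preds


def F1(model, dataset, **kwargs) -> float:
    """Same body as the updated unit metric, without the decorators or ValidMind type hints."""
    return f1_score(dataset.y, dataset.y_pred(model), **kwargs)


print(F1(model=None, dataset=FakeDataset([0, 1, 1, 0], [0, 1, 0, 0])))  # ~0.667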
validmind/unit_metrics/classification/individual/AbsoluteError.py (new file)
@@ -0,0 +1,42 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def AbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+    """Calculates the absolute error per row for a classification model.
+
+    For classification tasks, this computes the absolute difference between
+    the true class labels and predicted class labels for each individual row.
+    For binary classification with probabilities, it can also compute the
+    absolute difference between true labels and predicted probabilities.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predictions
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row absolute errors as a list of float values
+    """
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    # Convert to numpy arrays and ensure same data type
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
+
+    # For classification, compute absolute difference between true and predicted labels
+    absolute_errors = np.abs(y_true - y_pred)
+
+    # Return as a list of floats
+    return absolute_errors.astype(float).tolist()
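Since the per-row value is just |y_true - y_pred| over the label arrays, the metric reduces to one numpy expression. A quick check on hand-written toy labels, with no ValidMind objects involved:

import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 1, 1])

# Same computation as AbsoluteError: per-row |y_true - y_pred|
absolute_errors = np.abs(y_true - y_pred).astype(float).tolist()
print(absolute_errors)  # [0.0, 0.0, 1.0, 1.0, 0.0]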
validmind/unit_metrics/classification/individual/BrierScore.py (new file)
@@ -0,0 +1,56 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def BrierScore(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+    """Calculates the Brier score per row for a classification model.
+
+    The Brier score is a proper score function that measures the accuracy of
+    probabilistic predictions. It is calculated as the mean squared difference
+    between predicted probabilities and the actual binary outcomes.
+    Lower scores indicate better calibration.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predicted probabilities
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row Brier scores as a list of float values
+
+    Raises:
+        ValueError: If probability column is not found for the model
+    """
+    y_true = dataset.y
+
+    # Try to get probabilities
+    try:
+        y_prob = dataset.y_prob(model)
+        # For binary classification, use the positive class probability
+        if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+            y_prob = y_prob[:, 1]  # Use probability of positive class
+    except ValueError:
+        # Fall back to predictions if probabilities not available
+        # Convert predictions to "probabilities" (1.0 for predicted class, 0.0 for other)
+        y_pred = dataset.y_pred(model)
+        y_prob = y_pred.astype(float)
+
+    # Convert to numpy arrays and ensure same data type
+    y_true = np.asarray(y_true, dtype=float)
+    y_prob = np.asarray(y_prob, dtype=float)
+
+    # Calculate Brier score per row: (predicted_probability - actual_outcome)²
+    brier_scores = (y_prob - y_true) ** 2
+
+    # Return as a list of floats
+    return brier_scores.tolist()
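For binary labels, the per-row Brier score is the squared gap between the predicted positive-class probability and the 0/1 outcome, so a confident correct prediction scores near 0 and a confident miss near 1. Worked through on a few hand-picked toy probabilities:

import numpy as np

y_true = np.array([1.0, 0.0, 1.0, 0.0])
y_prob = np.array([0.9, 0.2, 0.4, 0.8])   # predicted P(class = 1)

# Per-row Brier score, as in the new unit metric: (p - y)^2
brier = (y_prob - y_true) ** 2
print(brier.tolist())  # approximately [0.01, 0.04, 0.36, 0.64]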
validmind/unit_metrics/classification/individual/CalibrationError.py (new file)
@@ -0,0 +1,77 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def CalibrationError(
+    model: VMModel, dataset: VMDataset, n_bins: int = 10, **kwargs
+) -> List[float]:
+    """Calculates the calibration error per row for a classification model.
+
+    Calibration error measures how well the predicted probabilities reflect the
+    actual likelihood of the positive class. For each prediction, this computes
+    the absolute difference between the predicted probability and the empirical
+    frequency of the positive class in the corresponding probability bin.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predicted probabilities
+        n_bins: Number of bins for probability calibration, defaults to 10
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row calibration errors as a list of float values
+
+    Raises:
+        ValueError: If probability column is not found for the model
+    """
+    y_true = dataset.y
+
+    # Try to get probabilities
+    try:
+        y_prob = dataset.y_prob(model)
+        # For binary classification, use the positive class probability
+        if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+            y_prob = y_prob[:, 1]  # Use probability of positive class
+    except ValueError:
+        # If no probabilities available, return zeros (perfect calibration for hard predictions)
+        return [0.0] * len(y_true)
+
+    # Convert to numpy arrays
+    y_true = np.asarray(y_true, dtype=float)
+    y_prob = np.asarray(y_prob, dtype=float)
+
+    # Create probability bins
+    bin_boundaries = np.linspace(0, 1, n_bins + 1)
+    bin_lowers = bin_boundaries[:-1]
+    bin_uppers = bin_boundaries[1:]
+
+    # Calculate calibration error for each sample
+    calibration_errors = np.zeros_like(y_prob)
+
+    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
+        # Find samples in this bin
+        in_bin = (y_prob > bin_lower) & (y_prob <= bin_upper)
+        if not np.any(in_bin):
+            continue
+
+        # Calculate empirical frequency for this bin
+        empirical_freq = np.mean(y_true[in_bin])
+
+        # Calculate average predicted probability for this bin
+        avg_predicted_prob = np.mean(y_prob[in_bin])
+
+        # Assign calibration error to all samples in this bin
+        calibration_errors[in_bin] = abs(avg_predicted_prob - empirical_freq)
+
+    # Return as a list of floats
+    return calibration_errors.tolist()
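The per-row value is a binned calibration gap: every sample inherits the absolute difference between its bin's mean predicted probability and the bin's observed positive rate. The sketch below reproduces that binning step on invented probabilities, outside the unit-metric wrapper:

import numpy as np

y_true = np.array([0, 0, 1, 1, 1, 1], dtype=float)
y_prob = np.array([0.15, 0.18, 0.12, 0.85, 0.90, 0.95])

n_bins = 10
edges = np.linspace(0, 1, n_bins + 1)
errors = np.zeros_like(y_prob)

for lo, hi in zip(edges[:-1], edges[1:]):
    in_bin = (y_prob > lo) & (y_prob <= hi)
    if not np.any(in_bin):
        continue
    # |mean predicted probability - observed positive rate| within the bin
    errors[in_bin] = abs(np.mean(y_prob[in_bin]) - np.mean(y_true[in_bin]))

print(np.round(errors, 3).tolist())  # [0.183, 0.183, 0.183, 0.125, 0.125, 0.05]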
validmind/unit_metrics/classification/individual/ClassBalance.py (new file)
@@ -0,0 +1,65 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def ClassBalance(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+    """Calculates the class balance score per row for a classification model.
+
+    For each prediction, this returns how balanced the predicted class is in the
+    training distribution. Lower scores indicate predictions on rare classes,
+    higher scores indicate predictions on common classes. This helps understand
+    if model errors are more likely on imbalanced classes.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predictions
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row class balance scores as a list of float values
+
+    Note:
+        Scores range from 0 to 0.5, where 0.5 indicates perfectly balanced classes
+        and lower values indicate more imbalanced classes.
+    """
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    # Convert to numpy arrays
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
+
+    # Calculate class frequencies in the true labels (proxy for training distribution)
+    unique_classes, class_counts = np.unique(y_true, return_counts=True)
+    class_frequencies = class_counts / len(y_true)
+
+    # Create a mapping from class to frequency
+    class_to_freq = dict(zip(unique_classes, class_frequencies))
+
+    # Calculate balance score for each prediction
+    balance_scores = []
+
+    for pred in y_pred:
+        if pred in class_to_freq:
+            freq = class_to_freq[pred]
+            # Balance score: how close to 0.5 (perfectly balanced) the frequency is
+            # Score = 0.5 - |freq - 0.5| = min(freq, 1-freq)
+            balance_score = min(freq, 1 - freq)
+        else:
+            # Predicted class not seen in true labels (very rare)
+            balance_score = 0.0
+
+        balance_scores.append(balance_score)
+
+    # Return as a list of floats
+    return balance_scores
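Each prediction's score is min(freq, 1 - freq) of the predicted class's frequency among the true labels, so it peaks at 0.5 for a perfectly balanced class and approaches 0 for rare classes. A small multi-class illustration with invented labels:

import numpy as np

y_true = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 2])   # class frequencies: 0 -> 0.6, 1 -> 0.3, 2 -> 0.1
y_pred = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0])

classes, counts = np.unique(y_true, return_counts=True)
freq = dict(zip(classes, counts / len(y_true)))

# min(freq, 1 - freq): 0.5 would be perfectly balanced, values near 0 mean a rare class
scores = [float(min(freq[p], 1 - freq[p])) if p in freq else 0.0 for p in y_pred]
print(scores)  # [0.4, 0.3, 0.1, 0.4, 0.3, 0.1, 0.4, 0.3, 0.1, 0.4]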
validmind/unit_metrics/classification/individual/Confidence.py (new file)
@@ -0,0 +1,52 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def Confidence(model: VMModel, dataset: VMDataset, **kwargs) -> List[float]:
+    """Calculates the prediction confidence per row for a classification model.
+
+    For binary classification, confidence is calculated as the maximum probability
+    across classes, or alternatively as the distance from the decision boundary (0.5).
+    Higher values indicate more confident predictions.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predicted probabilities
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row confidence scores as a list of float values
+
+    Raises:
+        ValueError: If probability column is not found for the model
+    """
+    # Try to get probabilities, fall back to predictions if not available
+    try:
+        y_prob = dataset.y_prob(model)
+        # For binary classification, use max probability approach
+        if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+            # Multi-class: confidence is the maximum probability
+            confidence = np.max(y_prob, axis=1)
+        else:
+            # Binary classification: confidence based on distance from 0.5
+            y_prob = np.asarray(y_prob, dtype=float)
+            confidence = np.abs(y_prob - 0.5) + 0.5
+    except ValueError:
+        # Fall back to binary correctness if probabilities not available
+        y_true = dataset.y
+        y_pred = dataset.y_pred(model)
+        # If no probabilities, confidence is 1.0 for correct, 0.0 for incorrect
+        confidence = (y_true == y_pred).astype(float)
+
+    # Return as a list of floats
+    return confidence.tolist()
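In the binary branch, the score folds the predicted probability around the 0.5 decision boundary (|p - 0.5| + 0.5), so 0.5 means a coin-flip prediction and values near 1.0 mean a confident one, regardless of which class was predicted. Illustrated on a handful of invented probabilities:

import numpy as np

y_prob = np.array([0.50, 0.65, 0.10, 0.97])   # predicted P(class = 1)

# Binary-branch confidence from the new metric: distance from the 0.5 boundary
confidence = np.abs(y_prob - 0.5) + 0.5
print(np.round(confidence, 2).tolist())  # [0.5, 0.65, 0.9, 0.97]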
validmind/unit_metrics/classification/individual/Correctness.py (new file)
@@ -0,0 +1,41 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def Correctness(model: VMModel, dataset: VMDataset, **kwargs) -> List[int]:
+    """Calculates the correctness per row for a classification model.
+
+    For classification tasks, this returns 1 for correctly classified rows
+    and 0 for incorrectly classified rows. This provides a binary indicator
+    of model performance for each individual prediction.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predictions
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[int]: Per-row correctness as a list of 1s and 0s
+    """
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    # Convert to numpy arrays
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
+
+    # For classification, check if predictions match true labels
+    correctness = (y_true == y_pred).astype(int)
+
+    # Return as a list of integers
+    return correctness.tolist()
validmind/unit_metrics/classification/individual/LogLoss.py (new file)
@@ -0,0 +1,61 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from typing import List
+
+import numpy as np
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tasks("classification")
+@tags("classification")
+def LogLoss(
+    model: VMModel, dataset: VMDataset, eps: float = 1e-15, **kwargs
+) -> List[float]:
+    """Calculates the logarithmic loss per row for a classification model.
+
+    Log loss measures the performance of a classification model where the prediction
+    is a probability value between 0 and 1. The log loss increases as the predicted
+    probability diverges from the actual label.
+
+    Args:
+        model: The classification model to evaluate
+        dataset: The dataset containing true labels and predicted probabilities
+        eps: Small value to avoid log(0), defaults to 1e-15
+        **kwargs: Additional parameters (unused for compatibility)
+
+    Returns:
+        List[float]: Per-row log loss values as a list of float values
+
+    Raises:
+        ValueError: If probability column is not found for the model
+    """
+    y_true = dataset.y
+
+    # Try to get probabilities
+    try:
+        y_prob = dataset.y_prob(model)
+        # For binary classification, use the positive class probability
+        if y_prob.ndim > 1 and y_prob.shape[1] > 1:
+            y_prob = y_prob[:, 1]  # Use probability of positive class
+    except ValueError:
+        # Fall back to predictions if probabilities not available
+        # Convert predictions to "probabilities" (0.99 for correct class, 0.01 for wrong)
+        y_pred = dataset.y_pred(model)
+        y_prob = np.where(y_true == y_pred, 0.99, 0.01)
+
+    # Convert to numpy arrays and ensure same data type
+    y_true = np.asarray(y_true, dtype=float)
+    y_prob = np.asarray(y_prob, dtype=float)
+
+    # Clip probabilities to avoid log(0) and log(1)
+    y_prob = np.clip(y_prob, eps, 1 - eps)
+
+    # Calculate log loss per row: -[y*log(p) + (1-y)*log(1-p)]
+    log_loss_per_row = -(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))
+
+    # Return as a list of floats
+    return log_loss_per_row.tolist()
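Per row, the metric is the binary cross-entropy term -[y*log(p) + (1 - y)*log(1 - p)] with the probability clipped away from 0 and 1, so confident correct predictions contribute almost nothing while confident mistakes dominate. A quick numpy check on invented values:

import numpy as np

y_true = np.array([1.0, 0.0, 1.0])
y_prob = np.array([0.9, 0.1, 0.2])   # predicted P(class = 1)

eps = 1e-15
p = np.clip(y_prob, eps, 1 - eps)

# Per-row log loss, matching the new unit metric's formula
log_loss = -(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))
print(np.round(log_loss, 3).tolist())  # [0.105, 0.105, 1.609]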