validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +1 -1
- validmind/models/function.py +11 -3
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +145 -38
- validmind/vm_models/result/result.py +14 -12
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
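The hunks below show the dominant pattern in this release: test functions gain explicit typing return annotations (plus the imports those annotations need) so callers and tooling can tell which tests produce figures and which produce tables. A minimal sketch of the before/after shape, assuming the validmind package and matplotlib are installed; the body is elided:

from typing import Tuple

import matplotlib.pyplot as plt

from validmind import RawData


# Before (2.8.27), the signature carried no return information:
#     def Sentiment(dataset):
# After (2.8.29), the produced artifacts are declared up front:
def Sentiment(dataset) -> Tuple[plt.Figure, RawData]:
    ...  # see the Sentiment.py hunk below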
validmind/tests/data_validation/nlp/Sentiment.py
CHANGED
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 
+from typing import Tuple
+
 import matplotlib.pyplot as plt
 import nltk
 import seaborn as sns
@@ -13,7 +15,7 @@ from validmind import RawData, tags, tasks
 
 @tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
-def Sentiment(dataset):
+def Sentiment(dataset) -> Tuple[plt.Figure, RawData]:
     """
     Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.
 
validmind/tests/data_validation/nlp/StopWords.py
CHANGED
@@ -7,6 +7,7 @@ Threshold based tests
 """
 
 from collections import defaultdict
+from typing import Dict, Tuple
 
 import nltk
 import pandas as pd
@@ -21,7 +22,7 @@ from validmind.vm_models import VMDataset
 @tasks("text_classification", "text_summarization")
 def StopWords(
     dataset: VMDataset, min_percent_threshold: float = 0.5, num_words: int = 25
-):
+) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
     """
     Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.
 
validmind/tests/data_validation/nlp/TextDescription.py
CHANGED
@@ -3,10 +3,12 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import string
+from typing import Tuple
 
 import nltk
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
@@ -94,7 +96,7 @@ def TextDescription(
         "``",
     },
     lang: str = "english",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate
     visualizations.
validmind/tests/data_validation/nlp/Toxicity.py
CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -11,7 +13,7 @@ from validmind import RawData, tags, tasks
 
 @tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
-def Toxicity(dataset):
+def Toxicity(dataset) -> Tuple[plt.Figure, RawData]:
     """
     Assesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores.
 
validmind/tests/load.py
CHANGED
@@ -7,7 +7,17 @@
 import inspect
 import json
 from pprint import pformat
-from typing import
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    get_args,
+    get_origin,
+)
 from uuid import uuid4
 
 import pandas as pd
@@ -18,12 +28,31 @@ from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
 from ..utils import display, format_dataframe, fuzzy_match, md_to_html, test_id_to_name
 from ..vm_models import VMDataset, VMModel
+from ..vm_models.figure import Figure
+from ..vm_models.result import ResultTable
 from .__types__ import TestID
 from ._store import test_provider_store, test_store
 
 logger = get_logger(__name__)
 
 
+try:
+    from matplotlib.figure import Figure as MatplotlibFigure
+except ImportError:
+    MatplotlibFigure = None
+
+try:
+    from plotly.graph_objects import Figure as PlotlyFigure
+except ImportError:
+    PlotlyFigure = None
+
+FIGURE_TYPES = tuple(
+    item for item in (Figure, MatplotlibFigure, PlotlyFigure) if inspect.isclass(item)
+)
+TABLE_TYPES = (pd.DataFrame, ResultTable)
+GENERIC_TABLE_TYPES = (list, dict)
+
+
 INPUT_TYPE_MAP = {
     "dataset": VMDataset,
     "datasets": List[VMDataset],
@@ -32,6 +61,45 @@ INPUT_TYPE_MAP = {
 }
 
 
+def _inspect_return_type(annotation: Any) -> Tuple[bool, bool]:
+    """
+    Inspects a return type annotation to determine if it contains a Figure or Table.
+
+    Returns a tuple (has_figure, has_table).
+    """
+    has_figure = False
+    has_table = False
+
+    origin = get_origin(annotation)
+    args = get_args(annotation)
+
+    # A Union means the return type could be one of several types.
+    # A tuple in a type hint means multiple return values.
+    # We recursively inspect the arguments of Union and tuple.
+    if origin is Union or origin is tuple:
+        for arg in args:
+            fig, table = _inspect_return_type(arg)
+            has_figure |= fig
+            has_table |= table
+        return has_figure, has_table
+
+    check_type = origin if origin is not None else annotation
+
+    if not inspect.isclass(check_type):
+        return has_figure, has_table  # Can't do issubclass on non-class like Any
+
+    if FIGURE_TYPES and issubclass(check_type, FIGURE_TYPES):
+        has_figure = True
+
+    if TABLE_TYPES and issubclass(check_type, TABLE_TYPES):
+        has_table = True
+
+    if check_type in GENERIC_TABLE_TYPES:
+        has_table = True
+
+    return has_figure, has_table
+
+
 def _inspect_signature(
     test_func: Callable[..., Any],
 ) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
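The comments in _inspect_return_type describe the key move: typing.get_origin and typing.get_args peel one layer off an annotation, and Union / Tuple layers are walked recursively until concrete classes remain. A self-contained sketch of that recursion, using stand-in classes rather than validmind's Figure and ResultTable, and a simple issubclass check in place of the exact membership test on GENERIC_TABLE_TYPES:

from typing import Tuple, Union, get_args, get_origin


class Figure:  # stand-in for a figure class
    pass


class ResultTable:  # stand-in for a table class
    pass


def classify(annotation):
    """Return (has_figure, has_table) by walking the annotation tree."""
    origin, args = get_origin(annotation), get_args(annotation)
    if origin is Union or origin is tuple:
        # Union = "one of these"; tuple = "all of these"; inspect each arm.
        parts = [classify(arg) for arg in args]
        return any(f for f, _ in parts), any(t for _, t in parts)
    check = origin if origin is not None else annotation
    if not isinstance(check, type):
        return False, False  # e.g. Any, which issubclass() rejects
    return issubclass(check, Figure), issubclass(check, (ResultTable, list, dict))


print(classify(Tuple[ResultTable, Figure]))  # (True, True)
print(classify(Union[Figure, None]))         # (True, False)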
@@ -173,23 +241,29 @@ def _pretty_list_tests(
     tests: Dict[str, Callable[..., Any]], truncate: bool = True
 ) -> None:
     """Pretty print a list of tests"""
-    …  (15 removed lines not preserved in this extract)
+    rows = []
+    for test_id, test in tests.items():
+        has_figure, has_table = _inspect_return_type(
+            inspect.signature(test).return_annotation
+        )
+        rows.append(
+            {
+                "ID": test_id,
+                "Name": test_id_to_name(test_id),
+                "Description": _test_description(
+                    inspect.getdoc(test),
+                    num_lines=(5 if truncate else 999999),
+                ),
+                "Has Figure": has_figure,
+                "Has Table": has_table,
+                "Required Inputs": list(test.inputs.keys()),
+                "Params": test.params,
+                "Tags": test.__tags__,
+                "Tasks": test.__tasks__,
+            }
+        )
 
-    return format_dataframe(pd.DataFrame(
+    return format_dataframe(pd.DataFrame(rows))
 
 
 def list_tags() -> List[str]:
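With tests annotated, _pretty_list_tests can fill its new Has Figure / Has Table columns straight from each function's signature. Reading an annotation back is plain standard library; for example, under the Sentiment signature shown earlier (a stand-in RawData is used so the snippet runs without validmind installed, though it does assume matplotlib):

import inspect
from typing import Tuple

import matplotlib.pyplot as plt


class RawData:  # stand-in for validmind.RawData
    pass


def Sentiment(dataset) -> Tuple[plt.Figure, RawData]:
    ...


# This is the value handed to _inspect_return_type for classification.
annotation = inspect.signature(Sentiment).return_annotation
print(annotation)
# typing.Tuple[matplotlib.figure.Figure, __main__.RawData]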
validmind/tests/model_validation/BertScore.py
CHANGED
@@ -2,21 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
 def BertScore(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     evaluation_model="distilbert-base-uncased",
-):
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated text using BERTScore metrics and visualizes results through histograms
     and bar charts, alongside compiling a comprehensive table of descriptive statistics.
validmind/tests/model_validation/BleuScore.py
CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def BleuScore(
+def BleuScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms
     and bar charts, alongside compiling a comprehensive table of descriptive statistics for BLEU scores.
validmind/tests/model_validation/ClusterSizeDistribution.py
CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
@@ -11,7 +13,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("sklearn", "model_performance")
 @tasks("clustering")
-def ClusterSizeDistribution(
+def ClusterSizeDistribution(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions
     with the actual data.
validmind/tests/model_validation/ContextualRecall.py
CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import nltk
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def ContextualRecall(
+def ContextualRecall(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct
     text, visualizing the results through histograms and bar charts, alongside compiling a comprehensive table of
validmind/tests/model_validation/FeaturesAUC.py
CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
@@ -17,7 +19,9 @@ logger = get_logger(__name__)
 
 @tags("feature_importance", "AUC", "visualization")
 @tasks("classification")
-def FeaturesAUC(
+def FeaturesAUC(
+    dataset: VMDataset, fontsize: int = 12, figure_height: int = 500
+) -> Tuple[go.Figure, RawData]:
     """
     Evaluates the discriminatory power of each individual feature within a binary classification model by calculating
     the Area Under the Curve (AUC) for each feature separately.
validmind/tests/model_validation/MeteorScore.py
CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def MeteorScore(
+def MeteorScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated translations by comparing them to human-produced references using the
     METEOR score, which evaluates precision, recall, and word order.
validmind/tests/model_validation/ModelMetadata.py
CHANGED
@@ -6,11 +6,12 @@ import pandas as pd
 
 from validmind import tags, tasks
 from validmind.utils import get_model_info
+from validmind.vm_models import VMModel
 
 
 @tags("model_training", "metadata")
 @tasks("regression", "time_series_forecasting")
-def ModelMetadata(model):
+def ModelMetadata(model: VMModel) -> pd.DataFrame:
     """
     Compare metadata of different models and generate a summary table with the results.
 
validmind/tests/model_validation/ModelPredictionResiduals.py
CHANGED
@@ -2,18 +2,26 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Optional, Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import kstest
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("residual_analysis", "visualization")
 def ModelPredictionResiduals(
-    dataset
-
+    dataset: VMDataset,
+    model: VMModel,
+    nbins: int = 100,
+    p_value_threshold: float = 0.05,
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses normality and behavior of residuals in regression models through visualization and statistical tests.
 
validmind/tests/model_validation/RegardScore.py
CHANGED
@@ -2,17 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def RegardScore(
+def RegardScore(
+    dataset: VMDataset,
+    model: VMModel,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the sentiment and potential biases in text generated by NLP models by computing and visualizing regard
     scores.
validmind/tests/model_validation/RegressionResidualsPlot.py
CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.figure_factory as ff
 import plotly.graph_objects as go
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("model_performance", "visualization")
 @tasks("regression")
-def RegressionResidualsPlot(
+def RegressionResidualsPlot(
+    model: VMModel, dataset: VMDataset, bin_size: float = 0.1
+) -> Tuple[go.Figure, go.Figure, RawData]:
     """
     Evaluates regression model performance using residual distribution and actual vs. predicted plots.
 
validmind/tests/model_validation/RougeScore.py
CHANGED
@@ -2,16 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 from rouge import Rouge
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def RougeScore(
+def RougeScore(
+    dataset: VMDataset,
+    model: VMModel,
+    metric: str = "rouge-1",
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated text using ROUGE metrics and visualizes the results to provide
     comprehensive performance insights.
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py
CHANGED
@@ -2,17 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import norm
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionWithCI(
+def TimeSeriesPredictionWithCI(
+    dataset: VMDataset,
+    model: VMModel,
+    confidence: float = 0.95,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence
     intervals.
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py
CHANGED
@@ -2,14 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionsPlot(
+def TimeSeriesPredictionsPlot(
+    dataset: VMDataset,
+    model: VMModel,
+) -> Tuple[go.Figure, RawData]:
     """
     Plot actual vs predicted values for time series data and generate a visual comparison for the model.
 
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py
CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Optional, Tuple
 
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn import metrics
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_performance", "sklearn")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesR2SquareBySegments(
+def TimeSeriesR2SquareBySegments(
+    dataset: VMDataset, model: VMModel, segments: Optional[int] = None
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the R-Squared values of regression models over specified time segments in time series data to assess
     segment-wise model performance.
validmind/tests/model_validation/TokenDisparity.py
CHANGED
@@ -2,15 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def TokenDisparity(
+def TokenDisparity(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and
     bar charts, alongside compiling a comprehensive table of descriptive statistics for token counts.
validmind/tests/model_validation/ToxicityScore.py
CHANGED
@@ -2,16 +2,21 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def ToxicityScore(
+def ToxicityScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content.
 
validmind/tests/model_validation/embeddings/ClusterDistribution.py
CHANGED
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.cluster import KMeans
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def ClusterDistribution(
+def ClusterDistribution(
+    model: VMModel, dataset: VMDataset, num_clusters: int = 5
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
 
validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py
CHANGED
@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def CosineSimilarityComparison(
+def CosineSimilarityComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[go.Figure, RawData, pd.DataFrame]:
     """
     Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both
     statistical and visual insights.
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py
CHANGED
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def CosineSimilarityDistribution(
+def CosineSimilarityDistribution(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
     histogram.