validmind 2.8.28__py3-none-any.whl → 2.8.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. validmind/models/function.py +11 -3
  2. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  3. validmind/tests/data_validation/ADF.py +3 -1
  4. validmind/tests/data_validation/AutoAR.py +3 -1
  5. validmind/tests/data_validation/AutoMA.py +5 -1
  6. validmind/tests/data_validation/AutoStationarity.py +5 -1
  7. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  8. validmind/tests/data_validation/BoxPierce.py +4 -1
  9. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  10. validmind/tests/data_validation/ClassImbalance.py +1 -1
  11. validmind/tests/data_validation/DatasetDescription.py +4 -1
  12. validmind/tests/data_validation/DatasetSplit.py +3 -2
  13. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  14. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  15. validmind/tests/data_validation/Duplicates.py +3 -1
  16. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  17. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  18. validmind/tests/data_validation/HighCardinality.py +3 -1
  19. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  20. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  21. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  22. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  23. validmind/tests/data_validation/JarqueBera.py +3 -1
  24. validmind/tests/data_validation/KPSS.py +3 -1
  25. validmind/tests/data_validation/LJungBox.py +3 -1
  26. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  27. validmind/tests/data_validation/MissingValues.py +5 -1
  28. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  29. validmind/tests/data_validation/MutualInformation.py +4 -1
  30. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  31. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  32. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  33. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  34. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  35. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  36. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  37. validmind/tests/data_validation/RunsTest.py +1 -1
  38. validmind/tests/data_validation/ScatterPlot.py +2 -1
  39. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  40. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  41. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  42. validmind/tests/data_validation/Skewness.py +3 -1
  43. validmind/tests/data_validation/SpreadPlot.py +3 -1
  44. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  45. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  46. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  48. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  49. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  50. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  51. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  52. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  53. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  54. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  55. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  56. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  57. validmind/tests/data_validation/UniqueRows.py +5 -1
  58. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  59. validmind/tests/data_validation/WOEBinTable.py +5 -1
  60. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  61. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  62. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  63. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  64. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  65. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  67. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  68. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  69. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  70. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  71. validmind/tests/load.py +91 -17
  72. validmind/tests/model_validation/BertScore.py +6 -3
  73. validmind/tests/model_validation/BleuScore.py +6 -1
  74. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  75. validmind/tests/model_validation/ContextualRecall.py +6 -1
  76. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  77. validmind/tests/model_validation/MeteorScore.py +6 -1
  78. validmind/tests/model_validation/ModelMetadata.py +2 -1
  79. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  80. validmind/tests/model_validation/RegardScore.py +7 -1
  81. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  82. validmind/tests/model_validation/RougeScore.py +8 -1
  83. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  84. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  85. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  86. validmind/tests/model_validation/TokenDisparity.py +6 -1
  87. validmind/tests/model_validation/ToxicityScore.py +6 -1
  88. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  90. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  91. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  92. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  93. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  94. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  96. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  101. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  102. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  103. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  104. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  105. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  106. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  107. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  108. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  109. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  110. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  111. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  112. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  113. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  114. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  117. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  118. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  119. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  120. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  121. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  122. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  123. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  124. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  125. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  126. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  127. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  128. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  137. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  141. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  144. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  147. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  148. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  149. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  150. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  151. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  152. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  153. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  154. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  155. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  156. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  157. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  158. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  159. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  160. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  161. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  162. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  163. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  164. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  165. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  166. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  167. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  168. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  169. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  170. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  171. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  172. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  173. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  174. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  175. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  176. validmind/tests/prompt_validation/Bias.py +5 -1
  177. validmind/tests/prompt_validation/Clarity.py +5 -1
  178. validmind/tests/prompt_validation/Conciseness.py +5 -1
  179. validmind/tests/prompt_validation/Delimitation.py +5 -1
  180. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  181. validmind/tests/prompt_validation/Robustness.py +5 -1
  182. validmind/tests/prompt_validation/Specificity.py +5 -1
  183. validmind/unit_metrics/classification/Accuracy.py +2 -1
  184. validmind/unit_metrics/classification/F1.py +2 -1
  185. validmind/unit_metrics/classification/Precision.py +2 -1
  186. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  187. validmind/unit_metrics/classification/Recall.py +2 -1
  188. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  189. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  190. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  191. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  192. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  193. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  194. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  195. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  196. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  197. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  198. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  199. validmind/vm_models/dataset/dataset.py +145 -38
  200. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
  201. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/RECORD +204 -204
  202. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
  203. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
  204. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
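Nearly all of the changes in this release follow one pattern: each test function gains explicit parameter and return-type annotations (plus the imports they require), and the test loader in validmind/tests/load.py is updated to read those annotations and report whether a test produces figures or tables. A condensed before/after sketch of that pattern, based on the Sentiment.py hunk below (the function body is elided):

# Before (2.8.28)
def Sentiment(dataset):
    ...

# After (2.8.29): annotated so the loader can classify the outputs
from typing import Tuple

import matplotlib.pyplot as plt

from validmind import RawData


def Sentiment(dataset) -> Tuple[plt.Figure, RawData]:
    ...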
validmind/tests/data_validation/nlp/Sentiment.py CHANGED
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 
+from typing import Tuple
+
 import matplotlib.pyplot as plt
 import nltk
 import seaborn as sns
@@ -13,7 +15,7 @@ from validmind import RawData, tags, tasks
 
 @tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
-def Sentiment(dataset):
+def Sentiment(dataset) -> Tuple[plt.Figure, RawData]:
     """
     Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.
 
validmind/tests/data_validation/nlp/StopWords.py CHANGED
@@ -7,6 +7,7 @@ Threshold based tests
 """
 
 from collections import defaultdict
+from typing import Dict, Tuple
 
 import nltk
 import pandas as pd
@@ -21,7 +22,7 @@ from validmind.vm_models import VMDataset
 @tasks("text_classification", "text_summarization")
 def StopWords(
     dataset: VMDataset, min_percent_threshold: float = 0.5, num_words: int = 25
-):
+) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
     """
     Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.
 
validmind/tests/data_validation/nlp/TextDescription.py CHANGED
@@ -3,10 +3,12 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import string
+from typing import Tuple
 
 import nltk
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from nltk.corpus import stopwords
 
 from validmind import RawData, tags, tasks
@@ -94,7 +96,7 @@ def TextDescription(
         "``",
     },
     lang: str = "english",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate
     visualizations.
validmind/tests/data_validation/nlp/Toxicity.py CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -11,7 +13,7 @@ from validmind import RawData, tags, tasks
 
 @tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
-def Toxicity(dataset):
+def Toxicity(dataset) -> Tuple[plt.Figure, RawData]:
     """
     Assesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores.
 
validmind/tests/load.py CHANGED
@@ -7,7 +7,17 @@
 import inspect
 import json
 from pprint import pformat
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    get_args,
+    get_origin,
+)
 from uuid import uuid4
 
 import pandas as pd
@@ -18,12 +28,31 @@ from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
 from ..utils import display, format_dataframe, fuzzy_match, md_to_html, test_id_to_name
 from ..vm_models import VMDataset, VMModel
+from ..vm_models.figure import Figure
+from ..vm_models.result import ResultTable
 from .__types__ import TestID
 from ._store import test_provider_store, test_store
 
 logger = get_logger(__name__)
 
 
+try:
+    from matplotlib.figure import Figure as MatplotlibFigure
+except ImportError:
+    MatplotlibFigure = None
+
+try:
+    from plotly.graph_objects import Figure as PlotlyFigure
+except ImportError:
+    PlotlyFigure = None
+
+FIGURE_TYPES = tuple(
+    item for item in (Figure, MatplotlibFigure, PlotlyFigure) if inspect.isclass(item)
+)
+TABLE_TYPES = (pd.DataFrame, ResultTable)
+GENERIC_TABLE_TYPES = (list, dict)
+
+
 INPUT_TYPE_MAP = {
     "dataset": VMDataset,
     "datasets": List[VMDataset],
@@ -32,6 +61,45 @@ INPUT_TYPE_MAP = {
 }
 
 
+def _inspect_return_type(annotation: Any) -> Tuple[bool, bool]:
+    """
+    Inspects a return type annotation to determine if it contains a Figure or Table.
+
+    Returns a tuple (has_figure, has_table).
+    """
+    has_figure = False
+    has_table = False
+
+    origin = get_origin(annotation)
+    args = get_args(annotation)
+
+    # A Union means the return type could be one of several types.
+    # A tuple in a type hint means multiple return values.
+    # We recursively inspect the arguments of Union and tuple.
+    if origin is Union or origin is tuple:
+        for arg in args:
+            fig, table = _inspect_return_type(arg)
+            has_figure |= fig
+            has_table |= table
+        return has_figure, has_table
+
+    check_type = origin if origin is not None else annotation
+
+    if not inspect.isclass(check_type):
+        return has_figure, has_table  # Can't do issubclass on non-class like Any
+
+    if FIGURE_TYPES and issubclass(check_type, FIGURE_TYPES):
+        has_figure = True
+
+    if TABLE_TYPES and issubclass(check_type, TABLE_TYPES):
+        has_table = True
+
+    if check_type in GENERIC_TABLE_TYPES:
+        has_table = True
+
+    return has_figure, has_table
+
+
 def _inspect_signature(
     test_func: Callable[..., Any],
 ) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
@@ -173,23 +241,29 @@ def _pretty_list_tests(
     tests: Dict[str, Callable[..., Any]], truncate: bool = True
 ) -> None:
     """Pretty print a list of tests"""
-    table = [
-        {
-            "ID": test_id,
-            "Name": test_id_to_name(test_id),
-            "Description": _test_description(
-                inspect.getdoc(test),
-                num_lines=(5 if truncate else 999999),
-            ),
-            "Required Inputs": list(test.inputs.keys()),
-            "Params": test.params,
-            "Tags": test.__tags__,
-            "Tasks": test.__tasks__,
-        }
-        for test_id, test in tests.items()
-    ]
+    rows = []
+    for test_id, test in tests.items():
+        has_figure, has_table = _inspect_return_type(
+            inspect.signature(test).return_annotation
+        )
+        rows.append(
+            {
+                "ID": test_id,
+                "Name": test_id_to_name(test_id),
+                "Description": _test_description(
+                    inspect.getdoc(test),
+                    num_lines=(5 if truncate else 999999),
+                ),
+                "Has Figure": has_figure,
+                "Has Table": has_table,
+                "Required Inputs": list(test.inputs.keys()),
+                "Params": test.params,
+                "Tags": test.__tags__,
+                "Tasks": test.__tasks__,
+            }
+        )
 
-    return format_dataframe(pd.DataFrame(table))
+    return format_dataframe(pd.DataFrame(rows))
 
 
 def list_tags() -> List[str]:
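The _inspect_return_type helper added above walks a return annotation recursively: Union and tuple annotations are unpacked and their arguments inspected, while leaf types are classified against the figure types (ValidMind, matplotlib, and plotly figures) and table types (DataFrame, ResultTable, and generic list/dict). A minimal usage sketch, assuming validmind, pandas, and plotly are installed (_inspect_return_type is a private helper, so this is illustrative rather than supported API):

from typing import Tuple, Union

import pandas as pd
import plotly.graph_objects as go

from validmind import RawData
from validmind.tests.load import _inspect_return_type

# A tuple mixing a table type and a figure type -> (True, True)
print(_inspect_return_type(Tuple[pd.DataFrame, go.Figure, RawData]))
# A bare plotly figure -> (True, False)
print(_inspect_return_type(go.Figure))
# A Union of a DataFrame and a generic dict table -> (False, True)
print(_inspect_return_type(Union[pd.DataFrame, dict]))

The resulting flags populate the new "Has Figure" and "Has Table" columns in the table built by _pretty_list_tests.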
validmind/tests/model_validation/BertScore.py CHANGED
@@ -2,21 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
 def BertScore(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     evaluation_model="distilbert-base-uncased",
-):
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated text using BERTScore metrics and visualizes results through histograms
     and bar charts, alongside compiling a comprehensive table of descriptive statistics.
validmind/tests/model_validation/BleuScore.py CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def BleuScore(dataset, model):
+def BleuScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms
     and bar charts, alongside compiling a comprehensive table of descriptive statistics for BLEU scores.
validmind/tests/model_validation/ClusterSizeDistribution.py CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
@@ -11,7 +13,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("sklearn", "model_performance")
 @tasks("clustering")
-def ClusterSizeDistribution(dataset: VMDataset, model: VMModel):
+def ClusterSizeDistribution(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions
     with the actual data.
validmind/tests/model_validation/ContextualRecall.py CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import nltk
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def ContextualRecall(dataset, model):
+def ContextualRecall(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct
     text, visualizing the results through histograms and bar charts, alongside compiling a comprehensive table of
validmind/tests/model_validation/FeaturesAUC.py CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
@@ -17,7 +19,9 @@ logger = get_logger(__name__)
 
 @tags("feature_importance", "AUC", "visualization")
 @tasks("classification")
-def FeaturesAUC(dataset: VMDataset, fontsize: int = 12, figure_height: int = 500):
+def FeaturesAUC(
+    dataset: VMDataset, fontsize: int = 12, figure_height: int = 500
+) -> Tuple[go.Figure, RawData]:
     """
     Evaluates the discriminatory power of each individual feature within a binary classification model by calculating
     the Area Under the Curve (AUC) for each feature separately.
validmind/tests/model_validation/MeteorScore.py CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def MeteorScore(dataset, model):
+def MeteorScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated translations by comparing them to human-produced references using the
     METEOR score, which evaluates precision, recall, and word order.
validmind/tests/model_validation/ModelMetadata.py CHANGED
@@ -6,11 +6,12 @@ import pandas as pd
 
 from validmind import tags, tasks
 from validmind.utils import get_model_info
+from validmind.vm_models import VMModel
 
 
 @tags("model_training", "metadata")
 @tasks("regression", "time_series_forecasting")
-def ModelMetadata(model):
+def ModelMetadata(model: VMModel) -> pd.DataFrame:
     """
     Compare metadata of different models and generate a summary table with the results.
 
validmind/tests/model_validation/ModelPredictionResiduals.py CHANGED
@@ -2,18 +2,26 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Optional, Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import kstest
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("residual_analysis", "visualization")
 def ModelPredictionResiduals(
-    dataset, model, nbins=100, p_value_threshold=0.05, start_date=None, end_date=None
-):
+    dataset: VMDataset,
+    model: VMModel,
+    nbins: int = 100,
+    p_value_threshold: float = 0.05,
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses normality and behavior of residuals in regression models through visualization and statistical tests.
 
validmind/tests/model_validation/RegardScore.py CHANGED
@@ -2,17 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def RegardScore(dataset, model):
+def RegardScore(
+    dataset: VMDataset,
+    model: VMModel,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the sentiment and potential biases in text generated by NLP models by computing and visualizing regard
     scores.
validmind/tests/model_validation/RegressionResidualsPlot.py CHANGED
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.figure_factory as ff
 import plotly.graph_objects as go
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("model_performance", "visualization")
 @tasks("regression")
-def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float = 0.1):
+def RegressionResidualsPlot(
+    model: VMModel, dataset: VMDataset, bin_size: float = 0.1
+) -> Tuple[go.Figure, go.Figure, RawData]:
     """
     Evaluates regression model performance using residual distribution and actual vs. predicted plots.
 
validmind/tests/model_validation/RougeScore.py CHANGED
@@ -2,16 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 from rouge import Rouge
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def RougeScore(dataset, model, metric="rouge-1"):
+def RougeScore(
+    dataset: VMDataset,
+    model: VMModel,
+    metric: str = "rouge-1",
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the quality of machine-generated text using ROUGE metrics and visualizes the results to provide
     comprehensive performance insights.
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py CHANGED
@@ -2,17 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import norm
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
+def TimeSeriesPredictionWithCI(
+    dataset: VMDataset,
+    model: VMModel,
+    confidence: float = 0.95,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence
     intervals.
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py CHANGED
@@ -2,14 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionsPlot(dataset, model):
+def TimeSeriesPredictionsPlot(
+    dataset: VMDataset,
+    model: VMModel,
+) -> Tuple[go.Figure, RawData]:
     """
     Plot actual vs predicted values for time series data and generate a visual comparison for the model.
 
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py CHANGED
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Optional, Tuple
 
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn import metrics
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_performance", "sklearn")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesR2SquareBySegments(dataset, model, segments=None):
+def TimeSeriesR2SquareBySegments(
+    dataset: VMDataset, model: VMModel, segments: Optional[int] = None
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the R-Squared values of regression models over specified time segments in time series data to assess
     segment-wise model performance.
validmind/tests/model_validation/TokenDisparity.py CHANGED
@@ -2,15 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def TokenDisparity(dataset, model):
+def TokenDisparity(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and
     bar charts, alongside compiling a comprehensive table of descriptive statistics for token counts.
validmind/tests/model_validation/ToxicityScore.py CHANGED
@@ -2,16 +2,21 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def ToxicityScore(dataset, model):
+def ToxicityScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content.
 
validmind/tests/model_validation/embeddings/ClusterDistribution.py CHANGED
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.cluster import KMeans
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int = 5):
+def ClusterDistribution(
+    model: VMModel, dataset: VMDataset, num_clusters: int = 5
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
 
validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py CHANGED
@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def CosineSimilarityComparison(dataset, models):
+def CosineSimilarityComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[go.Figure, RawData, pd.DataFrame]:
     """
     Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both
     statistical and visual insights.
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py CHANGED
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
+def CosineSimilarityDistribution(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
     histogram.