validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
Files changed (189)
  1. validmind/__init__.py +6 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +17 -11
  4. validmind/ai/utils.py +2 -2
  5. validmind/api_client.py +75 -32
  6. validmind/client.py +108 -100
  7. validmind/client_config.py +3 -3
  8. validmind/datasets/classification/__init__.py +7 -3
  9. validmind/datasets/credit_risk/lending_club.py +28 -16
  10. validmind/datasets/nlp/cnn_dailymail.py +10 -4
  11. validmind/datasets/regression/__init__.py +22 -5
  12. validmind/errors.py +17 -7
  13. validmind/input_registry.py +1 -1
  14. validmind/logging.py +44 -35
  15. validmind/models/foundation.py +2 -2
  16. validmind/models/function.py +10 -3
  17. validmind/template.py +30 -22
  18. validmind/test_suites/__init__.py +2 -2
  19. validmind/tests/_store.py +13 -4
  20. validmind/tests/comparison.py +65 -33
  21. validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
  22. validmind/tests/data_validation/AutoMA.py +1 -1
  23. validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
  24. validmind/tests/data_validation/BoxPierce.py +3 -1
  25. validmind/tests/data_validation/ClassImbalance.py +4 -2
  26. validmind/tests/data_validation/DatasetDescription.py +3 -24
  27. validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
  28. validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
  29. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
  30. validmind/tests/data_validation/HighCardinality.py +5 -1
  31. validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
  32. validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
  33. validmind/tests/data_validation/IQROutliersTable.py +5 -2
  34. validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
  35. validmind/tests/data_validation/JarqueBera.py +2 -2
  36. validmind/tests/data_validation/LJungBox.py +2 -2
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  38. validmind/tests/data_validation/MissingValues.py +14 -10
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  40. validmind/tests/data_validation/MutualInformation.py +2 -1
  41. validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
  42. validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
  43. validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
  44. validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
  45. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
  46. validmind/tests/data_validation/RollingStatsPlot.py +2 -1
  47. validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
  48. validmind/tests/data_validation/SeasonalDecompose.py +1 -1
  49. validmind/tests/data_validation/ShapiroWilk.py +2 -2
  50. validmind/tests/data_validation/Skewness.py +7 -6
  51. validmind/tests/data_validation/SpreadPlot.py +1 -1
  52. validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
  53. validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
  54. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
  57. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  58. validmind/tests/data_validation/WOEBinTable.py +1 -1
  59. validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
  60. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  61. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  62. validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
  63. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  64. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
  65. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  66. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  67. validmind/tests/data_validation/nlp/TextDescription.py +1 -1
  68. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  69. validmind/tests/decorator.py +14 -11
  70. validmind/tests/load.py +38 -24
  71. validmind/tests/model_validation/BertScore.py +7 -1
  72. validmind/tests/model_validation/BleuScore.py +7 -1
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
  74. validmind/tests/model_validation/ContextualRecall.py +9 -1
  75. validmind/tests/model_validation/FeaturesAUC.py +1 -1
  76. validmind/tests/model_validation/MeteorScore.py +7 -1
  77. validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
  78. validmind/tests/model_validation/RegardScore.py +6 -1
  79. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
  80. validmind/tests/model_validation/RougeScore.py +3 -1
  81. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
  82. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
  83. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
  84. validmind/tests/model_validation/TokenDisparity.py +5 -1
  85. validmind/tests/model_validation/ToxicityScore.py +2 -0
  86. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  87. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
  88. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
  90. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
  91. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
  92. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
  93. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
  94. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
  95. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
  96. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  99. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
  100. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
  101. validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
  102. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
  103. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
  104. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
  105. validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
  106. validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
  107. validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
  108. validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
  109. validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
  110. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
  111. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
  112. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
  113. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
  114. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
  116. validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
  117. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
  118. validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
  119. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
  120. validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
  121. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
  122. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
  123. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
  124. validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
  125. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
  126. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
  127. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
  128. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
  129. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
  130. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  131. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
  132. validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
  133. validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
  134. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
  135. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
  136. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
  137. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
  138. validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
  139. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
  140. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
  141. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  142. validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
  143. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
  144. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  145. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
  146. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
  147. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
  148. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
  149. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
  150. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
  151. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  152. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
  153. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
  154. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
  155. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
  156. validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
  157. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
  158. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
  159. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
  160. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
  161. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
  162. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
  163. validmind/tests/output.py +66 -11
  164. validmind/tests/prompt_validation/Clarity.py +1 -1
  165. validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
  166. validmind/tests/prompt_validation/Robustness.py +6 -1
  167. validmind/tests/prompt_validation/Specificity.py +1 -1
  168. validmind/tests/run.py +28 -14
  169. validmind/tests/test_providers.py +28 -35
  170. validmind/tests/utils.py +17 -4
  171. validmind/unit_metrics/__init__.py +1 -1
  172. validmind/utils.py +295 -31
  173. validmind/vm_models/dataset/dataset.py +19 -16
  174. validmind/vm_models/dataset/utils.py +5 -3
  175. validmind/vm_models/figure.py +6 -6
  176. validmind/vm_models/input.py +6 -5
  177. validmind/vm_models/model.py +5 -5
  178. validmind/vm_models/result/result.py +122 -43
  179. validmind/vm_models/result/utils.py +9 -28
  180. validmind/vm_models/test_suite/__init__.py +5 -0
  181. validmind/vm_models/test_suite/runner.py +5 -5
  182. validmind/vm_models/test_suite/summary.py +20 -2
  183. validmind/vm_models/test_suite/test.py +6 -6
  184. validmind/vm_models/test_suite/test_suite.py +10 -10
  185. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
  186. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
  187. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
  188. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
  189. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
validmind/tests/data_validation/nlp/TextDescription.py CHANGED
@@ -173,4 +173,4 @@ def TextDescription(
     )
     )
 
-    return (*figures, RawData(metrics_dataframe=metrics_df))
+    return (*figures, RawData(metrics_dataframe=metrics_df, dataset=dataset.input_id))
validmind/tests/data_validation/nlp/Toxicity.py CHANGED
@@ -73,4 +73,4 @@ def Toxicity(dataset):
 
     plt.close()
 
-    return fig, RawData(toxicity_scores=toxicity_scores)
+    return fig, RawData(toxicity_scores=toxicity_scores, dataset=dataset.input_id)
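The same change recurs across most of the test modules below: each RawData return now also carries model=model.input_id and/or dataset=dataset.input_id, so a result's raw data can be traced back to the exact inputs that produced it. A minimal sketch of the pattern as it would look in a user-defined test (the function name, tags, and metric here are illustrative, not part of the package):

import pandas as pd

from validmind import RawData, tags, tasks


@tags("example")
@tasks("regression")
def PredictionSummary(dataset, model):
    # Hypothetical raw output: the model's predictions on this dataset
    pred_df = pd.DataFrame({"y_pred": dataset.y_pred(model)})

    # The input_id fields let downstream consumers of the result trace
    # the raw data back to the dataset and model inputs that produced it
    return pred_df.describe(), RawData(
        predictions=pred_df,
        model=model.input_id,
        dataset=dataset.input_id,
    )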
validmind/tests/decorator.py CHANGED
@@ -7,6 +7,7 @@
 import inspect
 import os
 from functools import wraps
+from typing import Any, Callable, List, Optional, TypeVar, Union
 
 from validmind.logging import get_logger
 
@@ -15,8 +16,10 @@ from .load import load_test
 
 logger = get_logger(__name__)
 
+F = TypeVar("F", bound=Callable[..., Any])
 
-def _get_save_func(func, test_id):
+
+def _get_save_func(func: Callable[..., Any], test_id: str) -> Callable[..., None]:
     """Helper function to save a decorated function to a file
 
     Useful when a custom test function has been created inline in a notebook or
@@ -29,7 +32,7 @@ def _get_save_func(func, test_id):
     # remove decorator line
     source = source.split("\n", 1)[1]
 
-    def save(root_folder=".", imports=None):
+    def save(root_folder: str = ".", imports: Optional[List[str]] = None) -> None:
         parts = test_id.split(".")
 
         if len(parts) > 1:
@@ -84,7 +87,7 @@ def _get_save_func(func, test_id):
     return save
 
 
-def test(func_or_id):
+def test(func_or_id: Union[Callable[..., Any], str, None]) -> Callable[[F], F]:
     """Decorator for creating and registering custom tests
 
     This decorator registers the function it wraps as a test function within ValidMind
@@ -109,14 +112,14 @@ def test(func_or_id):
     as the metric's description.
 
     Args:
-        func: The function to decorate
-        test_id: The identifier for the metric. If not provided, the function name is used.
+        func_or_id (Union[Callable[..., Any], str, None]): Either the function to decorate
+            or the test ID. If None, the function name is used.
 
     Returns:
-        The decorated function.
+        Callable[[F], F]: The decorated function.
     """
 
-    def decorator(func):
+    def decorator(func: F) -> F:
         test_id = func_or_id or f"validmind.custom_metrics.{func.__name__}"
         test_func = load_test(test_id, func, reload=True)
         test_store.register_test(test_id, test_func)
@@ -136,28 +139,28 @@ def test(func_or_id):
     return decorator
 
 
-def tasks(*tasks):
+def tasks(*tasks: str) -> Callable[[F], F]:
     """Decorator for specifying the task types that a test is designed for.
 
     Args:
         *tasks: The task types that the test is designed for.
     """
 
-    def decorator(func):
+    def decorator(func: F) -> F:
         func.__tasks__ = list(tasks)
         return func
 
     return decorator
 
 
-def tags(*tags):
+def tags(*tags: str) -> Callable[[F], F]:
     """Decorator for specifying tags for a test.
 
     Args:
         *tags: The tags to apply to the test.
     """
 
-    def decorator(func):
+    def decorator(func: F) -> F:
        func.__tags__ = list(tags)
        return func
 
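Together, test, tasks, and tags are the decorators for defining custom tests; the annotations above formalize their signatures without changing behavior. A short usage sketch, assuming the conventional vm.test(...) entry point (the test ID and body are illustrative):

import pandas as pd

import validmind as vm


# The explicit ID is an assumption for illustration; per the decorator
# above, omitting it defaults to validmind.custom_metrics.ClassCounts.
@vm.test("my_custom_tests.ClassCounts")
@vm.tasks("classification")
@vm.tags("tabular_data")
def ClassCounts(dataset):
    """Counts of each class in the dataset's target column."""
    return pd.DataFrame(dataset.df[dataset.target_column].value_counts())

The save() helper built by _get_save_func writes the function's source to a folder derived from the test ID, with the decorator line stripped first (the `# remove decorator line` step), which is how an inline notebook test gets persisted into a test provider folder.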
validmind/tests/load.py CHANGED
@@ -7,7 +7,7 @@
 import inspect
 import json
 from pprint import pformat
-from typing import List
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from uuid import uuid4
 
 import pandas as pd
@@ -32,7 +32,10 @@ INPUT_TYPE_MAP = {
 }
 
 
-def _inspect_signature(test_func: callable):
+def _inspect_signature(
+    test_func: Callable[..., Any],
+) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
+    """Inspect a test function's signature to get inputs and parameters"""
     inputs = {}
     params = {}
 
@@ -56,7 +59,9 @@ def _inspect_signature(test_func: callable):
     return inputs, params
 
 
-def load_test(test_id: str, test_func: callable = None, reload: bool = False):
+def load_test(
+    test_id: str, test_func: Optional[Callable[..., Any]] = None, reload: bool = False
+) -> Callable[..., Any]:
     """Load a test by test ID
 
     Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:tag]`.
@@ -67,6 +72,8 @@ def load_test(test_id: str, test_func: callable = None, reload: bool = False):
         test_id (str): The test ID in the format `namespace.path_to_module.TestName[:tag]`
         test_func (callable, optional): The test function to load. If not provided, the
             test will be loaded from the test provider. Defaults to None.
+        reload (bool, optional): If True, reload the test even if it's already loaded.
+            Defaults to False.
     """
     # remove tag if present
     test_id = test_id.split(":", 1)[0]
@@ -109,7 +116,8 @@ def load_test(test_id: str, test_func: callable = None, reload: bool = False):
     return test_store.get_test(test_id)
 
 
-def _list_test_ids():
+def _list_test_ids() -> List[str]:
+    """List all available test IDs"""
    test_ids = []
 
     for namespace, test_provider in test_provider_store.test_providers.items():
@@ -120,7 +128,7 @@ def _list_test_ids():
     return test_ids
 
 
-def _load_tests(test_ids):
+def _load_tests(test_ids: List[str]) -> Dict[str, Callable[..., Any]]:
     """Load a set of tests, handling missing dependencies."""
     tests = {}
 
@@ -138,12 +146,12 @@ def _load_tests(test_ids):
             logger.debug(str(e))
 
             if e.extra:
-                logger.info(
+                logger.debug(
                     f"Skipping `{test_id}` as it requires extra dependencies: {e.required_dependencies}."
                     f" Please run `pip install validmind[{e.extra}]` to view and run this test."
                 )
             else:
-                logger.info(
+                logger.debug(
                     f"Skipping `{test_id}` as it requires missing dependencies: {e.required_dependencies}."
                     " Please install the missing dependencies to view and run this test."
                 )
@@ -151,7 +159,8 @@ def _load_tests(test_ids):
     return tests
 
 
-def _test_description(test_description: str, num_lines: int = 5):
+def _test_description(test_description: str, num_lines: int = 5) -> str:
+    """Format a test description"""
     description = test_description.strip("\n").strip()
 
     if len(description.split("\n")) > num_lines:
@@ -160,7 +169,10 @@ def _test_description(test_description: str, num_lines: int = 5):
     return description
 
 
-def _pretty_list_tests(tests, truncate=True):
+def _pretty_list_tests(
+    tests: Dict[str, Callable[..., Any]], truncate: bool = True
+) -> None:
+    """Pretty print a list of tests"""
     table = [
         {
             "ID": test_id,
@@ -171,6 +183,8 @@ def _pretty_list_tests(tests, truncate=True):
             ),
             "Required Inputs": list(test.inputs.keys()),
             "Params": test.params,
+            "Tags": test.__tags__,
+            "Tasks": test.__tasks__,
         }
         for test_id, test in tests.items()
     ]
@@ -178,10 +192,8 @@ def _pretty_list_tests(tests, truncate=True):
     return format_dataframe(pd.DataFrame(table))
 
 
-def list_tags():
-    """
-    List unique tags from all test classes.
-    """
+def list_tags() -> List[str]:
+    """List all unique available tags"""
 
     unique_tags = set()
 
@@ -191,7 +203,7 @@ def list_tags():
     return list(unique_tags)
 
 
-def list_tasks_and_tags(as_json=False):
+def list_tasks_and_tags(as_json: bool = False) -> Union[str, Dict[str, List[str]]]:
     """
     List all task types and their associated tags, with one row per task type and
     all tags for a task type in one row.
@@ -218,11 +230,8 @@ def list_tasks_and_tags(as_json=False):
     )
 
 
-def list_tasks():
-    """
-    List unique tasks from all test classes.
-    """
-
+def list_tasks() -> List[str]:
+    """List all unique available tasks"""
     unique_tasks = set()
 
     for test in _load_tests(list_tests(pretty=False)).values():
@@ -231,7 +240,13 @@ def list_tasks():
     return list(unique_tasks)
 
 
-def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
+def list_tests(
+    filter: Optional[str] = None,
+    task: Optional[str] = None,
+    tags: Optional[List[str]] = None,
+    pretty: bool = True,
+    truncate: bool = True,
+) -> Union[List[str], None]:
     """List all tests in the tests directory.
 
     Args:
@@ -245,9 +260,6 @@ def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
             formatted table. Defaults to True.
         truncate (bool, optional): If True, truncates the test description to the first
             line. Defaults to True. (only used if pretty=True)
-
-    Returns:
-        list or pandas.DataFrame: A list of all tests or a formatted table.
     """
     test_ids = _list_test_ids()
 
@@ -286,7 +298,9 @@ def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
         return _pretty_list_tests(tests, truncate=truncate)
 
 
-def describe_test(test_id: TestID = None, raw: bool = False, show: bool = True):
+def describe_test(
+    test_id: Optional[TestID] = None, raw: bool = False, show: bool = True
+) -> Union[str, HTML, Dict[str, Any]]:
     """Get or show details about the test
 
     This function can be used to see test details including the test name, description,
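The listing helpers above gained type annotations, two new columns (Tags, Tasks) in the pretty-printed table, and quieter debug-level messages when tests are skipped for missing optional dependencies. A sketch of the discovery workflow these functions support (the filter values are illustrative):

from validmind.tests import describe_test, list_tags, list_tasks, list_tests

# Plain list of matching test IDs; pretty=True renders a formatted table instead
ids = list_tests(task="classification", tags=["model_performance"], pretty=False)

# Valid values for the task/tags filters come from these helpers
print(list_tasks())
print(list_tags())

# Name, description, required inputs, and params for a single test
describe_test("validmind.model_validation.sklearn.ConfusionMatrix")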
validmind/tests/model_validation/BertScore.py CHANGED
@@ -131,4 +131,10 @@ def BertScore(
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *figures, RawData(bert_scores_df=metrics_df))
+    return (
+        result_df,
+        *figures,
+        RawData(
+            bert_scores_df=metrics_df, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
validmind/tests/model_validation/BleuScore.py CHANGED
@@ -114,4 +114,10 @@ def BleuScore(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *figures, RawData(bleu_scores_df=metrics_df))
+    return (
+        result_df,
+        *figures,
+        RawData(
+            bleu_scores_df=metrics_df, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
validmind/tests/model_validation/ClusterSizeDistribution.py CHANGED
@@ -72,4 +72,6 @@ def ClusterSizeDistribution(dataset: VMDataset, model: VMModel):
     fig.update_yaxes(title_text="Counts", showgrid=False)
     fig.update_layout(title_text="Cluster distribution", title_x=0.5, barmode="group")
 
-    return fig, RawData(cluster_counts=df_counts)
+    return fig, RawData(
+        cluster_counts=df_counts, model=model.input_id, dataset=dataset.input_id
+    )
validmind/tests/model_validation/ContextualRecall.py CHANGED
@@ -118,4 +118,12 @@ def ContextualRecall(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *figures, RawData(contextual_recall_scores=metrics_df))
+    return (
+        result_df,
+        *figures,
+        RawData(
+            contextual_recall_scores=metrics_df,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )
validmind/tests/model_validation/FeaturesAUC.py CHANGED
@@ -95,4 +95,4 @@ def FeaturesAUC(dataset: VMDataset, fontsize: int = 12, figure_height: int = 500
         height=figure_height,
     )
 
-    return fig, RawData(feature_aucs=aucs)
+    return fig, RawData(feature_aucs=aucs, dataset=dataset.input_id)
validmind/tests/model_validation/MeteorScore.py CHANGED
@@ -117,4 +117,10 @@ def MeteorScore(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *figures, RawData(meteor_scores=metrics_df))
+    return (
+        result_df,
+        *figures,
+        RawData(
+            meteor_scores=metrics_df, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
validmind/tests/model_validation/ModelPredictionResiduals.py CHANGED
@@ -102,4 +102,8 @@ def ModelPredictionResiduals(
     # Create a summary DataFrame for the KS normality test results
     summary_df = pd.DataFrame([summary])
 
-    return (summary_df, *figures, RawData(residuals=residuals))
+    return (
+        summary_df,
+        *figures,
+        RawData(residuals=residuals, model=model.input_id, dataset=dataset.input_id),
+    )
validmind/tests/model_validation/RegardScore.py CHANGED
@@ -145,5 +145,10 @@ def RegardScore(dataset, model):
     return (
         result_df,
         *figures,
-        RawData(true_regard=true_df, pred_regard=pred_df),
+        RawData(
+            true_regard=true_df,
+            pred_regard=pred_df,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
validmind/tests/model_validation/RegressionResidualsPlot.py CHANGED
@@ -105,4 +105,13 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
         )
     )
 
-    return (*figures, RawData(residuals=residuals, y_true=y_true, y_pred=y_pred))
+    return (
+        *figures,
+        RawData(
+            residuals=residuals,
+            y_true=y_true,
+            y_pred=y_pred,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )
validmind/tests/model_validation/RougeScore.py CHANGED
@@ -121,5 +121,7 @@ def RougeScore(dataset, model, metric="rouge-1"):
     return (
         pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"}),
         *figures,
-        RawData(rouge_scores_df=df_scores),
+        RawData(
+            rouge_scores_df=df_scores, model=model.input_id, dataset=dataset.input_id
+        ),
     )
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py CHANGED
@@ -152,5 +152,7 @@ def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
             z_score=z_score,
             lower_confidence=lower_conf,
             upper_confidence=upper_conf,
+            model=model.input_id,
+            dataset=dataset.input_id,
         ),
     )
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py CHANGED
@@ -4,7 +4,7 @@
 
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_predictions", "visualization")
@@ -70,4 +70,12 @@ def TimeSeriesPredictionsPlot(dataset, model):
         template="plotly_white",
     )
 
-    return fig
+    raw_data = RawData(
+        time_index=time_index,
+        actual_values=dataset.y,
+        predicted_values=y_pred,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
+
+    return fig, raw_data
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_performance", "sklearn")
@@ -105,4 +105,8 @@ def TimeSeriesR2SquareBySegments(dataset, model, segments=None):
         },
     )
 
-    return fig, results_df
+    return (
+        fig,
+        results_df,
+        RawData(summary=results_df, model=model.input_id, dataset=dataset.input_id),
+    )
validmind/tests/model_validation/TokenDisparity.py CHANGED
@@ -108,4 +108,8 @@ def TokenDisparity(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *figures, RawData(token_counts_df=df))
+    return (
+        result_df,
+        *figures,
+        RawData(token_counts_df=df, model=model.input_id, dataset=dataset.input_id),
+    )
validmind/tests/model_validation/ToxicityScore.py CHANGED
@@ -146,5 +146,7 @@ def ToxicityScore(dataset, model):
             input_toxicity_df=input_df,
             true_toxicity_df=true_df,
             pred_toxicity_df=pred_df,
+            model=model.input_id,
+            dataset=dataset.input_id,
         ),
     )
validmind/tests/model_validation/embeddings/ClusterDistribution.py CHANGED
@@ -62,4 +62,4 @@ def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int =
         title="Embeddings Cluster Distribution",
     )
 
-    return fig, RawData(labels=labels)
+    return fig, RawData(labels=labels, model=model.input_id, dataset=dataset.input_id)
validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py CHANGED
@@ -113,5 +113,9 @@ def CosineSimilarityComparison(dataset, models):
     return (
         *figures,
         stats_df,
-        RawData(similarity_matrices=pd.DataFrame(similarity_matrices)),
+        RawData(
+            similarity_matrices=pd.DataFrame(similarity_matrices),
+            dataset=dataset.input_id,
+            models=[model.input_id for model in models],
+        ),
     )
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py CHANGED
@@ -59,4 +59,8 @@ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
         nbins=100,
         title="Cosine Similarity Distribution",
         labels={"x": "Cosine Similarity"},
-    ), RawData(similarity_scores=similarity_scores)
+    ), RawData(
+        similarity_scores=similarity_scores,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py CHANGED
@@ -81,4 +81,8 @@ def CosineSimilarityHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig, RawData(similarity_matrix=similarity_matrix)
+    return fig, RawData(
+        similarity_matrix=similarity_matrix,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py CHANGED
@@ -77,5 +77,7 @@ def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
             embedding_means=embedding_means,
             embedding_medians=embedding_medians,
             embedding_stds=embedding_stds,
+            model=model.input_id,
+            dataset=dataset.input_id,
         ),
     )
validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py CHANGED
@@ -89,4 +89,8 @@ def EmbeddingsVisualization2D(
     fig = px.scatter(**scatter_kwargs)
     fig.update_layout(width=500, height=500)
 
-    return fig, RawData(tsne_embeddings=reduced_embeddings)
+    return fig, RawData(
+        tsne_embeddings=reduced_embeddings,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py CHANGED
@@ -57,7 +57,7 @@ def EuclideanDistanceComparison(dataset, models):
     figures = []
     all_stats = []
 
-    distance_matrices = {}
+    distance_matrices = []
 
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
@@ -105,6 +105,10 @@ def EuclideanDistanceComparison(dataset, models):
     stats_df = pd.DataFrame(all_stats)
 
     # Add raw data to return
-    raw_data = RawData(distance_matrices=pd.DataFrame(distance_matrices))
+    raw_data = RawData(
+        distance_matrices=pd.DataFrame(distance_matrices),
+        dataset=dataset.input_id,
+        models=[model.input_id for model in models],
+    )
 
     return (stats_df, *figures, raw_data)
validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py CHANGED
@@ -79,4 +79,6 @@ def EuclideanDistanceHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig, RawData(distance_matrix=distance_matrix)
+    return fig, RawData(
+        distance_matrix=distance_matrix, model=model.input_id, dataset=dataset.input_id
+    )
validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py CHANGED
@@ -90,4 +90,7 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
     )
     figures.append(fig)
 
-    return (*figures, RawData(pca_results=pca_df))
+    return (
+        *figures,
+        RawData(pca_results=pca_df, model=model.input_id, dataset=dataset.input_id),
+    )
validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py CHANGED
@@ -97,4 +97,8 @@ def StabilityAnalysisKeyword(
         mean_similarity_threshold,
     )
 
-    return results, RawData(original_perturbed_similarity=raw_data)
+    return results, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py CHANGED
@@ -151,4 +151,8 @@ def StabilityAnalysisRandomNoise(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(original_perturbed_similarity=raw_data)
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py CHANGED
@@ -107,4 +107,8 @@ def StabilityAnalysisSynonyms(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(original_perturbed_similarity=raw_data)
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py CHANGED
@@ -134,4 +134,8 @@ def StabilityAnalysisTranslation(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(original_perturbed_similarity=raw_data)
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py CHANGED
@@ -110,5 +110,10 @@ def TSNEComponentsPairwisePlots(
 
     return (
         *figures,
-        RawData(embeddings_scaled=embeddings_scaled, tsne_results=tsne_results),
+        RawData(
+            embeddings_scaled=embeddings_scaled,
+            tsne_results=tsne_results,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
validmind/tests/model_validation/ragas/AnswerCorrectness.py CHANGED
@@ -123,8 +123,10 @@ def AnswerCorrectness(
 
     score_column = "answer_correctness"
 
-    fig_histogram = px.histogram(x=result_df[score_column].to_list(), nbins=10)
-    fig_box = px.box(x=result_df[score_column].to_list())
+    fig_histogram = px.histogram(
+        x=result_df[score_column].to_list(), nbins=10, title="Answer Correctness"
+    )
+    fig_box = px.box(x=result_df[score_column].to_list(), title="Answer Correctness")
 
     return (
         {
@@ -144,5 +146,5 @@ def AnswerCorrectness(
         },
         fig_histogram,
         fig_box,
-        RawData(evaluation_results=result_df),
+        RawData(evaluation_results=result_df, dataset=dataset.input_id),
     )
validmind/tests/model_validation/ragas/AspectCritic.py CHANGED
@@ -195,5 +195,8 @@ def AspectCritic(
             ]
         },
         fig,
-        RawData(evaluation_results=result_df),
+        RawData(
+            evaluation_results=result_df,
+            dataset=dataset.input_id,
+        ),
     )