validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.24.dist-info/METADATA +0 -118
  196. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,160 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import os
6
+ from typing import TYPE_CHECKING, Dict, List, Union
7
+
8
+ import pandas as pd
9
+ from ipywidgets import HTML, GridBox, Layout
10
+ from jinja2 import Template
11
+
12
+ from ... import api_client
13
+ from ...logging import get_logger
14
+ from ..dataset import VMDataset
15
+ from ..figure import Figure
16
+ from ..input import VMInput
17
+
18
+ if TYPE_CHECKING:
19
+ from .result import ResultTable
20
+
21
+
22
+ AI_REVISION_NAME = "Generated by ValidMind AI"
23
+ DEFAULT_REVISION_NAME = "Default Description"
24
+
25
+ logger = get_logger(__name__)
26
+
27
+ _result_template = None
28
+
29
+
30
+ def get_result_template():
31
+ """Get the jinja html template for rendering test results"""
32
+ global _result_template
33
+
34
+ if _result_template is None:
35
+ with open(os.path.join(os.path.dirname(__file__), "result.jinja")) as f:
36
+ _result_template = Template(f.read())
37
+
38
+ return _result_template
39
+
40
+
41
+ async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
42
+ """Create or Update a Metadata Object"""
43
+ parts = content_id.split("::")
44
+ content_id = parts[0]
45
+ revision_name = parts[1] if len(parts) > 1 else None
46
+
47
+ # we always want composite metric definitions to be updated
48
+ should_update = content_id.startswith("composite_metric_def:")
49
+
50
+ # if we are updating a metric or test description, we check if the text
51
+ # has changed from the last time it was logged, and only update if it has
52
+ if content_id.split(":", 1)[0] in ["metric_description", "test_description"]:
53
+ try:
54
+ md = await api_client.aget_metadata(content_id)
55
+ # if there is an existing description, only update it if the new one
56
+ # is different and is an AI-generated description
57
+ should_update = (
58
+ md["text"] != text if revision_name == AI_REVISION_NAME else False
59
+ )
60
+ logger.debug(f"Check if description has changed: {should_update}")
61
+ except Exception:
62
+ # if exception, assume its not created yet TODO: don't catch all
63
+ should_update = True
64
+
65
+ if should_update:
66
+ if revision_name:
67
+ content_id = f"{content_id}::{revision_name}"
68
+
69
+ logger.debug(f"Updating metadata for `{content_id}`")
70
+
71
+ await api_client.alog_metadata(content_id, text, _json)
72
+
73
+
74
+ def check_for_sensitive_data(data: pd.DataFrame, inputs: List[VMInput]):
75
+ """Check if a table contains raw data from input datasets"""
76
+ dataset_columns = {
77
+ col: len(input_obj.df)
78
+ for input_obj in inputs
79
+ if isinstance(input_obj, VMDataset)
80
+ for col in input_obj.columns
81
+ }
82
+
83
+ table_columns = {col: len(data) for col in data.columns}
84
+
85
+ offending_columns = [
86
+ col
87
+ for col in table_columns
88
+ if col in dataset_columns and table_columns[col] == dataset_columns[col]
89
+ ]
90
+
91
+ if offending_columns:
92
+ raise ValueError(
93
+ f"Raw input data found in table, pass `unsafe=True` "
94
+ f"or remove the offending columns: {offending_columns}"
95
+ )
96
+
97
+
98
+ def tables_to_widgets(tables: List["ResultTable"]):
99
+ """Convert summary (list of json tables) into a list of ipywidgets"""
100
+ widgets = [
101
+ HTML("<h3>Tables</h3>"),
102
+ ]
103
+
104
+ for table in tables:
105
+ html = ""
106
+ if table.title:
107
+ html += f"<h4>{table.title}</h4>"
108
+
109
+ html += (
110
+ table.data.reset_index(drop=True)
111
+ .style.format(precision=4)
112
+ .hide(axis="index")
113
+ .set_table_styles(
114
+ [
115
+ {
116
+ "selector": "",
117
+ "props": [("width", "100%")],
118
+ },
119
+ {
120
+ "selector": "th",
121
+ "props": [("text-align", "left")],
122
+ },
123
+ {
124
+ "selector": "tbody tr:nth-child(even)",
125
+ "props": [("background-color", "#FFFFFF")],
126
+ },
127
+ {
128
+ "selector": "tbody tr:nth-child(odd)",
129
+ "props": [("background-color", "#F5F5F5")],
130
+ },
131
+ {
132
+ "selector": "td, th",
133
+ "props": [
134
+ ("padding-left", "5px"),
135
+ ("padding-right", "5px"),
136
+ ],
137
+ },
138
+ ]
139
+ )
140
+ .set_properties(**{"text-align": "left"})
141
+ .to_html(escape=False)
142
+ )
143
+
144
+ widgets.append(HTML(html))
145
+
146
+ return widgets
147
+
148
+
149
+ def figures_to_widgets(figures: List[Figure]) -> list:
150
+ """Plot figures to a ipywidgets GridBox"""
151
+ num_columns = 2 if len(figures) > 1 else 1
152
+
153
+ plot_widgets = GridBox(
154
+ [figure.to_widget() for figure in figures],
155
+ layout=Layout(
156
+ grid_template_columns=f"repeat({num_columns}, 1fr)",
157
+ ),
158
+ )
159
+
160
+ return [HTML("<h3>Figures</h3>"), plot_widgets]
@@ -9,7 +9,6 @@ from IPython.display import display
9
9
 
10
10
  from ...logging import get_logger
11
11
  from ...utils import is_notebook, run_async, run_async_check
12
- from ..test_context import TestContext, TestInput
13
12
  from .summary import TestSuiteSummary
14
13
  from .test_suite import TestSuite
15
14
 
@@ -22,8 +21,6 @@ class TestSuiteRunner:
22
21
  """
23
22
 
24
23
  suite: TestSuite = None
25
- context: TestContext = None
26
- input: TestInput = None
27
24
  config: dict = None
28
25
 
29
26
  _test_configs: dict = None
@@ -32,67 +29,33 @@ class TestSuiteRunner:
32
29
  pbar_description: widgets.Label = None
33
30
  pbar_box: widgets.HBox = None
34
31
 
35
- def __init__(self, suite: TestSuite, input: TestInput, config: dict = None):
32
+ def __init__(self, suite: TestSuite, config: dict = None, inputs: dict = None):
36
33
  self.suite = suite
37
- self.input = input
38
34
  self.config = config or {}
39
35
 
40
- self.context = TestContext()
36
+ self._load_config(inputs)
41
37
 
42
- self._load_config()
43
- self._init_tests()
44
-
45
- def _load_config(self):
38
+ def _load_config(self, inputs: dict = None):
46
39
  """Splits the config into a global config and test configs"""
47
- self._test_configs = {}
40
+ self._test_configs = {
41
+ test.test_id: {"inputs": inputs or {}} for test in self.suite.get_tests()
42
+ }
48
43
 
49
44
  for key, value in self.config.items():
50
- test_ids = [test.test_id for test in self.suite.get_tests()]
51
-
52
45
  # If the key does not exist in the test suite, we need to
53
46
  # inform the user the config is probably wrong but we will
54
47
  # keep running all tests
55
- if key not in test_ids:
48
+ if key not in self._test_configs:
56
49
  logger.warning(
57
50
  f"Config key '{key}' does not match a test_id in the template."
58
51
  "\n\tEnsure you registered a content block with the correct content_id in the template"
59
52
  "\n\tThe configuration for this test will be ignored."
60
53
  )
61
- else:
62
- self._test_configs[key] = value
54
+ continue
63
55
 
64
- def _init_tests(self):
65
- """
66
- Loads the tests in a test suite
67
- """
68
- for section in self.suite.sections:
69
- for test in section.tests:
70
- # use local inputs from config if provided
71
- test_configs = self._test_configs.get(test.test_id, {})
72
- inputs = self.input
73
- if (
74
- test.test_id in self.config
75
- and "inputs" in self.config[test.test_id]
76
- ):
77
- inputs = TestInput(self.config[test.test_id]["inputs"])
78
- test_configs = {
79
- key: value
80
- for key, value in test_configs.items()
81
- if key != "inputs"
82
- }
83
- test_configs = test_configs.get("params", {})
84
- else:
85
- if (test_configs) and ("params" not in test_configs):
86
- # [DEPRECATED] This is the old way of setting test parameters
87
- msg = (
88
- "Setting test parameters directly in the 'config' parameter"
89
- " of the run_documentation_tests() method is deprecated. "
90
- "Instead, use the new format of the config: "
91
- 'config = {"test_id": {"params": {...}, "inputs": {...}}}'
92
- )
93
- logger.warning(msg)
94
-
95
- test.load(inputs=inputs, context=self.context, config=test_configs)
56
+ # override the global config (inputs) with the test-specific config
57
+ # TODO: better configuration would make for a better DX
58
+ self._test_configs[key] = value
96
59
 
97
60
  def _start_progress_bar(self, send: bool = True):
98
61
  """
@@ -176,12 +139,11 @@ class TestSuiteRunner:
176
139
 
177
140
  for section in self.suite.sections:
178
141
  for test in section.tests:
179
- if test._test_class is None:
180
- self.pbar.value += 1
181
- continue
182
-
183
- self.pbar_description.value = f"Running {test.test_type}: {test.name}"
184
- test.run(fail_fast=fail_fast)
142
+ self.pbar_description.value = f"Running {test.name}"
143
+ test.run(
144
+ fail_fast=fail_fast,
145
+ config=self._test_configs.get(test.test_id, {}),
146
+ )
185
147
  self.pbar.value += 1
186
148
 
187
149
  if send:
@@ -9,7 +9,7 @@ import ipywidgets as widgets
9
9
 
10
10
  from ...logging import get_logger
11
11
  from ...utils import display, md_to_html
12
- from ..test.result_wrapper import FailedResultWrapper
12
+ from ..result import ErrorResult
13
13
  from .test_suite import TestSuiteSection, TestSuiteTest
14
14
 
15
15
  logger = get_logger(__name__)
@@ -52,7 +52,7 @@ class TestSuiteSectionSummary:
52
52
  children.append(test.result.to_widget())
53
53
  titles.append(
54
54
  f"❌ {test.result.name}: {test.name} ({test.test_id})"
55
- if isinstance(test.result, FailedResultWrapper)
55
+ if isinstance(test.result, ErrorResult)
56
56
  else f"{test.result.name}: {test.name} ({test.test_id})"
57
57
  )
58
58
 
@@ -96,7 +96,7 @@ class TestSuiteSummary:
96
96
  from ...api_client import get_api_host, get_api_model
97
97
 
98
98
  ui_host = get_api_host().replace("/api/v1/tracking", "").replace("api", "app")
99
- link = f"{ui_host}/projects/{get_api_model()}/project-overview"
99
+ link = f"{ui_host}model-inventory/{get_api_model()}"
100
100
  results_link = f"""
101
101
  <h3>
102
102
  Check out the updated documentation in your
@@ -2,14 +2,14 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from ...errors import should_raise_on_fail_fast
5
+ from typing import Any, Dict, Union
6
+
7
+ from ...errors import LoadTestError, should_raise_on_fail_fast
6
8
  from ...logging import get_logger, log_performance
7
- from ...tests import LoadTestError
8
- from ...tests import load_test as load_test_class
9
+ from ...tests.load import load_test
10
+ from ...tests.run import run_test
9
11
  from ...utils import test_id_to_name
10
- from ..test.result_wrapper import FailedResultWrapper, ResultWrapper
11
- from ..test.test import Test
12
- from ..test_context import TestContext, TestInput
12
+ from ..result import ErrorResult, Result, TestResult
13
13
 
14
14
  logger = get_logger(__name__)
15
15
 
@@ -21,12 +21,11 @@ class TestSuiteTest:
21
21
 
22
22
  test_id: str
23
23
  output_template: str = None
24
- name: str = None
25
-
26
- _test_class: Test = None
27
- _test_instance: Test = None
24
+ name: Union[str, None] = None
25
+ description: Union[Dict[str, Any], None] = None
26
+ result: Union[Result, None] = None
28
27
 
29
- result: object = None
28
+ _load_failed: bool = False
30
29
 
31
30
  def __init__(self, test_id_or_obj):
32
31
  """Load the test class from the test id
@@ -42,69 +41,46 @@ class TestSuiteTest:
42
41
 
43
42
  self.name = test_id_to_name(self.test_id)
44
43
 
44
+ def get_default_config(self):
45
+ """Returns the default configuration for the test"""
45
46
  try:
46
- self._test_class = load_test_class(self.test_id)
47
+ test_func = load_test(self.test_id)
47
48
  except LoadTestError as e:
48
- self.result = FailedResultWrapper(
49
- error=e,
50
- message=f"Failed to load test '{self.test_id}'",
51
- result_id=self.test_id,
52
- )
53
- except Exception as e:
54
- # The test suite runner will appropriately ignore this error
55
- # since _test_class is None
56
49
  logger.error(f"Failed to load test '{self.test_id}': {e}")
57
50
 
58
- @property
59
- def test_type(self):
60
- return self._test_class.test_type
61
-
62
- def get_default_params(self):
63
- """Returns the default params for the test"""
64
- if not self._test_class:
65
- return {}
66
-
67
- return self._test_class.default_params
68
-
69
- def load(self, inputs: TestInput, context: TestContext, config: dict = None):
70
- """Load an instance of the test class"""
71
- if not self._test_class:
72
- return
73
-
74
- try:
75
- self._test_instance = self._test_class(
76
- test_id=self.test_id,
77
- context=context,
78
- inputs=inputs,
79
- params=config,
80
- output_template=self.output_template,
81
- )
82
- except Exception as e:
83
- logger.error(
84
- f"Failed to load test '{self.test_id}': "
85
- f"({e.__class__.__name__}) {e}"
86
- )
87
- self.result = FailedResultWrapper(
51
+ self._load_failed = True
52
+ self.result = ErrorResult(
88
53
  error=e,
89
54
  message=f"Failed to load test '{self.name}'",
90
55
  result_id=self.test_id,
91
56
  )
92
57
 
93
- def run(self, fail_fast: bool = False):
58
+ return None
59
+
60
+ config = {
61
+ # we use the input name ('dataset', 'model') as the key and the value
62
+ "inputs": {k: k for k in test_func.inputs},
63
+ "params": {k: v.get("default") for k, v in test_func.params.items()},
64
+ }
65
+
66
+ return config
67
+
68
+ def run(self, fail_fast: bool = False, config: dict = None):
94
69
  """Run the test"""
95
- if not self._test_instance:
96
- # test failed to load and we have already logged the error
70
+ if self._load_failed:
97
71
  return
98
72
 
99
73
  try:
100
- self._test_instance.validate_inputs()
101
-
102
74
  # run the test and log the performance if LOG_LEVEL is set to DEBUG
103
- log_performance(
104
- func=self._test_instance.run,
105
- name=self.test_id,
106
- logger=logger,
107
- )() # this is a decorator so we need to call it
75
+ @log_performance(name=self.test_id, logger=logger)
76
+ def run_test_with_logging():
77
+ return run_test(
78
+ self.test_id,
79
+ **(config or {}),
80
+ show=False,
81
+ )
82
+
83
+ self.result = run_test_with_logging()
108
84
 
109
85
  except Exception as e:
110
86
  if fail_fast and should_raise_on_fail_fast(e):
@@ -113,40 +89,30 @@ class TestSuiteTest:
113
89
  logger.error(
114
90
  f"Failed to run test '{self.test_id}': " f"({e.__class__.__name__}) {e}"
115
91
  )
116
- self.result = FailedResultWrapper(
117
- name=f"Failed {self._test_instance.test_type}",
92
+ self.result = ErrorResult(
118
93
  error=e,
119
94
  message=f"Failed to run '{self.name}'",
120
95
  result_id=self.test_id,
121
96
  )
122
97
 
123
- return
124
-
125
- if self._test_instance.result is None:
126
- self.result = FailedResultWrapper(
127
- name=f"Failed {self._test_instance.test_type}",
98
+ if self.result is None:
99
+ self.result = ErrorResult(
128
100
  error=None,
129
101
  message=f"'{self.name}' did not return a result",
130
102
  result_id=self.test_id,
131
103
  )
132
104
 
133
- return
134
-
135
- if not isinstance(self._test_instance.result, ResultWrapper):
136
- self.result = FailedResultWrapper(
137
- name=f"Failed {self._test_instance.test_type}",
105
+ if not isinstance(self.result, Result):
106
+ self.result = ErrorResult(
138
107
  error=None,
139
108
  message=f"{self.name} returned an invalid result: {self._test_instance.result}",
140
109
  result_id=self.test_id,
141
110
  )
142
111
 
143
- return
144
-
145
- self.result = self._test_instance.result
146
-
147
112
  async def log_async(self):
148
113
  """Log the result for this test to ValidMind"""
149
114
  if not self.result:
150
115
  raise ValueError("Cannot log test result before running the test")
151
116
 
152
- await self.result.log_async()
117
+ if isinstance(self.result, TestResult):
118
+ return await self.result.log_async()
@@ -48,48 +48,16 @@ class TestSuiteSection:
48
48
  section_id: str = None
49
49
  description: Optional[str] = None
50
50
 
51
- def get_required_inputs_for_test(self, test: TestSuiteTest) -> List[str]:
52
- """
53
- Returns the required inputs for a specific test. Returns an input
54
- dictionary that can be passed directly to run_test() or run_documentation_test()
55
-
56
- Args:
57
- test (TestSuiteTest): The test to get the required inputs for
58
-
59
- Returns:
60
- dict: A dictionary of required inputs
61
- """
62
- test_class = test._test_class
63
- inputs_dict = {}
64
- if (
65
- not hasattr(test_class, "required_inputs")
66
- or test_class.required_inputs is None
67
- ):
68
- return inputs_dict
69
-
70
- for input_name in test_class.required_inputs:
71
- # This required input is not valid but the behavior in this function
72
- # is consistent with required_inputs as defined in the test class so
73
- # we will ignore it for now
74
- #
75
- # if input_name == "model.train_ds" or input_name == "model.test_ds":
76
- # continue
77
-
78
- # Assign None to the input to indicate that it is required
79
- inputs_dict[input_name] = None
80
-
81
- return inputs_dict
82
-
83
51
  def get_default_config(self):
84
52
  """Returns the default configuration for the test suite section"""
85
53
  # TODO: configuration across sections/tests needs more work
86
54
  section_default_config = {}
87
55
 
88
56
  for test in self.tests:
89
- section_default_config[test.test_id] = {
90
- "inputs": self.get_required_inputs_for_test(test),
91
- "params": test.get_default_params() or {},
92
- }
57
+ default_config = test.get_default_config()
58
+
59
+ if default_config:
60
+ section_default_config[test.test_id] = default_config
93
61
 
94
62
  return section_default_config
95
63
 
@@ -162,13 +130,13 @@ class TestSuite:
162
130
  return self.suite_id.title().replace("_", " ")
163
131
 
164
132
  def get_tests(self) -> List[str]:
165
- """Get all test IDs from all sections"""
166
- test_ids = []
133
+ """Get all test suite test objects from all sections"""
134
+ tests = []
167
135
 
168
136
  for section in self.sections:
169
- test_ids.extend(section.tests)
137
+ tests.extend(section.tests)
170
138
 
171
- return test_ids
139
+ return tests
172
140
 
173
141
  def num_tests(self) -> int:
174
142
  """Returns the total number of tests in the test suite"""