validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import os
|
6
|
+
from typing import TYPE_CHECKING, Dict, List, Union
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
from ipywidgets import HTML, GridBox, Layout
|
10
|
+
from jinja2 import Template
|
11
|
+
|
12
|
+
from ... import api_client
|
13
|
+
from ...logging import get_logger
|
14
|
+
from ..dataset import VMDataset
|
15
|
+
from ..figure import Figure
|
16
|
+
from ..input import VMInput
|
17
|
+
|
18
|
+
if TYPE_CHECKING:
|
19
|
+
from .result import ResultTable
|
20
|
+
|
21
|
+
|
22
|
+
AI_REVISION_NAME = "Generated by ValidMind AI"
|
23
|
+
DEFAULT_REVISION_NAME = "Default Description"
|
24
|
+
|
25
|
+
logger = get_logger(__name__)
|
26
|
+
|
27
|
+
_result_template = None
|
28
|
+
|
29
|
+
|
30
|
+
def get_result_template():
    """Return the Jinja template used to render test results as HTML.

    The template source is read from the ``result.jinja`` file that sits next
    to this module. It is parsed once, on the first call, and cached in the
    module-level ``_result_template`` so later calls reuse the same object.

    Returns:
        The cached ``jinja2.Template`` instance.
    """
    global _result_template

    if _result_template is None:
        template_path = os.path.join(os.path.dirname(__file__), "result.jinja")
        # Decode explicitly as UTF-8 so loading the bundled template does not
        # depend on the platform's default locale encoding.
        with open(template_path, encoding="utf-8") as f:
            _result_template = Template(f.read())

    return _result_template
|
39
|
+
|
40
|
+
|
41
|
+
async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
    """Create or update a metadata object through the API client.

    ``content_id`` may carry a revision name after a ``::`` separator. Content
    whose id starts with ``composite_metric_def:`` is always logged. Metric and
    test descriptions are logged only when no existing metadata could be
    fetched, or when the revision is the AI-generated one and the text differs
    from what is stored. Any other content id is left untouched.

    Args:
        content_id: Content identifier, optionally suffixed with ``::<revision>``.
        text: Text to store for the content.
        _json: Optional JSON payload forwarded unchanged to the API client.
    """
    base_id, *remainder = content_id.split("::")
    revision_name = remainder[0] if remainder else None

    # composite metric definitions are always refreshed
    needs_update = base_id.startswith("composite_metric_def:")

    content_kind = base_id.split(":", 1)[0]
    if content_kind in ["metric_description", "test_description"]:
        # descriptions are only re-logged when they actually changed
        try:
            existing = await api_client.aget_metadata(base_id)
            if revision_name == AI_REVISION_NAME:
                # only AI-generated descriptions may replace an existing one
                needs_update = existing["text"] != text
            else:
                needs_update = False
            logger.debug(f"Check if description has changed: {needs_update}")
        except Exception:
            # any failure is treated as "not created yet" (TODO: don't catch all)
            needs_update = True

    if not needs_update:
        return

    target_id = f"{base_id}::{revision_name}" if revision_name else base_id

    logger.debug(f"Updating metadata for `{target_id}`")

    await api_client.alog_metadata(target_id, text, _json)
|
72
|
+
|
73
|
+
|
74
|
+
def check_for_sensitive_data(data: pd.DataFrame, inputs: List[VMInput]):
    """Check if a table contains raw data from input datasets.

    A table column is flagged when an input dataset has a column with the same
    name and the table has exactly as many rows as that dataset.

    Args:
        data: The result table to inspect.
        inputs: Inputs used by the test; only ``VMDataset`` instances are
            considered.

    Raises:
        ValueError: If at least one column of ``data`` matches a dataset column
            by name and row count.
    """
    # Map each column name to the row counts of *every* dataset that has it.
    # Keeping a single count per name would let a later dataset (e.g. a test
    # split) overwrite an earlier one (e.g. the train split) that shares the
    # same columns, hiding raw rows copied from the earlier dataset.
    dataset_row_counts = {}
    for input_obj in inputs:
        if not isinstance(input_obj, VMDataset):
            continue

        num_rows = len(input_obj.df)
        for col in input_obj.columns:
            dataset_row_counts.setdefault(col, set()).add(num_rows)

    table_rows = len(data)

    # dict.fromkeys de-duplicates repeated column labels while keeping order
    offending_columns = [
        col
        for col in dict.fromkeys(data.columns)
        if table_rows in dataset_row_counts.get(col, ())
    ]

    if offending_columns:
        raise ValueError(
            f"Raw input data found in table, pass `unsafe=True` "
            f"or remove the offending columns: {offending_columns}"
        )
|
96
|
+
|
97
|
+
|
98
|
+
def tables_to_widgets(tables: List["ResultTable"]):
    """Render result tables as a list of ipywidgets HTML widgets.

    The returned list starts with a "Tables" heading widget, followed by one
    HTML widget per table containing the optional table title and the styled
    table markup.
    """

    def css_rule(selector, *props):
        # one entry in the list passed to ``Styler.set_table_styles``
        return {"selector": selector, "props": list(props)}

    rendered = [HTML("<h3>Tables</h3>")]

    for table in tables:
        heading = f"<h4>{table.title}</h4>" if table.title else ""

        # drop the index, then format and style the frame step by step
        styler = table.data.reset_index(drop=True).style
        styler = styler.format(precision=4).hide(axis="index")
        styler = styler.set_table_styles(
            [
                css_rule("", ("width", "100%")),
                css_rule("th", ("text-align", "left")),
                css_rule("tbody tr:nth-child(even)", ("background-color", "#FFFFFF")),
                css_rule("tbody tr:nth-child(odd)", ("background-color", "#F5F5F5")),
                css_rule("td, th", ("padding-left", "5px"), ("padding-right", "5px")),
            ]
        )
        styler = styler.set_properties(**{"text-align": "left"})

        rendered.append(HTML(heading + styler.to_html(escape=False)))

    return rendered
|
147
|
+
|
148
|
+
|
149
|
+
def figures_to_widgets(figures: List[Figure]) -> list:
    """Lay out figures in an ipywidgets GridBox under a "Figures" heading.

    A single figure (or none) gets a one-column grid; two or more figures are
    arranged in two equal-width columns.

    Returns:
        A two-item list: the heading widget and the GridBox holding one widget
        per figure.
    """
    column_count = 1 if len(figures) <= 1 else 2

    figure_widgets = []
    for figure in figures:
        figure_widgets.append(figure.to_widget())

    grid_layout = Layout(grid_template_columns=f"repeat({column_count}, 1fr)")
    grid = GridBox(figure_widgets, layout=grid_layout)

    return [HTML("<h3>Figures</h3>"), grid]
|
@@ -9,7 +9,6 @@ from IPython.display import display
|
|
9
9
|
|
10
10
|
from ...logging import get_logger
|
11
11
|
from ...utils import is_notebook, run_async, run_async_check
|
12
|
-
from ..test_context import TestContext, TestInput
|
13
12
|
from .summary import TestSuiteSummary
|
14
13
|
from .test_suite import TestSuite
|
15
14
|
|
@@ -22,8 +21,6 @@ class TestSuiteRunner:
|
|
22
21
|
"""
|
23
22
|
|
24
23
|
suite: TestSuite = None
|
25
|
-
context: TestContext = None
|
26
|
-
input: TestInput = None
|
27
24
|
config: dict = None
|
28
25
|
|
29
26
|
_test_configs: dict = None
|
@@ -32,67 +29,33 @@ class TestSuiteRunner:
|
|
32
29
|
pbar_description: widgets.Label = None
|
33
30
|
pbar_box: widgets.HBox = None
|
34
31
|
|
35
|
-
def __init__(self, suite: TestSuite,
|
32
|
+
def __init__(self, suite: TestSuite, config: dict = None, inputs: dict = None):
|
36
33
|
self.suite = suite
|
37
|
-
self.input = input
|
38
34
|
self.config = config or {}
|
39
35
|
|
40
|
-
self.
|
36
|
+
self._load_config(inputs)
|
41
37
|
|
42
|
-
|
43
|
-
self._init_tests()
|
44
|
-
|
45
|
-
def _load_config(self):
|
38
|
+
def _load_config(self, inputs: dict = None):
|
46
39
|
"""Splits the config into a global config and test configs"""
|
47
|
-
self._test_configs = {
|
40
|
+
self._test_configs = {
|
41
|
+
test.test_id: {"inputs": inputs or {}} for test in self.suite.get_tests()
|
42
|
+
}
|
48
43
|
|
49
44
|
for key, value in self.config.items():
|
50
|
-
test_ids = [test.test_id for test in self.suite.get_tests()]
|
51
|
-
|
52
45
|
# If the key does not exist in the test suite, we need to
|
53
46
|
# inform the user the config is probably wrong but we will
|
54
47
|
# keep running all tests
|
55
|
-
if key not in
|
48
|
+
if key not in self._test_configs:
|
56
49
|
logger.warning(
|
57
50
|
f"Config key '{key}' does not match a test_id in the template."
|
58
51
|
"\n\tEnsure you registered a content block with the correct content_id in the template"
|
59
52
|
"\n\tThe configuration for this test will be ignored."
|
60
53
|
)
|
61
|
-
|
62
|
-
self._test_configs[key] = value
|
54
|
+
continue
|
63
55
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
"""
|
68
|
-
for section in self.suite.sections:
|
69
|
-
for test in section.tests:
|
70
|
-
# use local inputs from config if provided
|
71
|
-
test_configs = self._test_configs.get(test.test_id, {})
|
72
|
-
inputs = self.input
|
73
|
-
if (
|
74
|
-
test.test_id in self.config
|
75
|
-
and "inputs" in self.config[test.test_id]
|
76
|
-
):
|
77
|
-
inputs = TestInput(self.config[test.test_id]["inputs"])
|
78
|
-
test_configs = {
|
79
|
-
key: value
|
80
|
-
for key, value in test_configs.items()
|
81
|
-
if key != "inputs"
|
82
|
-
}
|
83
|
-
test_configs = test_configs.get("params", {})
|
84
|
-
else:
|
85
|
-
if (test_configs) and ("params" not in test_configs):
|
86
|
-
# [DEPRECATED] This is the old way of setting test parameters
|
87
|
-
msg = (
|
88
|
-
"Setting test parameters directly in the 'config' parameter"
|
89
|
-
" of the run_documentation_tests() method is deprecated. "
|
90
|
-
"Instead, use the new format of the config: "
|
91
|
-
'config = {"test_id": {"params": {...}, "inputs": {...}}}'
|
92
|
-
)
|
93
|
-
logger.warning(msg)
|
94
|
-
|
95
|
-
test.load(inputs=inputs, context=self.context, config=test_configs)
|
56
|
+
# override the global config (inputs) with the test-specific config
|
57
|
+
# TODO: better configuration would make for a better DX
|
58
|
+
self._test_configs[key] = value
|
96
59
|
|
97
60
|
def _start_progress_bar(self, send: bool = True):
|
98
61
|
"""
|
@@ -176,12 +139,11 @@ class TestSuiteRunner:
|
|
176
139
|
|
177
140
|
for section in self.suite.sections:
|
178
141
|
for test in section.tests:
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
test.run(fail_fast=fail_fast)
|
142
|
+
self.pbar_description.value = f"Running {test.name}"
|
143
|
+
test.run(
|
144
|
+
fail_fast=fail_fast,
|
145
|
+
config=self._test_configs.get(test.test_id, {}),
|
146
|
+
)
|
185
147
|
self.pbar.value += 1
|
186
148
|
|
187
149
|
if send:
|
@@ -9,7 +9,7 @@ import ipywidgets as widgets
|
|
9
9
|
|
10
10
|
from ...logging import get_logger
|
11
11
|
from ...utils import display, md_to_html
|
12
|
-
from ..
|
12
|
+
from ..result import ErrorResult
|
13
13
|
from .test_suite import TestSuiteSection, TestSuiteTest
|
14
14
|
|
15
15
|
logger = get_logger(__name__)
|
@@ -52,7 +52,7 @@ class TestSuiteSectionSummary:
|
|
52
52
|
children.append(test.result.to_widget())
|
53
53
|
titles.append(
|
54
54
|
f"❌ {test.result.name}: {test.name} ({test.test_id})"
|
55
|
-
if isinstance(test.result,
|
55
|
+
if isinstance(test.result, ErrorResult)
|
56
56
|
else f"{test.result.name}: {test.name} ({test.test_id})"
|
57
57
|
)
|
58
58
|
|
@@ -96,7 +96,7 @@ class TestSuiteSummary:
|
|
96
96
|
from ...api_client import get_api_host, get_api_model
|
97
97
|
|
98
98
|
ui_host = get_api_host().replace("/api/v1/tracking", "").replace("api", "app")
|
99
|
-
link = f"{ui_host}/
|
99
|
+
link = f"{ui_host}model-inventory/{get_api_model()}"
|
100
100
|
results_link = f"""
|
101
101
|
<h3>
|
102
102
|
Check out the updated documentation in your
|
@@ -2,14 +2,14 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from
|
5
|
+
from typing import Any, Dict, Union
|
6
|
+
|
7
|
+
from ...errors import LoadTestError, should_raise_on_fail_fast
|
6
8
|
from ...logging import get_logger, log_performance
|
7
|
-
from ...tests import
|
8
|
-
from ...tests import
|
9
|
+
from ...tests.load import load_test
|
10
|
+
from ...tests.run import run_test
|
9
11
|
from ...utils import test_id_to_name
|
10
|
-
from ..
|
11
|
-
from ..test.test import Test
|
12
|
-
from ..test_context import TestContext, TestInput
|
12
|
+
from ..result import ErrorResult, Result, TestResult
|
13
13
|
|
14
14
|
logger = get_logger(__name__)
|
15
15
|
|
@@ -21,12 +21,11 @@ class TestSuiteTest:
|
|
21
21
|
|
22
22
|
test_id: str
|
23
23
|
output_template: str = None
|
24
|
-
name: str = None
|
25
|
-
|
26
|
-
|
27
|
-
_test_instance: Test = None
|
24
|
+
name: Union[str, None] = None
|
25
|
+
description: Union[Dict[str, Any], None] = None
|
26
|
+
result: Union[Result, None] = None
|
28
27
|
|
29
|
-
|
28
|
+
_load_failed: bool = False
|
30
29
|
|
31
30
|
def __init__(self, test_id_or_obj):
|
32
31
|
"""Load the test class from the test id
|
@@ -42,69 +41,46 @@ class TestSuiteTest:
|
|
42
41
|
|
43
42
|
self.name = test_id_to_name(self.test_id)
|
44
43
|
|
44
|
+
def get_default_config(self):
|
45
|
+
"""Returns the default configuration for the test"""
|
45
46
|
try:
|
46
|
-
|
47
|
+
test_func = load_test(self.test_id)
|
47
48
|
except LoadTestError as e:
|
48
|
-
self.result = FailedResultWrapper(
|
49
|
-
error=e,
|
50
|
-
message=f"Failed to load test '{self.test_id}'",
|
51
|
-
result_id=self.test_id,
|
52
|
-
)
|
53
|
-
except Exception as e:
|
54
|
-
# The test suite runner will appropriately ignore this error
|
55
|
-
# since _test_class is None
|
56
49
|
logger.error(f"Failed to load test '{self.test_id}': {e}")
|
57
50
|
|
58
|
-
|
59
|
-
|
60
|
-
return self._test_class.test_type
|
61
|
-
|
62
|
-
def get_default_params(self):
|
63
|
-
"""Returns the default params for the test"""
|
64
|
-
if not self._test_class:
|
65
|
-
return {}
|
66
|
-
|
67
|
-
return self._test_class.default_params
|
68
|
-
|
69
|
-
def load(self, inputs: TestInput, context: TestContext, config: dict = None):
|
70
|
-
"""Load an instance of the test class"""
|
71
|
-
if not self._test_class:
|
72
|
-
return
|
73
|
-
|
74
|
-
try:
|
75
|
-
self._test_instance = self._test_class(
|
76
|
-
test_id=self.test_id,
|
77
|
-
context=context,
|
78
|
-
inputs=inputs,
|
79
|
-
params=config,
|
80
|
-
output_template=self.output_template,
|
81
|
-
)
|
82
|
-
except Exception as e:
|
83
|
-
logger.error(
|
84
|
-
f"Failed to load test '{self.test_id}': "
|
85
|
-
f"({e.__class__.__name__}) {e}"
|
86
|
-
)
|
87
|
-
self.result = FailedResultWrapper(
|
51
|
+
self._load_failed = True
|
52
|
+
self.result = ErrorResult(
|
88
53
|
error=e,
|
89
54
|
message=f"Failed to load test '{self.name}'",
|
90
55
|
result_id=self.test_id,
|
91
56
|
)
|
92
57
|
|
93
|
-
|
58
|
+
return None
|
59
|
+
|
60
|
+
config = {
|
61
|
+
# we use the input name ('dataset', 'model') as the key and the value
|
62
|
+
"inputs": {k: k for k in test_func.inputs},
|
63
|
+
"params": {k: v.get("default") for k, v in test_func.params.items()},
|
64
|
+
}
|
65
|
+
|
66
|
+
return config
|
67
|
+
|
68
|
+
def run(self, fail_fast: bool = False, config: dict = None):
|
94
69
|
"""Run the test"""
|
95
|
-
if
|
96
|
-
# test failed to load and we have already logged the error
|
70
|
+
if self._load_failed:
|
97
71
|
return
|
98
72
|
|
99
73
|
try:
|
100
|
-
self._test_instance.validate_inputs()
|
101
|
-
|
102
74
|
# run the test and log the performance if LOG_LEVEL is set to DEBUG
|
103
|
-
log_performance(
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
75
|
+
@log_performance(name=self.test_id, logger=logger)
|
76
|
+
def run_test_with_logging():
|
77
|
+
return run_test(
|
78
|
+
self.test_id,
|
79
|
+
**(config or {}),
|
80
|
+
show=False,
|
81
|
+
)
|
82
|
+
|
83
|
+
self.result = run_test_with_logging()
|
108
84
|
|
109
85
|
except Exception as e:
|
110
86
|
if fail_fast and should_raise_on_fail_fast(e):
|
@@ -113,40 +89,30 @@ class TestSuiteTest:
|
|
113
89
|
logger.error(
|
114
90
|
f"Failed to run test '{self.test_id}': " f"({e.__class__.__name__}) {e}"
|
115
91
|
)
|
116
|
-
self.result =
|
117
|
-
name=f"Failed {self._test_instance.test_type}",
|
92
|
+
self.result = ErrorResult(
|
118
93
|
error=e,
|
119
94
|
message=f"Failed to run '{self.name}'",
|
120
95
|
result_id=self.test_id,
|
121
96
|
)
|
122
97
|
|
123
|
-
|
124
|
-
|
125
|
-
if self._test_instance.result is None:
|
126
|
-
self.result = FailedResultWrapper(
|
127
|
-
name=f"Failed {self._test_instance.test_type}",
|
98
|
+
if self.result is None:
|
99
|
+
self.result = ErrorResult(
|
128
100
|
error=None,
|
129
101
|
message=f"'{self.name}' did not return a result",
|
130
102
|
result_id=self.test_id,
|
131
103
|
)
|
132
104
|
|
133
|
-
|
134
|
-
|
135
|
-
if not isinstance(self._test_instance.result, ResultWrapper):
|
136
|
-
self.result = FailedResultWrapper(
|
137
|
-
name=f"Failed {self._test_instance.test_type}",
|
105
|
+
if not isinstance(self.result, Result):
|
106
|
+
self.result = ErrorResult(
|
138
107
|
error=None,
|
139
108
|
message=f"{self.name} returned an invalid result: {self._test_instance.result}",
|
140
109
|
result_id=self.test_id,
|
141
110
|
)
|
142
111
|
|
143
|
-
return
|
144
|
-
|
145
|
-
self.result = self._test_instance.result
|
146
|
-
|
147
112
|
async def log_async(self):
|
148
113
|
"""Log the result for this test to ValidMind"""
|
149
114
|
if not self.result:
|
150
115
|
raise ValueError("Cannot log test result before running the test")
|
151
116
|
|
152
|
-
|
117
|
+
if isinstance(self.result, TestResult):
|
118
|
+
return await self.result.log_async()
|
@@ -48,48 +48,16 @@ class TestSuiteSection:
|
|
48
48
|
section_id: str = None
|
49
49
|
description: Optional[str] = None
|
50
50
|
|
51
|
-
def get_required_inputs_for_test(self, test: TestSuiteTest) -> List[str]:
|
52
|
-
"""
|
53
|
-
Returns the required inputs for a specific test. Returns an input
|
54
|
-
dictionary that can be passed directly to run_test() or run_documentation_test()
|
55
|
-
|
56
|
-
Args:
|
57
|
-
test (TestSuiteTest): The test to get the required inputs for
|
58
|
-
|
59
|
-
Returns:
|
60
|
-
dict: A dictionary of required inputs
|
61
|
-
"""
|
62
|
-
test_class = test._test_class
|
63
|
-
inputs_dict = {}
|
64
|
-
if (
|
65
|
-
not hasattr(test_class, "required_inputs")
|
66
|
-
or test_class.required_inputs is None
|
67
|
-
):
|
68
|
-
return inputs_dict
|
69
|
-
|
70
|
-
for input_name in test_class.required_inputs:
|
71
|
-
# This required input is not valid but the behavior in this function
|
72
|
-
# is consistent with required_inputs as defined in the test class so
|
73
|
-
# we will ignore it for now
|
74
|
-
#
|
75
|
-
# if input_name == "model.train_ds" or input_name == "model.test_ds":
|
76
|
-
# continue
|
77
|
-
|
78
|
-
# Assign None to the input to indicate that it is required
|
79
|
-
inputs_dict[input_name] = None
|
80
|
-
|
81
|
-
return inputs_dict
|
82
|
-
|
83
51
|
def get_default_config(self):
|
84
52
|
"""Returns the default configuration for the test suite section"""
|
85
53
|
# TODO: configuration across sections/tests needs more work
|
86
54
|
section_default_config = {}
|
87
55
|
|
88
56
|
for test in self.tests:
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
57
|
+
default_config = test.get_default_config()
|
58
|
+
|
59
|
+
if default_config:
|
60
|
+
section_default_config[test.test_id] = default_config
|
93
61
|
|
94
62
|
return section_default_config
|
95
63
|
|
@@ -162,13 +130,13 @@ class TestSuite:
|
|
162
130
|
return self.suite_id.title().replace("_", " ")
|
163
131
|
|
164
132
|
def get_tests(self) -> List[str]:
|
165
|
-
"""Get all test
|
166
|
-
|
133
|
+
"""Get all test suite test objects from all sections"""
|
134
|
+
tests = []
|
167
135
|
|
168
136
|
for section in self.sections:
|
169
|
-
|
137
|
+
tests.extend(section.tests)
|
170
138
|
|
171
|
-
return
|
139
|
+
return tests
|
172
140
|
|
173
141
|
def num_tests(self) -> int:
|
174
142
|
"""Returns the total number of tests in the test suite"""
|