validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.8.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
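Note on renamed tests: four tests change their fully qualified IDs in this release (DFGLSArch → DickeyFullerGLS, AspectCritique → AspectCritic, AnswerRelevance → ResponseRelevancy, AnswerSimilarity → SemanticSimilarity), so calling code that references the old IDs must be updated. A minimal sketch, assuming the standard validmind.tests.run_test entry point; "my_dataset" is an illustrative input_id:

    from validmind.tests import run_test

    # 2.5.25 test ID: validmind.data_validation.DFGLSArch
    # 2.6.8 test ID:
    result = run_test(
        "validmind.data_validation.DickeyFullerGLS",
        inputs={"dataset": "my_dataset"},  # illustrative input_id
    )
    result.log()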
validmind/vm_models/test/result_wrapper.py
DELETED
@@ -1,488 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-"""
-Result Wrappers for test and metric results
-"""
-import asyncio
-import json
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Union
-
-import pandas as pd
-from ipywidgets import HTML, GridBox, Layout, VBox
-
-from ... import api_client
-from ...ai.test_descriptions import AI_REVISION_NAME, DescriptionFuture
-from ...input_registry import input_registry
-from ...logging import get_logger
-from ...utils import NumpyEncoder, display, run_async, test_id_to_name
-from ..dataset import VMDataset
-from ..figure import Figure
-from .metric_result import MetricResult
-from .output_template import OutputTemplate
-from .result_summary import ResultSummary
-from .threshold_test_result import ThresholdTestResults
-
-logger = get_logger(__name__)
-
-
-async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
-    """Create or Update a Metadata Object"""
-    parts = content_id.split("::")
-    content_id = parts[0]
-    revision_name = parts[1] if len(parts) > 1 else None
-
-    # we always want composite metric definitions to be updated
-    should_update = content_id.startswith("composite_metric_def:")
-
-    # if we are updating a metric or test description, we check if the text
-    # has changed from the last time it was logged, and only update if it has
-    if content_id.split(":", 1)[0] in ["metric_description", "test_description"]:
-        try:
-            md = await api_client.get_metadata(content_id)
-            # if there is an existing description, only update it if the new one
-            # is different and is an AI-generated description
-            should_update = (
-                md["text"] != text if revision_name == AI_REVISION_NAME else False
-            )
-            logger.debug(f"Check if description has changed: {should_update}")
-        except Exception:
-            # if exception, assume it's not created yet TODO: don't catch all
-            should_update = True
-
-    if should_update:
-        if revision_name:
-            content_id = f"{content_id}::{revision_name}"
-
-        logger.debug(f"Updating metadata for `{content_id}`")
-
-        await api_client.log_metadata(content_id, text, _json)
-
-
-def plot_figures(figures: List[Figure]) -> None:
-    """Plot figures to an ipywidgets GridBox"""
-    plots = [figure.to_widget() for figure in figures]
-    num_columns = 2 if len(figures) > 1 else 1
-
-    return GridBox(
-        plots,
-        layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
-    )
-
-
-def _summary_tables_to_widget(summary: ResultSummary):
-    """Convert summary (list of json tables) into ipywidgets"""
-    widgets = []
-
-    for table in summary.results:
-        if table.metadata and table.metadata.title:
-            widgets.append(HTML(f"<h4>{table.metadata.title}</h4>"))
-
-        df_html = (
-            pd.DataFrame(table.data)
-            .style.format(precision=4)
-            .hide(axis="index")
-            .set_table_styles(
-                [
-                    {
-                        "selector": "",
-                        "props": [("width", "100%")],
-                    },
-                    {
-                        "selector": "th",
-                        "props": [("text-align", "left")],
-                    },
-                    {
-                        "selector": "tbody tr:nth-child(even)",
-                        "props": [("background-color", "#FFFFFF")],
-                    },
-                    {
-                        "selector": "tbody tr:nth-child(odd)",
-                        "props": [("background-color", "#F5F5F5")],
-                    },
-                    {
-                        "selector": "td, th",
-                        "props": [
-                            ("padding-left", "5px"),
-                            ("padding-right", "5px"),
-                        ],
-                    },
-                ]
-            )
-            .set_properties(**{"text-align": "left"})
-            .to_html(escape=False)
-        )
-        widgets.append(HTML(df_html))
-
-    return widgets
-
-
-@dataclass
-class ResultWrapper(ABC):
-    """Base Class for test suite results"""
-
-    name: str = "ResultWrapper"
-    # id of the result, can be set by the subclass. This helps
-    # looking up results later on
-    result_id: str = None
-    # Text description from test or metric (docstring usually)
-    result_description: str = None
-    # Text metadata about the result, can include description, etc.
-    result_metadata: List[dict] = None
-    # Output template to use for rendering the result
-    output_template: Optional[str] = None
-
-    def __str__(self) -> str:
-        """May be overridden by subclasses"""
-        return self.__class__.__name__
-
-    @abstractmethod
-    def to_widget(self):
-        """Create an ipywidget representation of the result... Must be overridden by subclasses"""
-        raise NotImplementedError
-
-    def render(self, output_template=None):
-        """Helper method that lets the user try out output templates"""
-        if output_template:
-            self.output_template = output_template
-
-        return self.to_widget()
-
-    def _validate_section_id_for_block(self, section_id: str, position: int = None):
-        """
-        Validate the section_id exists on the template before logging. We validate
-        if the section exists and if the user-provided position is within the bounds
-        of the section. When the position is None, we assume it goes to the end of the section.
-        """
-        if section_id is None:
-            return
-
-        api_client.reload()
-        found = False
-        client_config = api_client.client_config
-
-        for section in client_config.documentation_template["sections"]:
-            if section["id"] == section_id:
-                found = True
-                break
-
-        if not found:
-            raise ValueError(
-                f"Section with id {section_id} not found in the model's document"
-            )
-
-        # Check if the block already exists in the section
-        block_definition = {
-            "content_id": self.result_id,
-            "content_type": (
-                "metric" if isinstance(self, MetricResultWrapper) else "test"
-            ),
-        }
-        blocks = section.get("contents", [])
-        for block in blocks:
-            if (
-                block["content_id"] == block_definition["content_id"]
-                and block["content_type"] == block_definition["content_type"]
-            ):
-                logger.info(
-                    f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
-                )
-                return
-
-        # Validate that the position is within the bounds of the section
-        if position is not None:
-            num_blocks = len(blocks)
-            if position < 0 or position > num_blocks:
-                raise ValueError(
-                    f"Invalid position {position}. Must be between 0 and {num_blocks}"
-                )
-
-    def show(self):
-        """Display the result... May be overridden by subclasses"""
-        display(self.to_widget())
-
-    @abstractmethod
-    async def log_async(self):
-        """Log the result... Must be overridden by subclasses"""
-        raise NotImplementedError
-
-    def log(self, section_id: str = None, position: int = None):
-        """Log the result... May be overridden by subclasses"""
-
-        self._validate_section_id_for_block(section_id, position)
-        run_async(self.log_async, section_id=section_id, position=position)
-
-
-@dataclass
-class FailedResultWrapper(ResultWrapper):
-    """
-    Result wrapper for test suites that fail to load or run properly
-    """
-
-    name: str = "Failed"
-    error: Exception = None
-    message: str = None
-
-    def __repr__(self) -> str:
-        return f'FailedResult(result_id="{self.result_id}")'
-
-    def to_widget(self):
-        return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
-
-    async def log_async(self):
-        pass
-
-
-@dataclass
-class MetricResultWrapper(ResultWrapper):
-    """
-    Result wrapper for metrics that run as part of a test suite
-    """
-
-    name: str = "Metric"
-    scalar: Optional[Union[int, float]] = None
-    metric: Optional[MetricResult] = None
-    figures: Optional[List[Figure]] = None
-    inputs: List[str] = None  # List of input ids
-    params: Dict = None
-
-    def __repr__(self) -> str:
-        if self.metric:
-            return f'{self.__class__.__name__}(result_id="{self.result_id}", metric, figures)'
-        else:
-            return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
-
-    def to_widget(self):
-        if self.metric and self.metric.key == "dataset_description":
-            return ""
-
-        vbox_children = [
-            HTML(f"<h1>{test_id_to_name(self.result_id)}</h1>"),
-        ]
-
-        if self.result_metadata:
-            metric_description = self.result_metadata[0].get("text", "")
-            if isinstance(metric_description, DescriptionFuture):
-                metric_description = metric_description.get_description()
-                self.result_metadata[0]["text"] = metric_description
-
-            vbox_children.append(HTML(metric_description))
-
-        if self.scalar is not None:
-            vbox_children.append(
-                HTML(
-                    "<h3>Unit Metrics</h3>"
-                    f"<p>{test_id_to_name(self.result_id)} "
-                    f"(<i>{self.result_id}</i>): "
-                    f"<code>{self.scalar}</code></p>"
-                )
-            )
-
-        if self.metric:
-            vbox_children.append(HTML("<h3>Tables</h3>"))
-            if self.output_template:
-                vbox_children.append(
-                    HTML(
-                        OutputTemplate(self.output_template).render(
-                            value=self.metric.value
-                        )
-                    )
-                )
-            elif self.metric.summary:
-                vbox_children.extend(_summary_tables_to_widget(self.metric.summary))
-
-        if self.figures:
-            vbox_children.append(HTML("<h3>Plots</h3>"))
-            plot_widgets = plot_figures(self.figures)
-            vbox_children.append(plot_widgets)
-
-        return VBox(vbox_children)
-
-    def _get_filtered_summary(self):
-        """Check if the metric summary has columns from input datasets with matching row counts."""
-        dataset_columns = self._get_dataset_columns()
-        filtered_results = []
-
-        for table in self.metric.summary.results:
-            table_columns = self._get_table_columns(table)
-            sensitive_columns = self._find_sensitive_columns(
-                dataset_columns, table_columns
-            )
-
-            if sensitive_columns:
-                self._log_sensitive_data_warning(sensitive_columns)
-            else:
-                filtered_results.append(table)
-
-        self.metric.summary.results = filtered_results
-        return self.metric.summary
-
-    def _get_dataset_columns(self):
-        dataset_columns = {}
-        for input_item in self.inputs:
-            input_id = (
-                input_item if isinstance(input_item, str) else input_item.input_id
-            )
-            input_obj = input_registry.get(input_id)
-            if isinstance(input_obj, VMDataset):
-                dataset_columns.update(
-                    {col: len(input_obj.df) for col in input_obj.columns}
-                )
-        return dataset_columns
-
-    def _get_table_columns(self, table):
-        if isinstance(table.data, pd.DataFrame):
-            return {col: len(table.data) for col in table.data.columns}
-        elif isinstance(table.data, list) and table.data:
-            return {col: len(table.data) for col in table.data[0].keys()}
-        else:
-            raise ValueError("Invalid data type in summary table")
-
-    def _find_sensitive_columns(self, dataset_columns, table_columns):
-        return [
-            col
-            for col, row_count in table_columns.items()
-            if col in dataset_columns and row_count == dataset_columns[col]
-        ]
-
-    def _log_sensitive_data_warning(self, sensitive_columns):
-        logger.warning(
-            "Sensitive data in metric summary table. Not logging to API automatically. "
-            "Pass `unsafe=True` to result.log() method to override manually."
-        )
-        logger.warning(
-            f"The following columns are present in the table with matching row counts: {sensitive_columns}"
-        )
-
-    async def log_async(
-        self, section_id: str = None, position: int = None, unsafe=False
-    ):
-        tasks = []  # collect tasks to run in parallel (async)
-
-        if self.scalar is not None:
-            # scalars (unit metrics) are logged as key-value pairs associated with the inventory model
-            tasks.append(
-                api_client.alog_metric(
-                    key=self.result_id,
-                    value=self.scalar,
-                    inputs=self.inputs,
-                    params=self.params,
-                )
-            )
-
-        if self.metric:
-            if self.metric.summary and not unsafe:
-                self.metric.summary = self._get_filtered_summary()
-
-            tasks.append(
-                api_client.log_metric_result(
-                    metric=self.metric,
-                    inputs=self.inputs,
-                    output_template=self.output_template,
-                    section_id=section_id,
-                    position=position,
-                )
-            )
-
-        if self.figures:
-            tasks.extend([api_client.log_figure(figure) for figure in self.figures])
-
-        if hasattr(self, "result_metadata") and self.result_metadata:
-            description = self.result_metadata[0].get("text", "")
-            if isinstance(description, DescriptionFuture):
-                description = description.get_description()
-                self.result_metadata[0]["text"] = description
-
-            for metadata in self.result_metadata:
-                tasks.append(
-                    update_metadata(
-                        content_id=metadata["content_id"],
-                        text=metadata.get("text", ""),
-                        _json=metadata.get("json"),
-                    )
-                )
-
-        return await asyncio.gather(*tasks)
-
-
-@dataclass
-class ThresholdTestResultWrapper(ResultWrapper):
-    """
-    Result wrapper for test results produced by the tests that run as part of a test suite
-    """
-
-    name: str = "Threshold Test"
-    figures: Optional[List[Figure]] = None
-    test_results: ThresholdTestResults = None
-    inputs: List[str] = None
-
-    def __repr__(self) -> str:
-        if self.test_results:
-            return (
-                f'{self.__class__.__name__}(result_id="{self.result_id}", test_results)'
-            )
-        else:
-            return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
-
-    def to_widget(self):
-        vbox_children = []
-        description_html = []
-
-        description_html.append(
-            f"""
-            <h1>{test_id_to_name(self.test_results.test_name)} {"✅" if self.test_results.passed else "❌"}</h1>
-            """
-        )
-
-        if self.result_metadata:
-            metric_description = self.result_metadata[0].get("text", "")
-            if isinstance(metric_description, DescriptionFuture):
-                metric_description = metric_description.get_description()
-                self.result_metadata[0]["text"] = metric_description
-
-            description_html.append(metric_description)
-
-        test_params = json.dumps(self.test_results.params, cls=NumpyEncoder, indent=2)
-        description_html.append(
-            f"""
-            <h4>Test Parameters</h4>
-            <pre>{test_params}</pre>
-            """
-        )
-
-        vbox_children.append(HTML("".join(description_html)))
-
-        if self.test_results.summary:
-            vbox_children.append(HTML("<h3>Tables</h3>"))
-            vbox_children.extend(_summary_tables_to_widget(self.test_results.summary))
-
-        if self.figures:
-            vbox_children.append(HTML("<h3>Plots</h3>"))
-            plot_widgets = plot_figures(self.figures)
-            vbox_children.append(plot_widgets)
-
-        return VBox(vbox_children)
-
-    async def log_async(self, section_id: str = None, position: int = None):
-        tasks = [
-            api_client.log_test_result(
-                self.test_results, self.inputs, section_id, position
-            )
-        ]
-
-        if self.figures:
-            tasks.extend([api_client.log_figure(figure) for figure in self.figures])
-
-        if hasattr(self, "result_metadata") and self.result_metadata:
-            description = self.result_metadata[0].get("text", "")
-            if isinstance(description, DescriptionFuture):
-                description = description.get_description()
-                self.result_metadata[0]["text"] = description
-
-            for metadata in self.result_metadata:
-                tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
-
-        await asyncio.gather(*tasks)
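The wrapper hierarchy deleted above (ResultWrapper, MetricResultWrapper, ThresholdTestResultWrapper) is superseded by the unified result module added in 2.6.8 (validmind/vm_models/result/result.py, +337). A minimal sketch of the replacement flow, assuming the new result object keeps the show() rendering and log(section_id, position) signature seen in the deleted code; the test ID, input_id, and section_id are illustrative:

    from validmind.tests import run_test

    result = run_test(
        "validmind.data_validation.MissingValues",
        inputs={"dataset": "my_dataset"},  # illustrative input_id
    )
    result.show()  # widget rendering, as to_widget()/show() did above
    result.log(section_id="data_preparation")  # illustrative section_id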
validmind/vm_models/test/test.py
DELETED
@@ -1,103 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-"""Base Class for Metric, ThresholdTest and any other test type"""
-
-from abc import abstractmethod
-from dataclasses import dataclass
-from inspect import getdoc
-from typing import ClassVar, List
-from uuid import uuid4
-
-from ..test_context import TestUtils
-from .result_wrapper import ResultWrapper
-
-
-@dataclass
-class Test(TestUtils):
-    # Class Variables
-    name: ClassVar[str] = ""  # should be overridden by leaf classes
-    test_type: ClassVar[str]  # should be overridden by parent classes
-    tasks: List[str] = None  # should be overridden by leaf classes
-    tags: List[str] = None  # should be overridden by leaf classes
-
-    required_inputs: ClassVar[List[str]] = None  # should be overridden by leaf classes
-    default_params: ClassVar[dict] = None  # should be overridden by leaf classes
-
-    # Instance Variables
-    _ref_id: str = None  # unique identifier (populated at init)
-    _section_id: str = None  # which section of template this test belongs to
-    test_id: str = None  # populated when loading tests from suites
-    result: ResultWrapper = None  # type should be overridden by parent classes
-
-    params: dict = None  # populated by test suite from user-passed config
-
-    output_template: str = None  # optional output template
-
-    generate_description: bool = (
-        True  # whether to generate a description when caching result
-    )
-
-    def __post_init__(self):
-        """
-        Set default params if not provided
-        """
-        if not self.test_id:
-            raise Exception(
-                "test_id is missing. It must be passed when initializing the test"
-            )
-
-        self._ref_id = str(uuid4())
-        self.key = (
-            self.test_id
-        )  # for backwards compatibility - figures really should get keyed automatically
-
-        # TODO: add validation for required inputs
-        if self.default_params is None:
-            self.default_params = {}
-        if self.required_inputs is None:
-            self.required_inputs = []
-        if self.tags is None:
-            self.tags = []
-        if self.tasks is None:
-            self.tasks = []
-
-        self.params = {
-            **(self.default_params or {}),
-            **(self.params if self.params is not None else {}),
-        }
-
-    def description(self):
-        """
-        Return the test description. May be overridden by subclasses. Defaults
-        to returning the class' docstring
-        """
-        return getdoc(self).strip()
-
-    @abstractmethod
-    def summary(self, *args, **kwargs):
-        """
-        Return the summary. Should be overridden by subclasses.
-        """
-        raise NotImplementedError("base class method should not be called")
-
-    @abstractmethod
-    def run(self, *args, **kwargs):
-        """
-        Run the calculation and cache its results
-        """
-        raise NotImplementedError("base class method should not be called")
-
-    @abstractmethod
-    def cache_results(self, *args, **kwargs):
-        """
-        Cache the results of the calculation
-        """
-        raise NotImplementedError("base class method should not be called")
-
-    def log(self):
-        """
-        Log the test results to ValidMind
-        """
-        return self.result.log()
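With the class-based Test base deleted above (and validmind/tests/decorator.py slimmed from 242 lines to 33), custom tests are written as plain functions. A minimal sketch, assuming the validmind.test decorator exposed by the 2.x line; the test ID and logic are illustrative:

    import validmind as vm

    @vm.test("my_custom_tests.ColumnMeans")  # illustrative test ID
    def column_means(dataset):
        """Computes the mean of each numeric column in the dataset."""
        # dataset is a VMDataset; .df exposes the underlying DataFrame
        return dataset.df.mean(numeric_only=True).to_frame(name="mean")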
validmind/vm_models/test/threshold_test.py
DELETED
@@ -1,106 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-"""
-(Threshold)Test class wrapper. Our API exposes the concept of a
-Test (as test_results) but we'll refer to it as a ThresholdTest to
-avoid confusion with the "tests" in the general data science/modeling sense.
-"""
-
-from dataclasses import dataclass
-from typing import ClassVar, List, Optional
-
-from ...ai.test_descriptions import get_description_metadata
-from ..figure import Figure
-from .result_summary import ResultSummary, ResultTable
-from .result_wrapper import ThresholdTestResultWrapper
-from .test import Test
-from .threshold_test_result import ThresholdTestResult, ThresholdTestResults
-
-
-@dataclass
-class ThresholdTest(Test):
-    """
-    A threshold test is a combination of a metric/plot we track and a
-    corresponding set of parameters and threshold values that allow
-    us to determine whether the metric/plot passes or fails.
-    """
-
-    # Class Variables
-    test_type: ClassVar[str] = "ThresholdTest"
-    category: ClassVar[str]  # should be overridden by test classes
-
-    # Instance Variables
-    result: ThresholdTestResults = None  # populated by cache_results() method
-
-    def summary(self, results: Optional[List[ThresholdTestResult]], all_passed: bool):
-        """
-        Return the threshold test summary. May be overridden by subclasses. Defaults to showing
-        a table with test_name (optional), column and passed.
-
-        The test summary allows renderers (e.g. Word and ValidMind UI) to display a
-        short summary of the test results.
-        """
-        if results is None:
-            return None
-
-        results_table = []
-        for test_result in results:
-            result_object = {
-                "passed": test_result.passed,
-            }
-
-            if test_result.test_name is not None:
-                result_object["test_name"] = test_result.test_name
-            if test_result.column is not None:
-                result_object["column"] = test_result.column
-
-            results_table.append(result_object)
-
-        return ResultSummary(results=[ResultTable(data=results_table)])
-
-    def cache_results(
-        self,
-        test_results_list: List[ThresholdTestResult],
-        passed: bool,
-        figures: Optional[List[Figure]] = None,
-    ):
-        """
-        Cache the individual results of the threshold test as a list of ThresholdTestResult objects
-
-        Args:
-            test_results_list (List[ThresholdTestResult]): The results of the threshold test
-            passed (bool): Whether the threshold test passed or failed
-
-        Returns:
-            TestSuiteResult: The test suite result object
-        """
-        result_summary = self.summary(test_results_list, passed)
-
-        self.result = ThresholdTestResultWrapper(
-            result_id=self.test_id,
-            result_description=self.description(),
-            result_metadata=[
-                get_description_metadata(
-                    test_id=self.test_id,
-                    default_description=self.description(),
-                    summary=result_summary.serialize(),
-                    figures=figures,
-                    prefix="test_description",
-                    should_generate=self.generate_description,
-                )
-            ],
-            inputs=self.get_accessed_inputs(),
-            test_results=ThresholdTestResults(
-                test_name=self.test_id,
-                ref_id=self._ref_id,
-                params=self.params,
-                passed=passed,
-                results=test_results_list,
-                summary=result_summary,
-            ),
-            figures=figures,
-        )
-
-        return self.result
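The pass/fail summary that ThresholdTest.summary assembled above (a table of passed/test_name/column rows) maps onto the functional style as a returned table of the same shape. A minimal sketch under the same validmind.test decorator assumption; the test ID and min_threshold parameter are illustrative:

    import validmind as vm

    @vm.test("my_custom_tests.MissingValuesCheck")  # illustrative test ID
    def missing_values_check(dataset, min_threshold: int = 1):
        """Flags columns whose missing-value count reaches min_threshold."""
        missing = dataset.df.isna().sum()
        # One row per column, mirroring the {"column", "passed"} rows the
        # deleted summary() method produced
        return [
            {"column": col, "missing": int(n), "passed": bool(n < min_threshold)}
            for col, n in missing.items()
        ]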