validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.24.dist-info/METADATA +0 -118
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -1,238 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from dataclasses import dataclass
|
6
|
-
from typing import List, Tuple, Union
|
7
|
-
from uuid import uuid4
|
8
|
-
|
9
|
-
from ..ai.test_descriptions import get_description_metadata
|
10
|
-
from ..logging import get_logger
|
11
|
-
from ..tests.decorator import _inspect_signature
|
12
|
-
from ..utils import run_async, test_id_to_name
|
13
|
-
from ..vm_models.test.metric import Metric
|
14
|
-
from ..vm_models.test.metric_result import MetricResult
|
15
|
-
from ..vm_models.test.result_summary import ResultSummary, ResultTable
|
16
|
-
from ..vm_models.test.result_wrapper import MetricResultWrapper
|
17
|
-
from . import load_metric, run_metric
|
18
|
-
|
19
|
-
logger = get_logger(__name__)
|
20
|
-
|
21
|
-
|
22
|
-
@dataclass
class CompositeMetric(Metric):
    """Metric whose value is the combination of several unit metric values.

    The list of unit metric IDs comes from one of two places: a class-level
    ``_unit_metrics`` attribute (set by ``load_composite_metric`` on the classes
    it builds) or the ``unit_metrics`` dataclass field passed by the caller.
    """

    unit_metrics: List[str] = None

    def __post_init__(self):
        """Resolve ``unit_metrics`` and ``output_template`` for this instance.

        Raises:
            ValueError: If neither the class-level ``_unit_metrics`` nor the
                ``unit_metrics`` field supplies any unit metrics.
        """
        # Read `_unit_metrics` defensively, the same way `_output_template` is
        # handled below: nothing in this class defines it, so a plain attribute
        # access could raise AttributeError before the `unit_metrics` fallback
        # is ever considered.
        class_level_metrics = getattr(self, "_unit_metrics", None)
        if class_level_metrics:
            self.unit_metrics = class_level_metrics
        elif self.unit_metrics is None:
            raise ValueError("unit_metrics must be provided")

        if hasattr(self, "_output_template") and self._output_template:
            self.output_template = self._output_template

    def run(self):
        """Run all unit metrics through ``run_metrics`` and store the result.

        Returns:
            The result object returned by ``run_metrics`` (also kept on
            ``self.result``). ``show=False`` is passed, so nothing is displayed.
        """
        self.result = run_metrics(
            test_id=self.test_id,
            metric_ids=self.unit_metrics,
            description=self.description(),
            inputs=self._get_input_dict(),
            accessed_inputs=self.get_accessed_inputs(),
            params=self.params,
            output_template=self.output_template,
            show=False,
            generate_description=self.generate_description,
        )

        return self.result

    def summary(self, result: dict):
        """Wrap ``result`` in a ``ResultSummary`` holding a single one-row table."""
        return ResultSummary(results=[ResultTable(data=[result])])
|
52
|
-
|
53
|
-
|
54
|
-
def load_composite_metric(
    test_id: str = None,
    metric_name: str = None,
    unit_metrics: List[str] = None,
    output_template: str = None,
) -> Tuple[Union[None, str], Union[CompositeMetric, None]]:
    """Build a ``CompositeMetric`` subclass from arguments or stored metadata.

    With a ``test_id``, the unit metric IDs and output template are fetched
    from metadata and replace whatever was passed in ``unit_metrics`` and
    ``output_template``. Without one, the class is assembled from the
    arguments as given and named after ``metric_name``.

    Args:
        test_id (str, optional): ID of a stored composite metric to load.
        metric_name (str, optional): Class name to use when no ``test_id`` is given.
        unit_metrics (list[str], optional): Unit metric IDs to combine.
        output_template (str, optional): Output template to attach to the class.

    Returns:
        ``(error_message, None)`` if the stored definition could not be
        fetched, otherwise ``(None, metric_class)``. Note that the second
        element is the newly created class itself, not an instance.
    """
    # TODO: figure out this circular import thing:
    from ..api_client import get_metadata

    if test_id:
        # Pull the stored definition; any failure is reported through the
        # error-message slot of the return tuple rather than raised.
        try:
            stored_metrics = run_async(
                get_metadata, f"composite_metric_def:{test_id}:unit_metrics"
            )
            unit_metrics = stored_metrics["json"]
            stored_template = run_async(
                get_metadata, f"composite_metric_def:{test_id}:output_template"
            )
            output_template = stored_template["json"]["output_template"]
        except Exception:
            return f"Could not load composite metric {test_id}", None

    # NOTE(review): the literal below is reproduced exactly as it appears in the
    # listing, which carries no leading whitespace - confirm against the file.
    description = f"""
Composite metric built from the following unit metrics:
{', '.join([metric_id.split('.')[-1] for metric_id in unit_metrics])}
"""

    class_name = test_id.split(".")[-1] if test_id else metric_name
    class_attributes = {
        "__doc__": description,
        "_unit_metrics": unit_metrics,
        "_output_template": output_template,
    }
    class_def = type(class_name, (CompositeMetric,), class_attributes)

    # The composite needs every input that any of its unit metrics needs.
    needed_inputs = set()
    for unit_metric_id in unit_metrics:
        signature_inputs, _ = _inspect_signature(load_metric(unit_metric_id))
        needed_inputs.update(signature_inputs.keys())

    class_def.required_inputs = list(needed_inputs)

    return None, class_def
|
101
|
-
|
102
|
-
|
103
|
-
def run_metrics(
    name: str = None,
    metric_ids: List[str] = None,
    description: str = None,
    output_template: str = None,
    inputs: dict = None,
    accessed_inputs: List[str] = None,
    params: dict = None,
    test_id: str = None,
    show: bool = True,
    generate_description: bool = True,
) -> MetricResultWrapper:
    """Run a composite metric

    Composite metrics are metrics that are composed of multiple unit metrics. This
    works by running individual unit metrics and then combining the results into a
    single "MetricResult" object that can be logged and displayed just like any other
    metric result. The special thing about composite metrics is that when they are
    logged to the platform, metadata describing the unit metrics and output template
    used to generate the composite metric is also logged. This means that by grabbing
    the metadata for a composite metric (identified by the test ID
    `validmind.composite_metric.<name>`) the framework can rebuild and rerun it at
    any time.

    Args:
        name (str, optional): Name of the composite metric. Required if test_id is not
            provided. Defaults to None.
        metric_ids (list[str]): List of unit metric IDs to run. Required.
        description (str, optional): Description of the composite metric. Defaults to
            None.
        output_template (str, optional): Output template to customize the result
            table. When omitted, a default HTML table template is generated.
        inputs (dict, optional): Inputs to pass to the unit metrics. Defaults to None.
        accessed_inputs (list[str], optional): Inputs that were accessed when running
            the unit metrics - used for input tracking. Defaults to None.
        params (dict, optional): Parameters to pass to the unit metrics. Defaults to
            None.
        test_id (str, optional): Test ID of the composite metric. Required if name is
            not provided. Defaults to None.
        show (bool, optional): Whether to show the result immediately. Defaults to True.
        generate_description (bool, optional): Forwarded to
            `get_description_metadata` as `should_generate`. Defaults to True.

    Raises:
        ValueError: If metric_ids is not provided
        ValueError: If neither name nor test_id is provided

    Returns:
        MetricResultWrapper: The result wrapper object
    """
    if not metric_ids:
        raise ValueError("metric_ids must be provided")

    if not name and not test_id:
        # The parameter is called `test_id`; the message used to say "key".
        raise ValueError("name or test_id must be provided")

    # if name is provided, make sure to squash it into a camel case string
    if name:
        name = "".join(word[0].upper() + word[1:] for word in name.split())

    results = {}

    for metric_id in metric_ids:
        metric_name = test_id_to_name(metric_id)
        results[metric_name] = run_metric(
            metric_id=metric_id,
            inputs=inputs,
            params=params,
            show=False,
            value_only=True,
        )

    test_id = f"validmind.composite_metric.{name}" if not test_id else test_id

    if not output_template:
        # Build a default two-column table template. The quadruple braces
        # render as literal `{{ ... }}` so the placeholder survives into the
        # template text. The literals are kept flush-left as in the listing.

        def row(name):
            return f"""
<tr>
<td><strong>{name}</strong></td>
<td>{{{{ value['{name}'] | number }}}}</td>
</tr>
"""

        # Fix: the heading tag was emitted as `<h1Title</h1>` (missing `>`).
        output_template = f"""
<h1>{test_id_to_name(test_id)}</h1>
<table>
<thead>
<tr>
<th>Metric</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{"".join([row(name) for name in results])}
</tbody>
</table>
<style>
th, td {{
padding: 5px;
text-align: left;
}}
</style>
"""

    result_summary = ResultSummary(results=[ResultTable(data=[results])])
    result_wrapper = MetricResultWrapper(
        result_id=test_id,
        result_metadata=[
            get_description_metadata(
                test_id=test_id,
                default_description=description,
                summary=result_summary.serialize(),
                should_generate=generate_description,
            ),
            # These two entries are what `load_composite_metric` reads back to
            # rebuild the composite metric from its test ID.
            {
                "content_id": f"composite_metric_def:{test_id}:unit_metrics",
                "json": metric_ids,
            },
            {
                "content_id": f"composite_metric_def:{test_id}:output_template",
                "json": {"output_template": output_template},
            },
        ],
        inputs=accessed_inputs,
        output_template=output_template,
        metric=MetricResult(
            key=test_id,
            ref_id=str(uuid4()),
            value=results,
            summary=result_summary,
        ),
    )

    if show:
        result_wrapper.show()

    return result_wrapper
|
@@ -1,98 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
"""
|
6
|
-
Class for storing ValidMind metric objects and associated
|
7
|
-
data for display and reporting purposes
|
8
|
-
"""
|
9
|
-
from abc import abstractmethod
|
10
|
-
from dataclasses import dataclass
|
11
|
-
from typing import ClassVar, List, Optional, Union
|
12
|
-
|
13
|
-
import pandas as pd
|
14
|
-
|
15
|
-
from ...ai.test_descriptions import get_description_metadata
|
16
|
-
from ...errors import MissingCacheResultsArgumentsError
|
17
|
-
from ..figure import Figure
|
18
|
-
from .metric_result import MetricResult
|
19
|
-
from .result_wrapper import MetricResultWrapper
|
20
|
-
from .test import Test
|
21
|
-
|
22
|
-
|
23
|
-
@dataclass
class Metric(Test):
    """
    Base class for tests that produce a metric value and/or figures and cache
    them as a ``MetricResultWrapper``
    """

    # Class Variables
    test_type: ClassVar[str] = "Metric"

    type: ClassVar[str] = ""  # kind of metric, e.g. "training" or "evaluation"
    scope: ClassVar[str] = ""  # what the metric applies to, e.g. "training_dataset"
    value_formatter: ClassVar[Optional[str]] = None  # "records", "key_values" or None

    # Instance Variables
    result: MetricResultWrapper = None  # set by cache_results()

    @abstractmethod
    def summary(self, metric_value: Optional[Union[dict, list, pd.DataFrame]] = None):
        """
        Build the optional summary shown by renderers (e.g. Word and ValidMind UI)
        next to the metric results. Subclasses are expected to override this.

        This base implementation returns None, meaning "no summary".
        """
        return None

    def cache_results(
        self,
        metric_value: Optional[Union[dict, list, pd.DataFrame]] = None,
        figures: Optional[List[Figure]] = None,
    ):
        """
        Package the computed value and/or figures into ``self.result``

        Args:
            metric_value (Union[dict, list, pd.DataFrame]): The value of the metric.
                Stored as an empty dict when None.
            figures (Optional[List[Figure]]): Any figures to attach to the result

        Raises:
            MissingCacheResultsArgumentsError: If both arguments are None

        Returns:
            MetricResultWrapper: The wrapper that was stored on ``self.result``
        """
        nothing_to_cache = metric_value is None and figures is None
        if nothing_to_cache:
            raise MissingCacheResultsArgumentsError(
                "Metric must provide a metric value or figures to cache_results"
            )

        stored_value = {} if metric_value is None else metric_value
        metric = MetricResult(
            key=self.test_id,
            ref_id=self._ref_id,
            value=stored_value,
            value_formatter=self.value_formatter,
            # summary() receives the raw value, not the `{}` placeholder
            summary=self.summary(metric_value),
        )

        # Intermediate values are computed in the same order the original
        # keyword arguments were evaluated.
        wrapper_description = self.description()
        description_metadata = get_description_metadata(
            test_id=self.test_id,
            default_description=self.description(),
            summary=metric.serialize()["summary"],
            figures=figures,
            should_generate=self.generate_description,
        )

        self.result = MetricResultWrapper(
            result_id=self.test_id,
            result_description=wrapper_description,
            result_metadata=[description_metadata],
            metric=metric,
            figures=figures,
            inputs=self.get_accessed_inputs(),
            output_template=self.output_template,
        )

        return self.result
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
"""
|
6
|
-
MetricResult wrapper
|
7
|
-
"""
|
8
|
-
|
9
|
-
|
10
|
-
from dataclasses import dataclass
|
11
|
-
from typing import Optional, Union
|
12
|
-
|
13
|
-
import pandas as pd
|
14
|
-
|
15
|
-
from ...errors import InvalidValueFormatterError
|
16
|
-
from ...utils import format_key_values, format_records
|
17
|
-
from .result_summary import ResultSummary
|
18
|
-
|
19
|
-
|
20
|
-
@dataclass
class MetricResult:
    """
    Container for a single metric value together with its identifiers, an
    optional summary and an optional formatter name used when serializing
    """

    # NOTE(review): annotated as dict, though it looks like callers pass the
    # test ID string here - confirm before relying on the annotation.
    key: dict
    ref_id: str
    value: Union[dict, list, pd.DataFrame]
    summary: Optional[ResultSummary] = None
    value_formatter: Optional[str] = None

    def serialize(self):
        """
        Convert the result into a plain dictionary that can be sent to the API

        Raises:
            InvalidValueFormatterError: If ``value_formatter`` is not one of
                "records", "key_values" or None, or if the value is a
                DataFrame and no formatter was given.
        """
        formatter = self.value_formatter

        if formatter is None:
            # TODO: we need to handle formatting arbitrary shapes of data
            value = self.value

            # A raw DataFrame needs a formatter to be turned into plain data.
            if isinstance(value, pd.DataFrame):
                raise InvalidValueFormatterError(
                    "A DataFrame value was provided but no value_formatter was specified."
                )
        elif formatter == "records":
            value = format_records(self.value)
        elif formatter == "key_values":
            value = format_key_values(self.value)
        else:
            raise InvalidValueFormatterError(
                f"Invalid value_formatter: {self.value_formatter}. "
                "Must be one of 'records' or 'key_values'"
            )

        serialized_summary = self.summary.serialize() if self.summary else None

        return {
            "key": self.key,
            "ref_id": self.ref_id,
            "value": value,
            "summary": serialized_summary,
        }
|
@@ -1,55 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from datetime import datetime
|
6
|
-
|
7
|
-
from dateutil import parser
|
8
|
-
from jinja2 import Environment
|
9
|
-
|
10
|
-
|
11
|
-
def format_date(value, format="%Y-%m-%d"):
    """Render a date as text.

    Args:
        value: None, a ``datetime``, or anything ``dateutil``'s ``parser.parse``
            accepts.
        format (str): ``strftime`` pattern. Defaults to "%Y-%m-%d".

    Returns:
        The formatted string, or None when ``value`` is None.
    """
    if value is None:
        return None

    # Anything that is not already a datetime is parsed first.
    moment = value if isinstance(value, datetime) else parser.parse(value)
    return moment.strftime(format)
|
19
|
-
|
20
|
-
|
21
|
-
def format_number(value, format="{:,.4f}"):
    """Apply a ``str.format`` pattern to ``value``.

    The default pattern uses thousands separators and four decimal places.
    """
    pattern = format
    return pattern.format(value)
|
23
|
-
|
24
|
-
|
25
|
-
def _generate_empty_historical_data(value):
    """Return a one-entry history list holding ``value`` stamped with today's date."""
    created_at = format_date(datetime.now())
    entry = {
        "value": value,
        "metadata": {
            "created_at": created_at,
        },
    }
    return [entry]
|
34
|
-
|
35
|
-
|
36
|
-
class OutputTemplate:
    """Template string paired with the engine used to render it."""

    def __init__(self, template_string, template_engine=None):
        """Store the template and pick the engine.

        When no engine is supplied, a fresh ``Environment`` is created and the
        ``date`` and ``number`` filters are registered on it. A caller-supplied
        engine is used as-is; no filters are added to it.
        """
        engine = template_engine
        if engine is None:
            engine = Environment()
            engine.filters["date"] = format_date
            engine.filters["number"] = format_number

        self.template_engine = engine
        self.template_string = template_string

    def render(self, value, values_history=None):
        """Render the template.

        The template receives ``value`` and ``metric_history``. If
        ``values_history`` is empty or None, a single-entry history built from
        ``value`` is used instead.
        """
        compiled = self.template_engine.from_string(self.template_string)

        history = (
            values_history
            if values_history
            else _generate_empty_historical_data(value)
        )

        return compiled.render(value=value, metric_history=history)
|
@@ -1,76 +0,0 @@
|
|
1
|
-
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
-
# See the LICENSE file in the root of this repository for details.
|
3
|
-
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
-
|
5
|
-
from dataclasses import dataclass
|
6
|
-
from typing import Any, List, Union
|
7
|
-
|
8
|
-
import pandas as pd
|
9
|
-
|
10
|
-
|
11
|
-
@dataclass
class ResultTableMetadata:
    """
    Metadata attached to a summary table; currently just its title
    """

    title: str
|
18
|
-
|
19
|
-
|
20
|
-
@dataclass
class ResultTable:
    """
    One table of a result summary: the rows, a type tag and optional metadata
    """

    data: Union[List[Any], pd.DataFrame]
    type: str = "table"
    metadata: ResultTableMetadata = None

    def serialize(self, as_df=False):
        """
        Convert the table into a dictionary so it can be sent to the API.

        Values are rounded to 4 decimal places. With ``as_df=True`` the "data"
        entry stays a DataFrame (used when returning the data to R); otherwise
        it is a list of row dictionaries. A "metadata" entry is included only
        when metadata is set.
        """
        # Go through a DataFrame so that values are rounded in a standard way
        frame = self.data
        if isinstance(frame, list):
            frame = pd.DataFrame(frame)
        frame = frame.round(4)

        payload = {
            "type": self.type,
            "data": frame if as_df else frame.to_dict(orient="records"),
        }

        if self.metadata is not None:
            payload["metadata"] = vars(self.metadata)

        return payload
|
54
|
-
|
55
|
-
|
56
|
-
@dataclass()
class ResultSummary:
    """
    Ordered collection of result tables summarizing a metric or threshold test
    """

    results: List[ResultTable]  # TBD other types of results

    def add_result(self, result: ResultTable):
        """
        Append ``result``, starting a new list first if ``results`` is None
        """
        if self.results is None:
            self.results = []

        self.results.append(result)

    def serialize(self, as_df=False):
        """
        Serialize every contained result, forwarding ``as_df`` to each one
        """
        serialized = []
        for table in self.results:
            serialized.append(table.serialize(as_df))
        return serialized
|