validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
validmind/utils.py
CHANGED
@@ -9,8 +9,10 @@ import json
|
|
9
9
|
import math
|
10
10
|
import re
|
11
11
|
import sys
|
12
|
+
import warnings
|
13
|
+
from datetime import date, datetime, time
|
12
14
|
from platform import python_version
|
13
|
-
from typing import Any
|
15
|
+
from typing import Any, Dict, List
|
14
16
|
|
15
17
|
import matplotlib.pylab as pylab
|
16
18
|
import mistune
|
@@ -24,6 +26,7 @@ from IPython.display import display as ipy_display
|
|
24
26
|
from latex2mathml.converter import convert
|
25
27
|
from matplotlib.axes._axes import _log as matplotlib_axes_logger
|
26
28
|
from numpy import ndarray
|
29
|
+
from sklearn.exceptions import UndefinedMetricWarning
|
27
30
|
from tabulate import tabulate
|
28
31
|
|
29
32
|
from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
|
@@ -32,6 +35,11 @@ from .logging import get_logger
|
|
32
35
|
DEFAULT_BIG_NUMBER_DECIMALS = 2
|
33
36
|
DEFAULT_SMALL_NUMBER_DECIMALS = 4
|
34
37
|
|
38
|
+
# Suppress some common warnings
|
39
|
+
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
|
40
|
+
warnings.filterwarnings(
|
41
|
+
"ignore", category=UserWarning, message=".*valid feature names.*"
|
42
|
+
)
|
35
43
|
|
36
44
|
# SETUP SOME DEFAULTS FOR PLOTS #
|
37
45
|
# Silence this warning: *c* argument looks like a single numeric RGB or
|
@@ -93,23 +101,64 @@ def nan_to_none(obj):
|
|
93
101
|
|
94
102
|
|
95
103
|
class NumpyEncoder(json.JSONEncoder):
|
104
|
+
def __init__(self, *args, **kwargs):
|
105
|
+
super().__init__(*args, **kwargs)
|
106
|
+
self.type_handlers = {
|
107
|
+
self.is_datetime: lambda obj: obj.isoformat(),
|
108
|
+
self.is_pandas_interval: lambda obj: f"[{obj.left}, {obj.right}]",
|
109
|
+
self.is_numpy_integer: lambda obj: int(obj),
|
110
|
+
self.is_numpy_floating: lambda obj: float(obj),
|
111
|
+
self.is_numpy_ndarray: lambda obj: obj.tolist(),
|
112
|
+
self.is_numpy_bool: lambda obj: bool(obj),
|
113
|
+
self.is_pandas_timestamp: lambda obj: str(obj),
|
114
|
+
self.is_set: lambda obj: list(obj),
|
115
|
+
self.is_quantlib_date: lambda obj: obj.ISO(),
|
116
|
+
self.is_generic_object: self.handle_generic_object,
|
117
|
+
}
|
118
|
+
|
96
119
|
def default(self, obj):
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
return int(obj)
|
101
|
-
if isinstance(obj, np.floating):
|
102
|
-
return float(obj)
|
103
|
-
if isinstance(obj, np.ndarray):
|
104
|
-
return obj.tolist()
|
105
|
-
if isinstance(obj, np.bool_):
|
106
|
-
return bool(obj)
|
107
|
-
if isinstance(obj, pd.Timestamp):
|
108
|
-
return str(obj)
|
109
|
-
if isinstance(obj, set):
|
110
|
-
return list(obj)
|
120
|
+
for type_check, handler in self.type_handlers.items():
|
121
|
+
if type_check(obj):
|
122
|
+
return handler(obj)
|
111
123
|
return super().default(obj)
|
112
124
|
|
125
|
+
def is_datetime(self, obj):
|
126
|
+
return isinstance(obj, (datetime, date, time))
|
127
|
+
|
128
|
+
def is_pandas_interval(self, obj):
|
129
|
+
return isinstance(obj, pd.Interval)
|
130
|
+
|
131
|
+
def is_numpy_integer(self, obj):
|
132
|
+
return isinstance(obj, np.integer)
|
133
|
+
|
134
|
+
def is_numpy_floating(self, obj):
|
135
|
+
return isinstance(obj, np.floating)
|
136
|
+
|
137
|
+
def is_numpy_ndarray(self, obj):
|
138
|
+
return isinstance(obj, np.ndarray)
|
139
|
+
|
140
|
+
def is_numpy_bool(self, obj):
|
141
|
+
return isinstance(obj, np.bool_)
|
142
|
+
|
143
|
+
def is_pandas_timestamp(self, obj):
|
144
|
+
return isinstance(obj, pd.Timestamp)
|
145
|
+
|
146
|
+
def is_set(self, obj):
|
147
|
+
return isinstance(obj, set)
|
148
|
+
|
149
|
+
def is_quantlib_date(self, obj):
|
150
|
+
return "QuantLib.Date" in str(type(obj))
|
151
|
+
|
152
|
+
def is_generic_object(self, obj):
|
153
|
+
return isinstance(obj, object)
|
154
|
+
|
155
|
+
def handle_generic_object(self, obj):
|
156
|
+
return (
|
157
|
+
obj.__str__()
|
158
|
+
if type(obj).__dict__.get("__str__")
|
159
|
+
else str(obj).split(".")[1].split(" ")[0]
|
160
|
+
)
|
161
|
+
|
113
162
|
def encode(self, obj):
|
114
163
|
obj = nan_to_none(obj)
|
115
164
|
return super().encode(obj)
|
@@ -158,7 +207,7 @@ def precision_and_scale(x):
|
|
158
207
|
return (magnitude + scale, scale)
|
159
208
|
|
160
209
|
|
161
|
-
def format_records(df):
|
210
|
+
def format_records(df: pd.DataFrame) -> List[Dict[str, Any]]:
|
162
211
|
"""
|
163
212
|
Round the values on each dataframe's column to a given number of decimal places.
|
164
213
|
The returned value is converted to a dict in "records" with Pandas's to_dict() function.
|
@@ -191,7 +240,7 @@ def format_records(df):
|
|
191
240
|
return df.to_dict("records")
|
192
241
|
|
193
242
|
|
194
|
-
def format_key_values(key_values):
|
243
|
+
def format_key_values(key_values: Dict[str, Any]) -> Dict[str, Any]:
|
195
244
|
"""
|
196
245
|
Round the values on each dict's value to a given number of decimal places.
|
197
246
|
|
validmind/vm_models/__init__.py
CHANGED
@@ -10,13 +10,7 @@ from .dataset.dataset import VMDataset
|
|
10
10
|
from .figure import Figure
|
11
11
|
from .input import VMInput
|
12
12
|
from .model import R_MODEL_TYPES, ModelAttributes, VMModel
|
13
|
-
from .
|
14
|
-
from .test.metric_result import MetricResult
|
15
|
-
from .test.result_summary import ResultSummary, ResultTable, ResultTableMetadata
|
16
|
-
from .test.test import Test
|
17
|
-
from .test.threshold_test import ThresholdTest
|
18
|
-
from .test.threshold_test_result import ThresholdTestResult, ThresholdTestResults
|
19
|
-
from .test_context import TestContext, TestInput
|
13
|
+
from .result import ResultTable, TestResult
|
20
14
|
from .test_suite.runner import TestSuiteRunner
|
21
15
|
from .test_suite.test_suite import TestSuite
|
22
16
|
|
@@ -27,17 +21,8 @@ __all__ = [
|
|
27
21
|
"Figure",
|
28
22
|
"ModelAttributes",
|
29
23
|
"R_MODEL_TYPES",
|
30
|
-
"ResultSummary",
|
31
24
|
"ResultTable",
|
32
|
-
"
|
33
|
-
"Test",
|
34
|
-
"Metric",
|
35
|
-
"MetricResult",
|
36
|
-
"ThresholdTest",
|
37
|
-
"ThresholdTestResult",
|
38
|
-
"ThresholdTestResults",
|
39
|
-
"TestContext",
|
40
|
-
"TestInput",
|
25
|
+
"TestResult",
|
41
26
|
"TestSuite",
|
42
27
|
"TestSuiteRunner",
|
43
28
|
]
|
@@ -48,6 +48,9 @@ class VMDataset(VMInput):
|
|
48
48
|
extra_columns (Dict): Extra columns to include in the dataset.
|
49
49
|
"""
|
50
50
|
|
51
|
+
def __repr__(self):
|
52
|
+
return f"VMDataset(input_id={self.input_id})"
|
53
|
+
|
51
54
|
def __init__(
|
52
55
|
self,
|
53
56
|
raw_dataset: np.ndarray,
|
@@ -430,7 +433,12 @@ class VMDataset(VMInput):
|
|
430
433
|
Returns:
|
431
434
|
np.ndarray: The predictions for the model
|
432
435
|
"""
|
433
|
-
|
436
|
+
pred_col = self.prediction_column(model)
|
437
|
+
|
438
|
+
if pred_col is None:
|
439
|
+
raise ValueError(f"No prediction column found for model `{model.input_id}`")
|
440
|
+
|
441
|
+
return np.stack(self._df[pred_col].values)
|
434
442
|
|
435
443
|
def y_prob(self, model) -> np.ndarray:
|
436
444
|
"""Returns the probabilities for a given model.
|
@@ -441,7 +449,14 @@ class VMDataset(VMInput):
|
|
441
449
|
Returns:
|
442
450
|
np.ndarray: The probability variables.
|
443
451
|
"""
|
444
|
-
|
452
|
+
prob_col = self.probability_column(model)
|
453
|
+
|
454
|
+
if prob_col is None:
|
455
|
+
raise ValueError(
|
456
|
+
f"No probability column found for model `{model.input_id}`"
|
457
|
+
)
|
458
|
+
|
459
|
+
return self._df[prob_col].values
|
445
460
|
|
446
461
|
def x_df(self):
|
447
462
|
"""Returns a dataframe containing only the feature columns"""
|
@@ -453,11 +468,23 @@ class VMDataset(VMInput):
|
|
453
468
|
|
454
469
|
def y_pred_df(self, model) -> pd.DataFrame:
|
455
470
|
"""Returns a dataframe containing the predictions for a given model"""
|
456
|
-
|
471
|
+
pred_col = self.prediction_column(model)
|
472
|
+
|
473
|
+
if pred_col is None:
|
474
|
+
raise ValueError(f"No prediction column found for model `{model.input_id}`")
|
475
|
+
|
476
|
+
return as_df(self._df[pred_col])
|
457
477
|
|
458
478
|
def y_prob_df(self, model) -> pd.DataFrame:
|
459
479
|
"""Returns a dataframe containing the probabilities for a given model"""
|
460
|
-
|
480
|
+
prob_col = self.probability_column(model)
|
481
|
+
|
482
|
+
if prob_col is None:
|
483
|
+
raise ValueError(
|
484
|
+
f"No probability column found for model `{model.input_id}`"
|
485
|
+
)
|
486
|
+
|
487
|
+
return as_df(self._df[prob_col])
|
461
488
|
|
462
489
|
def target_classes(self):
|
463
490
|
"""Returns the target class labels or unique values of the target column."""
|
validmind/vm_models/figure.py
CHANGED
@@ -10,14 +10,14 @@ import base64
|
|
10
10
|
import json
|
11
11
|
from dataclasses import dataclass
|
12
12
|
from io import BytesIO
|
13
|
-
from typing import
|
13
|
+
from typing import Union
|
14
14
|
|
15
15
|
import ipywidgets as widgets
|
16
16
|
import matplotlib
|
17
17
|
import plotly.graph_objs as go
|
18
18
|
|
19
19
|
from ..client_config import client_config
|
20
|
-
from ..errors import
|
20
|
+
from ..errors import UnsupportedFigureError
|
21
21
|
from ..utils import get_full_typename
|
22
22
|
|
23
23
|
|
@@ -40,25 +40,12 @@ class Figure:
|
|
40
40
|
"""
|
41
41
|
|
42
42
|
key: str
|
43
|
-
figure:
|
44
|
-
|
45
|
-
for_object: Optional[object] = None
|
46
|
-
extras: Optional[dict] = None
|
43
|
+
figure: Union[matplotlib.figure.Figure, go.Figure, go.FigureWidget, bytes]
|
44
|
+
ref_id: str # used to link figures to results
|
47
45
|
|
48
|
-
_type: str = "plot"
|
46
|
+
_type: str = "plot" # for now this is the only figure type
|
49
47
|
|
50
48
|
def __post_init__(self):
|
51
|
-
"""
|
52
|
-
Set default params if not provided
|
53
|
-
"""
|
54
|
-
if self.for_object is not None:
|
55
|
-
metadata = self.metadata or {}
|
56
|
-
# Use underscore to avoid name collisions with user-defined metadata
|
57
|
-
metadata["_type"] = self._get_for_object_type()
|
58
|
-
metadata["_name"] = getattr(self.for_object, "test_id", None)
|
59
|
-
metadata["_ref_id"] = getattr(self.for_object, "_ref_id", None)
|
60
|
-
self.metadata = metadata
|
61
|
-
|
62
49
|
# Wrap around with FigureWidget so that we can display interactive Plotly
|
63
50
|
# plots in regular Jupyter notebooks. This is not supported on Google Colab.
|
64
51
|
if (
|
@@ -68,23 +55,6 @@ class Figure:
|
|
68
55
|
):
|
69
56
|
self.figure = go.FigureWidget(self.figure)
|
70
57
|
|
71
|
-
def _get_for_object_type(self):
|
72
|
-
"""
|
73
|
-
Returns the type of the object this figure is for
|
74
|
-
"""
|
75
|
-
# Avoid circular imports
|
76
|
-
from .test.metric import Metric
|
77
|
-
from .test.threshold_test import ThresholdTest
|
78
|
-
|
79
|
-
if issubclass(self.for_object.__class__, Metric):
|
80
|
-
return "metric"
|
81
|
-
elif issubclass(self.for_object.__class__, ThresholdTest):
|
82
|
-
return "threshold_test"
|
83
|
-
else:
|
84
|
-
raise InvalidFigureForObjectError(
|
85
|
-
"Figure for_object must be a Metric or ThresholdTest object"
|
86
|
-
)
|
87
|
-
|
88
58
|
def to_widget(self):
|
89
59
|
"""
|
90
60
|
Returns the ipywidget compatible representation of the figure. Ideally
|
@@ -135,7 +105,7 @@ class Figure:
|
|
135
105
|
return {
|
136
106
|
"type": self._type,
|
137
107
|
"key": self.key,
|
138
|
-
"metadata": json.dumps(self.
|
108
|
+
"metadata": json.dumps({"_ref_id": self.ref_id}, allow_nan=False),
|
139
109
|
}
|
140
110
|
|
141
111
|
def _get_b64_url(self):
|
@@ -176,7 +146,7 @@ class Figure:
|
|
176
146
|
|
177
147
|
elif is_plotly_figure(self.figure):
|
178
148
|
# When using plotly, we need to use we will produce two files:
|
179
|
-
# - a JSON file that will be used to display the figure in the
|
149
|
+
# - a JSON file that will be used to display the figure in the ValidMind Platform
|
180
150
|
# - a PNG file that will be used to display the figure in documents
|
181
151
|
return {
|
182
152
|
"image": (
|
validmind/vm_models/model.py
CHANGED
@@ -0,0 +1,7 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from .result import ErrorResult, Result, ResultTable, TestResult
|
6
|
+
|
7
|
+
__all__ = ["ErrorResult", "Result", "ResultTable", "TestResult"]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<h2>{{ test_name }} {{ passed_icon }}</h2>
|
2
|
+
|
3
|
+
{{ description }}
|
4
|
+
|
5
|
+
{% if inputs %}
|
6
|
+
<h3>Test Inputs</h3>
|
7
|
+
<pre>
|
8
|
+
{{ inputs }}
|
9
|
+
</pre>
|
10
|
+
{% endif %}
|
11
|
+
|
12
|
+
{% if params %}
|
13
|
+
<h3>Test Parameters</h3>
|
14
|
+
<pre>
|
15
|
+
{{ params }}
|
16
|
+
</pre>
|
17
|
+
{% endif %}
|
18
|
+
|
19
|
+
{% if show_metric %}
|
20
|
+
<h3>Metric: <code language="json">{{ metric }}</code></h3>
|
21
|
+
{% endif %}
|
@@ -0,0 +1,337 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
"""
|
6
|
+
Result Objects for test results
|
7
|
+
"""
|
8
|
+
import asyncio
|
9
|
+
import json
|
10
|
+
from abc import abstractmethod
|
11
|
+
from dataclasses import dataclass
|
12
|
+
from typing import Any, Dict, List, Optional, Union
|
13
|
+
from uuid import uuid4
|
14
|
+
|
15
|
+
import pandas as pd
|
16
|
+
from ipywidgets import HTML, VBox
|
17
|
+
|
18
|
+
from ... import api_client
|
19
|
+
from ...ai.utils import DescriptionFuture
|
20
|
+
from ...logging import get_logger
|
21
|
+
from ...utils import NumpyEncoder, display, run_async, test_id_to_name
|
22
|
+
from ..figure import Figure
|
23
|
+
from ..input import VMInput
|
24
|
+
from .utils import (
|
25
|
+
AI_REVISION_NAME,
|
26
|
+
DEFAULT_REVISION_NAME,
|
27
|
+
check_for_sensitive_data,
|
28
|
+
figures_to_widgets,
|
29
|
+
get_result_template,
|
30
|
+
tables_to_widgets,
|
31
|
+
update_metadata,
|
32
|
+
)
|
33
|
+
|
34
|
+
logger = get_logger(__name__)
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass
|
38
|
+
class ResultTable:
|
39
|
+
"""
|
40
|
+
A dataclass that holds the table summary of result
|
41
|
+
"""
|
42
|
+
|
43
|
+
data: Union[List[Any], pd.DataFrame]
|
44
|
+
title: str
|
45
|
+
|
46
|
+
def __repr__(self) -> str:
|
47
|
+
return f'ResultTable(title="{self.title}")' if self.title else "ResultTable"
|
48
|
+
|
49
|
+
def __post_init__(self):
|
50
|
+
if isinstance(self.data, list):
|
51
|
+
self.data = pd.DataFrame(self.data)
|
52
|
+
|
53
|
+
self.data = self.data.round(4)
|
54
|
+
|
55
|
+
def serialize(self):
|
56
|
+
data = {
|
57
|
+
"type": "table",
|
58
|
+
"data": self.data.to_dict(orient="records"),
|
59
|
+
}
|
60
|
+
|
61
|
+
if self.title:
|
62
|
+
data["metadata"] = {"title": self.title}
|
63
|
+
|
64
|
+
return data
|
65
|
+
|
66
|
+
|
67
|
+
@dataclass
|
68
|
+
class Result:
|
69
|
+
"""Base Class for test suite results"""
|
70
|
+
|
71
|
+
result_id: str = None
|
72
|
+
name: str = None
|
73
|
+
|
74
|
+
def __str__(self) -> str:
|
75
|
+
"""May be overridden by subclasses"""
|
76
|
+
return self.__class__.__name__
|
77
|
+
|
78
|
+
@abstractmethod
|
79
|
+
def to_widget(self):
|
80
|
+
"""Create an ipywdiget representation of the result... Must be overridden by subclasses"""
|
81
|
+
raise NotImplementedError
|
82
|
+
|
83
|
+
@abstractmethod
|
84
|
+
def log(self):
|
85
|
+
"""Log the result... Must be overridden by subclasses"""
|
86
|
+
raise NotImplementedError
|
87
|
+
|
88
|
+
def show(self):
|
89
|
+
"""Display the result... May be overridden by subclasses"""
|
90
|
+
display(self.to_widget())
|
91
|
+
|
92
|
+
|
93
|
+
@dataclass
|
94
|
+
class ErrorResult(Result):
|
95
|
+
"""Result for test suites that fail to load or run properly"""
|
96
|
+
|
97
|
+
name: str = "Failed Test"
|
98
|
+
error: Exception = None
|
99
|
+
message: str = None
|
100
|
+
|
101
|
+
def __repr__(self) -> str:
|
102
|
+
return f'ErrorResult(result_id="{self.result_id}")'
|
103
|
+
|
104
|
+
def to_widget(self):
|
105
|
+
return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
|
106
|
+
|
107
|
+
async def log_async(self):
|
108
|
+
pass
|
109
|
+
|
110
|
+
|
111
|
+
@dataclass
|
112
|
+
class TestResult(Result):
|
113
|
+
"""Test result"""
|
114
|
+
|
115
|
+
name: str = "Test Result"
|
116
|
+
ref_id: str = None
|
117
|
+
title: Optional[str] = None
|
118
|
+
description: Optional[Union[str, DescriptionFuture]] = None
|
119
|
+
metric: Optional[Union[int, float]] = None
|
120
|
+
tables: Optional[List[ResultTable]] = None
|
121
|
+
figures: Optional[List[Figure]] = None
|
122
|
+
passed: Optional[bool] = None
|
123
|
+
params: Optional[Dict[str, Any]] = None
|
124
|
+
inputs: Optional[Dict[str, Union[List[VMInput], VMInput]]] = None
|
125
|
+
metadata: Optional[Dict[str, Any]] = None
|
126
|
+
title: Optional[str] = None
|
127
|
+
_was_description_generated: bool = False
|
128
|
+
_unsafe: bool = False
|
129
|
+
|
130
|
+
@property
|
131
|
+
def test_name(self) -> str:
|
132
|
+
"""Get the test name, using custom title if available."""
|
133
|
+
return self.title or test_id_to_name(self.result_id)
|
134
|
+
|
135
|
+
def __repr__(self) -> str:
|
136
|
+
attrs = [
|
137
|
+
attr
|
138
|
+
for attr in [
|
139
|
+
"description",
|
140
|
+
"params",
|
141
|
+
"tables",
|
142
|
+
"figures",
|
143
|
+
"metric",
|
144
|
+
"passed",
|
145
|
+
]
|
146
|
+
if getattr(self, attr) is not None
|
147
|
+
]
|
148
|
+
|
149
|
+
return f'TestResult("{self.result_id}", {", ".join(attrs)})'
|
150
|
+
|
151
|
+
def __post_init__(self):
|
152
|
+
if self.ref_id is None:
|
153
|
+
self.ref_id = str(uuid4())
|
154
|
+
|
155
|
+
def _get_flat_inputs(self):
|
156
|
+
# remove duplicates by `input_id`
|
157
|
+
inputs = {}
|
158
|
+
|
159
|
+
for input_or_list in self.inputs.values():
|
160
|
+
if isinstance(input_or_list, list):
|
161
|
+
inputs.update({input.input_id: input for input in input_or_list})
|
162
|
+
else:
|
163
|
+
inputs[input_or_list.input_id] = input_or_list
|
164
|
+
|
165
|
+
return list(inputs.values())
|
166
|
+
|
167
|
+
def add_table(self, table: ResultTable):
|
168
|
+
if self.tables is None:
|
169
|
+
self.tables = []
|
170
|
+
|
171
|
+
self.tables.append(table)
|
172
|
+
|
173
|
+
def add_figure(self, figure: Figure):
|
174
|
+
if self.figures is None:
|
175
|
+
self.figures = []
|
176
|
+
|
177
|
+
if figure.ref_id != self.ref_id:
|
178
|
+
figure.ref_id = self.ref_id
|
179
|
+
|
180
|
+
self.figures.append(figure)
|
181
|
+
|
182
|
+
def to_widget(self):
|
183
|
+
if isinstance(self.description, DescriptionFuture):
|
184
|
+
self.description = self.description.get_description()
|
185
|
+
self._was_description_generated = True
|
186
|
+
|
187
|
+
if self.metric is not None and not self.tables and not self.figures:
|
188
|
+
return HTML(f"<h3>{self.test_name}: <code>{self.metric}</code></h3>")
|
189
|
+
|
190
|
+
template_data = {
|
191
|
+
"test_name": self.test_name,
|
192
|
+
"passed_icon": "" if self.passed is None else "✅" if self.passed else "❌",
|
193
|
+
"description": self.description.replace("h3", "strong"),
|
194
|
+
"params": (
|
195
|
+
json.dumps(self.params, cls=NumpyEncoder, indent=2)
|
196
|
+
if self.params
|
197
|
+
else None
|
198
|
+
),
|
199
|
+
"show_metric": self.metric is not None,
|
200
|
+
"metric": self.metric,
|
201
|
+
"tables": self.tables,
|
202
|
+
"figures": self.figures,
|
203
|
+
}
|
204
|
+
rendered = get_result_template().render(**template_data)
|
205
|
+
|
206
|
+
widgets = [HTML(rendered)]
|
207
|
+
|
208
|
+
if self.tables:
|
209
|
+
widgets.extend(tables_to_widgets(self.tables))
|
210
|
+
if self.figures:
|
211
|
+
widgets.extend(figures_to_widgets(self.figures))
|
212
|
+
|
213
|
+
return VBox(widgets)
|
214
|
+
|
215
|
+
def _validate_section_id_for_block(
    self, section_id: str, position: Union[int, None] = None
):
    """Validate the section_id exists on the template before logging"""
    # Refresh the client config so we validate against the latest template
    api_client.reload()
    client_config = api_client.client_config

    # Locate the target section in the documentation template
    section = next(
        (
            candidate
            for candidate in client_config.documentation_template["sections"]
            if candidate["id"] == section_id
        ),
        None,
    )
    if section is None:
        raise ValueError(
            f"Section with id {section_id} not found in the model's document"
        )

    # Check if the block already exists in the section
    block_definition = {
        "content_id": self.result_id,
        "content_type": "test",
    }
    blocks = section.get("contents", [])
    for block in blocks:
        if (
            block["content_id"] == block_definition["content_id"]
            and block["content_type"] == block_definition["content_type"]
        ):
            logger.info(
                f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
            )
            return

    # Validate that the position is within the bounds of the section
    if position is not None:
        num_blocks = len(blocks)
        if position < 0 or position > num_blocks:
            raise ValueError(
                f"Invalid position {position}. Must be between 0 and {num_blocks}"
            )
|
256
|
+
|
257
|
+
def serialize(self):
    """Serialize the result for the API"""
    tables = self.tables or []
    inputs = [item.input_id for item in self._get_flat_inputs()]

    return {
        "test_name": self.result_id,
        "title": self.title,
        "ref_id": self.ref_id,
        "params": self.params,
        "inputs": inputs,
        "passed": self.passed,
        "summary": [table.serialize() for table in tables],
        "metadata": self.metadata,
    }
|
269
|
+
|
270
|
+
async def log_async(
    self, section_id: str = None, position: int = None, unsafe: bool = False
):
    """Log the test result to the ValidMind API asynchronously.

    Args:
        section_id (str): The section ID within the model document to insert
            the test result.
        position (int): The position (index) within the section to insert
            the test result.
        unsafe (bool): Unused here; the sensitive-data check happens in
            `log()`. Kept so the signature mirrors `log()`.

    Returns:
        list: The results of all API calls, as returned by `asyncio.gather`.
    """
    tasks = []  # collect tasks to run in parallel (async)

    if self.metric is not None:
        # metrics are logged as separate entities
        tasks.append(
            api_client.alog_metric(
                key=self.result_id,
                value=self.metric,
                # `_input` (not `input`) to avoid shadowing the builtin,
                # consistent with `serialize()`
                inputs=[_input.input_id for _input in self._get_flat_inputs()],
                params=self.params,
            )
        )

    if self.tables or self.figures:
        tasks.append(
            api_client.alog_test_result(
                result=self.serialize(),
                section_id=section_id,
                position=position,
            )
        )

    # figures are uploaded individually, separate from the result record
    tasks.extend(
        [api_client.alog_figure(figure) for figure in (self.figures or [])]
    )

    if self.description:
        # resolve a lazily-generated (AI) description before uploading
        if isinstance(self.description, DescriptionFuture):
            self.description = self.description.get_description()
            self._was_description_generated = True

        # AI-generated descriptions get a distinct revision name so the
        # platform can distinguish them from user-authored text
        revision_name = (
            AI_REVISION_NAME
            if self._was_description_generated
            else DEFAULT_REVISION_NAME
        )

        tasks.append(
            update_metadata(
                content_id=f"test_description:{self.result_id}::{revision_name}",
                text=self.description,
            )
        )

    return await asyncio.gather(*tasks)
|
318
|
+
|
319
|
+
def log(self, section_id: str = None, position: int = None, unsafe: bool = False):
    """Log the result to ValidMind

    Args:
        section_id (str): The section ID within the model document to insert the
            test result
        position (int): The position (index) within the section to insert the test
            result
        unsafe (bool): If True, log the result even if it contains sensitive data
            i.e. raw data from input datasets
    """
    # Scan result tables for raw input-dataset values before uploading,
    # unless the caller explicitly opted out with `unsafe=True`.
    if not unsafe:
        for table in self.tables or []:
            check_for_sensitive_data(table.data, self._get_flat_inputs())

    # Fail fast (before any API writes) if the target section doesn't
    # exist or the requested position is out of bounds.
    if section_id:
        self._validate_section_id_for_block(section_id, position)

    # NOTE(review): `unsafe` is not forwarded here — `log_async` does not
    # use it; the sensitive-data check above is the only consumer.
    run_async(self.log_async, section_id=section_id, position=position)
|