validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.24.dist-info/METADATA +0 -118
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
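Several tests are renamed in this release (for example `DFGLSArch.py → DickeyFullerGLS.py`, `AspectCritique.py → AspectCritic.py`, `AnswerRelevance.py → ResponseRelevancy.py`, `AnswerSimilarity.py → SemanticSimilarity.py`). Since ValidMind test IDs mirror module paths, these renames change the corresponding test IDs. A minimal migration sketch, assuming the standard `validmind.tests.run_test` entry point; the `my_dataset` input is a hypothetical stand-in:

```python
import validmind as vm

# The old ID "validmind.model_validation.ragas.AspectCritique" no longer
# resolves; reference the renamed module instead (inputs are illustrative).
result = vm.tests.run_test(
    "validmind.model_validation.ragas.AspectCritic",
    inputs={"dataset": my_dataset},
)
result.log()
```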
validmind/__init__.py
CHANGED
@@ -3,25 +3,19 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 """
-ValidMind
-the documentation and validation of your models.
+The ValidMind Library is a suite of developer tools and methods designed to automate the documentation and validation of your models.
 
-
-Python library will provide all the standard functionality without requiring your developers to rewrite any functions.
+Designed to be model agnostic, the ValidMind Library provides all the standard functionality without requiring you to rewrite any functions as long as your model is built in Python.
 
-
-descriptions of your dataset to testing your models for weak spots and overfit areas. The Library
-helps you automate the generation of model documentation by feeding the ValidMind platform with documentation
-artifacts and test results to the ValidMind platform.
+With a rich array of documentation tools and test suites, from documenting descriptions of your datasets to testing your models for weak spots and overfit areas, the ValidMind Library helps you automate model documentation by feeding the ValidMind Platform with documentation artifacts and test results.
 
-To install the
+To install the ValidMind Library:
 
 ```bash
 pip install validmind
 ```
 
-To initialize the
-development source code, replacing this example with your own:
+To initialize the ValidMind Library, paste the code snippet with the model identifier credentials directly into your development source code, replacing this example with your own:
 
 ```python
 import validmind as vm
@@ -34,9 +28,7 @@ vm.init(
 )
 ```
 
-After you have pasted the code snippet into your development source code and executed the code, the Python
-library will register with ValidMind. You can now use the Library to document and test your models,
-and to upload to the ValidMind Platform.
+After you have pasted the code snippet into your development source code and executed the code, the Python Library API will register with ValidMind. You can now use the ValidMind Library to document and test your models, and to upload to the ValidMind Platform.
 """
 import warnings
 
@@ -57,11 +49,11 @@ from .client import ( # noqa: E402
     run_documentation_tests,
     run_test_suite,
 )
-from .tests.decorator import
+from .tests.decorator import tags, tasks, test
 
 __all__ = [  # noqa
     "__version__",
-    #
+    # Python Library API
     "datasets",
     "errors",
     "get_test_suite",
@@ -69,7 +61,6 @@ __all__ = [ # noqa
     "init_dataset",
     "init_model",
     "init_r_model",
-    "metric",
    "preview_template",
    "reload",
    "run_documentation_tests",
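The `__init__.py` change above completes the truncated decorator import: `tags`, `tasks`, and `test` are now re-exported at the package top level. A minimal sketch of a custom test using those exports (the test ID, tags, tasks, and return shape are illustrative, not taken from the diff):

```python
import validmind as vm

@vm.test("my_custom_tests.RowCount")  # hypothetical test ID
@vm.tags("tabular_data")
@vm.tasks("classification")
def row_count(dataset):
    """The function docstring serves as the test's default description."""
    # Returning a list of dicts produces a single result table.
    return [{"Count": len(dataset.df)}]
```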
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.
+__version__ = "2.6.7"
validmind/ai/test_descriptions.py
CHANGED
@@ -2,17 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import json
 import os
 import re
 from concurrent.futures import ThreadPoolExecutor
-from typing import Union
+from typing import List, Optional, Union
 
 from jinja2 import Template
 
-from validmind.utils import md_to_html
-
 from ..client_config import client_config
 from ..logging import get_logger
+from ..utils import NumpyEncoder, md_to_html, test_id_to_name
+from ..vm_models.figure import Figure
+from ..vm_models.result import ResultTable
+from .utils import DescriptionFuture, get_client_and_model
 
 __executor = ThreadPoolExecutor()
 __prompt = None
@@ -20,10 +23,6 @@ __prompt = None
 logger = get_logger(__name__)
 
 
-AI_REVISION_NAME = "Generated by ValidMind AI"
-DEFAULT_REVISION_NAME = "Default Description"
-
-
 def _load_prompt():
     global __prompt
 
@@ -55,7 +54,6 @@ def prompt_to_message(role, prompt):
         if start > last_index:
             content.append({"type": "text", "text": prompt[last_index:start]})
 
-        # Image
         content.append({"type": "image_url", "image_url": {"url": match.group(1)}})
 
         last_index = end
@@ -67,79 +65,66 @@ def prompt_to_message(role, prompt):
     return {"role": role, "content": content}
 
 
-class DescriptionFuture:
-    """This will be immediately returned from generate_description so that
-    the tests can continue to be run in parallel while the description is
-    retrieved asynchronously.
-
-    The value will be retrieved later and if its not ready yet, it should
-    block until it is.
-    """
-
-    def __init__(self, future):
-        self._future = future
-
-    def get_description(self):
-        if isinstance(self._future, str):
-            description = self._future
-        else:
-            # This will block until the future is completed
-            description = self._future.result()
-
-        return md_to_html(description, mathml=True)
-
-
 def generate_description(
     test_id: str,
     test_description: str,
-
+    tables: List[ResultTable] = None,
     metric: Union[float, int] = None,
-    figures:
+    figures: List[Figure] = None,
+    title: Optional[str] = None,
 ):
     """Generate the description for the test results"""
-    if not
+    if not tables and not figures and not metric:
         raise ValueError(
-            "No
+            "No tables, unit metric or figures provided - cannot generate description"
        )
 
-    # TODO: fix circular import
-    from validmind.ai.utils import get_client_and_model
+    # # TODO: fix circular import
+    # from validmind.ai.utils import get_client_and_model
 
     client, model = get_client_and_model()
 
     # get last part of test id
-    test_name = test_id.split(".")[-1]
-
-
-
-    if
-
-
-
-
-
-
-    else:
-        test_summary = metric_summary
+    test_name = title or test_id.split(".")[-1]
+
+    # TODO: fully support metrics
+    if metric is not None:
+        tables = [] if not tables else tables
+        tables.append(
+            ResultTable(
+                data=[
+                    {"Metric": test_id_to_name(test_id), "Value": metric},
+                ],
+            )
+        )
 
-
+    if tables:
+        summary = "\n---\n".join(
+            [
+                json.dumps(table.serialize(), cls=NumpyEncoder, separators=(",", ":"))
+                for table in tables
+            ]
+        )
+    else:
+        summary = None
 
     input_data = {
         "test_name": test_name,
         "test_description": test_description,
-        "
-        "
+        "title": title,
+        "summary": summary,
+        "figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
     }
     system, user = _load_prompt()
 
+    messages = [
+        prompt_to_message("system", system.render(input_data)),
+        prompt_to_message("user", user.render(input_data)),
+    ]
     response = client.chat.completions.create(
         model=model,
         temperature=0.0,
-        messages=
-            prompt_to_message("system", system.render(input_data)),
-            prompt_to_message("user", user.render(input_data)),
-        ],
+        messages=messages,
     )
 
     return response.choices[0].message.content
@@ -148,18 +133,20 @@ def generate_description(
 def background_generate_description(
     test_id: str,
     test_description: str,
-
-    figures:
+    tables: List[ResultTable] = None,
+    figures: List[Figure] = None,
     metric: Union[int, float] = None,
+    title: Optional[str] = None,
 ):
     def wrapped():
         try:
             return generate_description(
                 test_id=test_id,
                 test_description=test_description,
-
+                tables=tables,
                 figures=figures,
                 metric=metric,
+                title=title,
             )
         except Exception as e:
             logger.error(f"Failed to generate description: {e}")
@@ -169,14 +156,14 @@ def background_generate_description(
     return DescriptionFuture(__executor.submit(wrapped))
 
 
-def
-    test_id,
-
-
-    figures=None,
-    metric=None,
-
-
+def get_result_description(
+    test_id: str,
+    test_description: str,
+    tables: List[ResultTable] = None,
+    figures: List[Figure] = None,
+    metric: Union[int, float] = None,
+    should_generate: bool = True,
+    title: Optional[str] = None,
 ):
     """Get Metadata Dictionary for a Test or Metric Result
 
@@ -184,24 +171,23 @@ def get_description_metadata(
     description and returns a metadata object that can be logged with the test results.
 
     By default, the description is generated by an LLM that will interpret the test
-    results and provide a human-readable description. If the
+    results and provide a human-readable description. If the tables or figures are
     not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
     set to `0` or `false` or no LLM has been configured, the default description will
     be used as the test result description.
 
-    Note: Either the
+    Note: Either the tables or figures must be provided to generate the description.
 
     Args:
         test_id (str): The test ID
-
-
+        test_description (str): The default description for the test
+        tables (Any): The test tables or results to interpret
         figures (List[Figure]): The figures to attach to the test suite result
         metric (Union[int, float]): Unit metrics attached to the test result
-        prefix (str): The prefix to use for the content ID (Default: "metric_description")
         should_generate (bool): Whether to generate the description or not (Default: True)
 
     Returns:
-
+        str: The description to be logged with the test results
     """
     # Check the feature flag first, then the environment variable
     llm_descriptions_enabled = (
@@ -214,27 +200,22 @@ def get_description_metadata(
 
     if (
         should_generate
-        and (
+        and (tables or figures)
         and llm_descriptions_enabled
         and is_configured()
     ):
-        revision_name = AI_REVISION_NAME
-
         # get description future and set it as the description in the metadata
         # this will lazily retrieved so it can run in the background in parallel
         description = background_generate_description(
             test_id=test_id,
-            test_description=
-
+            test_description=test_description,
+            tables=tables,
            figures=figures,
            metric=metric,
+            title=title,
        )
 
     else:
-
-        description = md_to_html(default_description, mathml=True)
+        description = md_to_html(test_description, mathml=True)
 
-    return
-        "content_id": f"{prefix}:{test_id}::{revision_name}",
-        "text": description,
-    }
+    return description
validmind/ai/test_result_description/context.py
CHANGED
@@ -37,11 +37,11 @@ class Context:
         pass
 
     def load(self, input_data):
-        # this task can accept a dict or a test result object from the
+        # this task can accept a dict or a test result object from the ValidMind Library
         if isinstance(input_data, dict):
             return input_data
 
-        # we are likely running outside of the
+        # we are likely running outside of the ValidMind Library and need to convert
         # the test result object to a dictionary
         test_result = input_data
 
validmind/ai/utils.py
CHANGED
@@ -7,8 +7,8 @@ from urllib.parse import urljoin
 
 from openai import AzureOpenAI, Client, OpenAI
 
-from ..api_client import get_ai_key, get_api_host
 from ..logging import get_logger
+from ..utils import md_to_html
 
 logger = get_logger(__name__)
 
@@ -19,6 +19,28 @@ __model = None
 __ack = None
 
 
+class DescriptionFuture:
+    """This will be immediately returned from generate_description so that
+    the tests can continue to be run in parallel while the description is
+    retrieved asynchronously.
+
+    The value will be retrieved later and if its not ready yet, it should
+    block until it is.
+    """
+
+    def __init__(self, future):
+        self._future = future
+
+    def get_description(self):
+        if isinstance(self._future, str):
+            description = self._future
+        else:
+            # This will block until the future is completed
+            description = self._future.result()
+
+        return md_to_html(description, mathml=True)
+
+
 def get_client_and_model():
     """Get model and client to use for generating interpretations
 
@@ -58,6 +80,9 @@ def get_client_and_model():
 
     else:
         try:
+            # TODO: fix circular import
+            from ..api_client import get_ai_key, get_api_host
+
             response = get_ai_key()
             __client = Client(
                 base_url=(
validmind/api_client.py
CHANGED
@@ -23,9 +23,7 @@ from .client_config import client_config
 from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
 from .logging import get_logger, init_sentry, send_single_error
 from .utils import NumpyEncoder, run_async
-from .vm_models import Figure
-
-# TODO: can't import types from vm_models because of circular dependency
+from .vm_models import Figure
 
 logger = get_logger(__name__)
 
@@ -50,6 +48,14 @@ def _close_session():
             loop.create_task(__api_session.close())
         else:
             loop.run_until_complete(__api_session.close())
+    except RuntimeError as e:
+        # ignore RuntimeError when closing the session from the main thread
+        if "no current event loop in thread" in str(e):
+            pass
+        elif "Event loop is closed" in str(e):
+            pass
+        else:
+            raise e
     except Exception as e:
         logger.exception("Error closing aiohttp session at exit: %s", e)
 
@@ -187,7 +193,7 @@ def init(
     api_secret: Optional[str] = None,
     api_host: Optional[str] = None,
     model: Optional[str] = None,
-    monitoring=False,
+    monitoring: bool = False,
 ):
     """
     Initializes the API client instances and calls the /ping endpoint to ensure
@@ -202,7 +208,7 @@ def init(
         api_key (str, optional): The API key. Defaults to None.
         api_secret (str, optional): The API secret. Defaults to None.
         api_host (str, optional): The API host. Defaults to None.
-        monitoring (
+        monitoring (bool): The ongoing monitoring flag. Defaults to False.
 
     Raises:
         ValueError: If the API key and secret are not provided
@@ -212,7 +218,7 @@ def init(
     if api_key == "...":
         # special case to detect when running a notebook placeholder (...)
         # will override with environment variables for easier local development
-        api_host = api_key = api_secret = project = None
+        api_host = api_key = api_secret = project = model = None
 
     _model_cuid = project or model or os.getenv("VM_API_MODEL")
     if _model_cuid is None:
@@ -244,30 +250,7 @@ def reload():
         raise e
 
 
-async def
-    """Logs a figure
-
-    Args:
-        figure (Figure): The Figure object wrapper
-
-    Raises:
-        Exception: If the API call fails
-
-    Returns:
-        dict: The response from the API
-    """
-    try:
-        return await _post(
-            "log_figure",
-            data=figure.serialize(),
-            files=figure.serialize_files(),
-        )
-    except Exception as e:
-        logger.error("Error logging figure to ValidMind API")
-        raise e
-
-
-async def get_metadata(content_id: str) -> Dict[str, Any]:
+async def aget_metadata(content_id: str) -> Dict[str, Any]:
     """Gets a metadata object from ValidMind API.
 
     Args:
@@ -279,11 +262,10 @@ async def get_metadata(content_id: str) -> Dict[str, Any]:
     Returns:
         dict: Metadata object
     """
-    # TODO: add a more accurate type hint/documentation
     return await _get(f"get_metadata/{content_id}")
 
 
-async def
+async def alog_metadata(
     content_id: str,
     text: Optional[str] = None,
     _json: Optional[Dict[str, Any]] = None,
@@ -317,21 +299,11 @@ async def log_metadata(
         raise e
 
 
-async def
-
-    inputs: List[str],
-    output_template: str = None,
-    section_id: str = None,
-    position: int = None,
-) -> Dict[str, Any]:
-    """Logs metrics to ValidMind API.
+async def alog_figure(figure: Figure) -> Dict[str, Any]:
+    """Logs a figure
 
     Args:
-
-        inputs (list): A list of input keys (names) that were used to run the test
-        output_template (str): The optional output template for the test
-        section_id (str): The section ID add a test driven block to the documentation
-        position (int): The position in the section to add the test driven block
+        figure (Figure): The Figure object wrapper
 
     Raises:
         Exception: If the API call fails
@@ -339,33 +311,19 @@ async def log_metric_result(
     Returns:
         dict: The response from the API
     """
-    request_params = {}
-    if section_id:
-        request_params["section_id"] = section_id
-    if position is not None:
-        request_params["position"] = position
-
-    metric_data = {
-        **metric.serialize(),
-        "inputs": inputs,
-    }
-    if output_template:
-        metric_data["output_template"] = output_template
-
     try:
         return await _post(
-            "
-
-
+            "log_figure",
+            data=figure.serialize(),
+            files=figure.serialize_files(),
         )
     except Exception as e:
-        logger.error("Error logging
+        logger.error("Error logging figure to ValidMind API")
         raise e
 
 
-async def
-    result:
-    inputs: List[str],
+async def alog_test_result(
+    result: Dict[str, Any],
     section_id: str = None,
     position: int = None,
 ) -> Dict[str, Any]:
@@ -375,8 +333,7 @@ async def log_test_result(
     can also be called directly if the user wants to run tests on their own.
 
     Args:
-        result (
-        inputs (list): A list of input keys (names) that were used to run the test
+        result (dict): A dictionary representing the test result
         section_id (str, optional): The section ID add a test driven block to the documentation
         position (int): The position in the section to add the test driven block
 
@@ -391,16 +348,12 @@ async def log_test_result(
         request_params["section_id"] = section_id
     if position is not None:
         request_params["position"] = position
-
     try:
         return await _post(
             "log_test_results",
             params=request_params,
             data=json.dumps(
-
-                    **result.serialize(),
-                    "inputs": inputs,
-                },
+                result,
                 cls=NumpyEncoder,
                 allow_nan=False,
             ),
@@ -410,7 +363,9 @@
         raise e
 
 
-def
+async def alog_input(
+    input_id: str, type: str, metadata: Dict[str, Any]
+) -> Dict[str, Any]:
     """Logs input information - internal use for now (don't expose via public API)
 
     Args:
@@ -425,8 +380,7 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A
         dict: The response from the API
     """
     try:
-        return
-            _post,
+        return await _post(
             "log_input",
             data=json.dumps(
                 {
@@ -443,9 +397,13 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A
         raise e
 
 
+def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+    return run_async(alog_input, input_id, type, metadata)
+
+
 async def alog_metric(
     key: str,
-    value: float,
+    value: Union[int, float],
     inputs: Optional[List[str]] = None,
     params: Optional[Dict[str, Any]] = None,
     recorded_at: Optional[str] = None,
@@ -454,8 +412,14 @@ async def alog_metric(
     if not key or not isinstance(key, str):
         raise ValueError("`key` must be a non-empty string")
 
-    if
-        raise ValueError("
+    if value is None:
+        raise ValueError("Must provide a value for the metric")
+
+    if not isinstance(value, (int, float)):
+        try:
+            value = float(value)
+        except (ValueError, TypeError):
+            raise ValueError("`value` must be a scalar (int or float)")
 
     try:
         return await _post(
@@ -489,7 +453,7 @@ def log_metric(
     Unit metrics are key-value pairs where the key is the metric name and the value is
     a scalar (int or float). These key-value pairs are associated with the currently
     selected model (inventory model in the ValidMind Platform) and keys can be logged
-    to over time to create a history of the metric. On the
+    to over time to create a history of the metric. On the ValidMind Platform, these metrics
     will be used to create plots/visualizations for documentation and dashboards etc.
 
     Args: