validmind 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai.py +3 -3
- validmind/api_client.py +2 -3
- validmind/client.py +68 -25
- validmind/datasets/llm/rag/__init__.py +11 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
- validmind/datasets/llm/rag/rfp.py +41 -0
- validmind/html_templates/__init__.py +0 -0
- validmind/html_templates/content_blocks.py +89 -14
- validmind/models/__init__.py +7 -4
- validmind/models/foundation.py +8 -34
- validmind/models/function.py +51 -0
- validmind/models/huggingface.py +16 -46
- validmind/models/metadata.py +42 -0
- validmind/models/pipeline.py +66 -0
- validmind/models/pytorch.py +8 -42
- validmind/models/r_model.py +33 -82
- validmind/models/sklearn.py +39 -38
- validmind/template.py +8 -26
- validmind/tests/__init__.py +43 -20
- validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +1 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
- validmind/tests/data_validation/nlp/Punctuations.py +11 -12
- validmind/tests/data_validation/nlp/Sentiment.py +57 -0
- validmind/tests/data_validation/nlp/Toxicity.py +45 -0
- validmind/tests/decorator.py +2 -2
- validmind/tests/model_validation/BertScore.py +100 -98
- validmind/tests/model_validation/BleuScore.py +93 -64
- validmind/tests/model_validation/ContextualRecall.py +74 -91
- validmind/tests/model_validation/MeteorScore.py +86 -74
- validmind/tests/model_validation/RegardScore.py +103 -121
- validmind/tests/model_validation/RougeScore.py +118 -0
- validmind/tests/model_validation/TokenDisparity.py +84 -121
- validmind/tests/model_validation/ToxicityScore.py +109 -123
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
- validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
- validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
- validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
- validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
- validmind/tests/model_validation/ragas/utils.py +66 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +14 -12
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
- validmind/unit_metrics/__init__.py +26 -49
- validmind/unit_metrics/composite.py +5 -1
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
- validmind/utils.py +56 -6
- validmind/vm_models/__init__.py +1 -1
- validmind/vm_models/dataset/__init__.py +7 -0
- validmind/vm_models/dataset/dataset.py +558 -0
- validmind/vm_models/dataset/utils.py +146 -0
- validmind/vm_models/model.py +97 -72
- validmind/vm_models/test/result_wrapper.py +61 -24
- validmind/vm_models/test_context.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -4
- {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/METADATA +5 -3
- {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/RECORD +100 -75
- validmind/models/catboost.py +0 -33
- validmind/models/statsmodels.py +0 -50
- validmind/models/xgboost.py +0 -30
- validmind/tests/model_validation/BertScoreAggregate.py +0 -90
- validmind/tests/model_validation/RegardHistogram.py +0 -148
- validmind/tests/model_validation/RougeMetrics.py +0 -147
- validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
- validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
- validmind/tests/model_validation/ToxicityHistogram.py +0 -136
- validmind/vm_models/dataset.py +0 -1303
- {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/LICENSE +0 -0
- {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/WHEEL +0 -0
- {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/entry_points.txt +0 -0
validmind/vm_models/model.py
CHANGED
@@ -7,9 +7,11 @@ Model class wrapper module
|
|
7
7
|
"""
|
8
8
|
import importlib
|
9
9
|
import inspect
|
10
|
-
from abc import abstractmethod
|
10
|
+
from abc import ABC, abstractmethod
|
11
11
|
from dataclasses import dataclass
|
12
12
|
|
13
|
+
from validmind.errors import MissingOrInvalidModelPredictFnError
|
14
|
+
|
13
15
|
SUPPORTED_LIBRARIES = {
|
14
16
|
"catboost": "CatBoostModel",
|
15
17
|
"xgboost": "XGBoostModel",
|
@@ -17,6 +19,8 @@ SUPPORTED_LIBRARIES = {
|
|
17
19
|
"statsmodels": "StatsModelsModel",
|
18
20
|
"torch": "PyTorchModel",
|
19
21
|
"transformers": "HFModel",
|
22
|
+
"function": "FunctionModel",
|
23
|
+
"pipeline": "PipelineModel",
|
20
24
|
"custom": "SKlearnModel",
|
21
25
|
}
|
22
26
|
|
@@ -32,6 +36,23 @@ R_MODEL_METHODS = [
|
|
32
36
|
]
|
33
37
|
|
34
38
|
|
39
|
+
class ModelPipeline:
|
40
|
+
"""Helper class for chaining models together
|
41
|
+
|
42
|
+
This shouldn't be used directly, it just gets used when chaining models with the
|
43
|
+
`|` operator since you can't use a list directly - you must use a type that
|
44
|
+
overloads the `|` operator.
|
45
|
+
"""
|
46
|
+
|
47
|
+
def __init__(self, models):
|
48
|
+
self.models = models
|
49
|
+
|
50
|
+
def __or__(self, other):
|
51
|
+
self.models.append(other)
|
52
|
+
|
53
|
+
return self
|
54
|
+
|
55
|
+
|
35
56
|
@dataclass
|
36
57
|
class ModelAttributes:
|
37
58
|
"""
|
@@ -41,51 +62,67 @@ class ModelAttributes:
|
|
41
62
|
architecture: str = None
|
42
63
|
framework: str = None
|
43
64
|
framework_version: str = None
|
65
|
+
language: str = None
|
44
66
|
|
67
|
+
@classmethod
|
68
|
+
def from_dict(cls, data):
|
69
|
+
"""
|
70
|
+
Creates a ModelAttributes instance from a dictionary
|
71
|
+
"""
|
72
|
+
return cls(
|
73
|
+
architecture=data.get("architecture"),
|
74
|
+
framework=data.get("framework"),
|
75
|
+
framework_version=data.get("framework_version"),
|
76
|
+
language=data.get("language"),
|
77
|
+
)
|
45
78
|
|
46
|
-
|
79
|
+
|
80
|
+
class VMModel(ABC):
|
47
81
|
"""
|
48
82
|
A base class that wraps a trained model instance and its associated data.
|
49
83
|
|
50
84
|
Attributes:
|
51
|
-
attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
|
52
85
|
model (object, optional): The trained model instance. Defaults to None.
|
53
|
-
|
86
|
+
input_id (str, optional): The input ID for the model. Defaults to None.
|
87
|
+
attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
|
88
|
+
name (str, optional): The name of the model. Defaults to the class name.
|
54
89
|
"""
|
55
90
|
|
56
|
-
input_id: str = None
|
57
|
-
|
58
91
|
def __init__(
|
59
92
|
self,
|
60
93
|
input_id: str = None,
|
61
94
|
model: object = None,
|
62
95
|
attributes: ModelAttributes = None,
|
96
|
+
name: str = None,
|
97
|
+
**kwargs,
|
63
98
|
):
|
64
|
-
self.
|
65
|
-
self.
|
66
|
-
self._attributes = attributes
|
99
|
+
self.model = model
|
100
|
+
self.input_id = input_id
|
67
101
|
|
68
|
-
|
69
|
-
self.
|
102
|
+
self.language = "Python"
|
103
|
+
self.library = self.__class__.__name__
|
104
|
+
self.library_version = "N/A"
|
105
|
+
self.class_ = self.__class__.__name__
|
70
106
|
|
71
|
-
|
72
|
-
def attributes(self):
|
73
|
-
return self._attributes
|
107
|
+
self.name = name or self.__class__.__name__
|
74
108
|
|
75
|
-
|
76
|
-
def input_id(self):
|
77
|
-
return self._input_id
|
109
|
+
self.attributes = attributes
|
78
110
|
|
79
|
-
|
80
|
-
|
81
|
-
|
111
|
+
# set any additional attributes passed in (likely for subclasses)
|
112
|
+
for key, value in kwargs.items():
|
113
|
+
setattr(self, key, value)
|
82
114
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
115
|
+
self.__post_init__()
|
116
|
+
|
117
|
+
def __post_init__(self):
|
118
|
+
"""Allows child classes to add their own post-init logic"""
|
119
|
+
pass
|
120
|
+
|
121
|
+
def __or__(self, other):
|
122
|
+
if not isinstance(other, VMModel):
|
123
|
+
raise ValueError("Can only chain VMModel objects")
|
124
|
+
|
125
|
+
return ModelPipeline([self, other])
|
89
126
|
|
90
127
|
def serialize(self):
|
91
128
|
"""
|
@@ -95,13 +132,11 @@ class VMModel:
|
|
95
132
|
"attributes": self.attributes.__dict__,
|
96
133
|
}
|
97
134
|
|
98
|
-
@abstractmethod
|
99
135
|
def predict_proba(self, *args, **kwargs):
|
100
|
-
"""
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
pass
|
136
|
+
"""Predict probabilties - must be implemented by subclass if needed"""
|
137
|
+
raise MissingOrInvalidModelPredictFnError(
|
138
|
+
"`predict_proba()` method not implemented for this model"
|
139
|
+
)
|
105
140
|
|
106
141
|
@abstractmethod
|
107
142
|
def predict(self, *args, **kwargs):
|
@@ -110,42 +145,6 @@ class VMModel:
|
|
110
145
|
"""
|
111
146
|
pass
|
112
147
|
|
113
|
-
@abstractmethod
|
114
|
-
def model_language(self, *args, **kwargs):
|
115
|
-
"""
|
116
|
-
Programming language used to train the model. Assume Python if this
|
117
|
-
method is not implemented
|
118
|
-
"""
|
119
|
-
pass
|
120
|
-
|
121
|
-
@abstractmethod
|
122
|
-
def model_library(self, *args, **kwargs):
|
123
|
-
"""
|
124
|
-
Model framework library
|
125
|
-
"""
|
126
|
-
pass
|
127
|
-
|
128
|
-
@abstractmethod
|
129
|
-
def model_library_version(self, *args, **kwargs):
|
130
|
-
"""
|
131
|
-
Model framework library version
|
132
|
-
"""
|
133
|
-
pass
|
134
|
-
|
135
|
-
@abstractmethod
|
136
|
-
def model_class(self, *args, **kwargs):
|
137
|
-
"""
|
138
|
-
Predict method for the model. This is a wrapper around the model's
|
139
|
-
"""
|
140
|
-
pass
|
141
|
-
|
142
|
-
@abstractmethod
|
143
|
-
def model_name(self, *args, **kwargs):
|
144
|
-
"""
|
145
|
-
Model name
|
146
|
-
"""
|
147
|
-
pass
|
148
|
-
|
149
148
|
|
150
149
|
def has_method_with_arguments(cls, method_name, n_args):
|
151
150
|
if not hasattr(cls, method_name):
|
@@ -195,11 +194,17 @@ def model_module(model):
|
|
195
194
|
return module
|
196
195
|
|
197
196
|
|
198
|
-
def get_model_class(model):
|
199
|
-
|
197
|
+
def get_model_class(model, predict_fn=None):
|
198
|
+
# TODO: more consistent way to determine this?!
|
199
|
+
if predict_fn is not None:
|
200
|
+
model_class_name = SUPPORTED_LIBRARIES["function"]
|
201
|
+
elif isinstance(model, ModelPipeline):
|
202
|
+
model_class_name = SUPPORTED_LIBRARIES["pipeline"]
|
203
|
+
else:
|
204
|
+
model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
|
200
205
|
|
201
|
-
if model_class_name
|
202
|
-
|
206
|
+
if not model_class_name:
|
207
|
+
return None
|
203
208
|
|
204
209
|
model_class = getattr(
|
205
210
|
importlib.import_module("validmind.models"),
|
@@ -207,3 +212,23 @@ def get_model_class(model):
|
|
207
212
|
)
|
208
213
|
|
209
214
|
return model_class
|
215
|
+
|
216
|
+
|
217
|
+
def is_model_metadata(model):
|
218
|
+
"""
|
219
|
+
Checks if the model is a dictionary containing metadata about a model.
|
220
|
+
We want to check if the metadata dictionary contains at least the following keys:
|
221
|
+
|
222
|
+
- architecture
|
223
|
+
- language
|
224
|
+
"""
|
225
|
+
if not isinstance(model, dict):
|
226
|
+
return False
|
227
|
+
|
228
|
+
if "architecture" not in model:
|
229
|
+
return False
|
230
|
+
|
231
|
+
if "language" not in model:
|
232
|
+
return False
|
233
|
+
|
234
|
+
return True
|
@@ -12,20 +12,23 @@ from abc import ABC, abstractmethod
|
|
12
12
|
from dataclasses import dataclass
|
13
13
|
from typing import Dict, List, Optional, Union
|
14
14
|
|
15
|
-
import ipywidgets as widgets
|
16
|
-
import mistune
|
17
15
|
import pandas as pd
|
18
|
-
from
|
16
|
+
from ipywidgets import HTML, GridBox, Layout, VBox
|
19
17
|
|
20
18
|
from ... import api_client
|
21
19
|
from ...ai import DescriptionFuture
|
22
|
-
from ...
|
20
|
+
from ...input_registry import input_registry
|
21
|
+
from ...logging import get_logger
|
22
|
+
from ...utils import NumpyEncoder, display, md_to_html, run_async, test_id_to_name
|
23
|
+
from ..dataset import VMDataset
|
23
24
|
from ..figure import Figure
|
24
25
|
from .metric_result import MetricResult
|
25
26
|
from .output_template import OutputTemplate
|
26
27
|
from .result_summary import ResultSummary
|
27
28
|
from .threshold_test_result import ThresholdTestResults
|
28
29
|
|
30
|
+
logger = get_logger(__name__)
|
31
|
+
|
29
32
|
|
30
33
|
async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
|
31
34
|
"""
|
@@ -64,9 +67,9 @@ def plot_figures(figures: List[Figure]) -> None:
|
|
64
67
|
plots = [figure.to_widget() for figure in figures]
|
65
68
|
|
66
69
|
num_columns = 2 if len(figures) > 1 else 1
|
67
|
-
return
|
70
|
+
return GridBox(
|
68
71
|
plots,
|
69
|
-
layout=
|
72
|
+
layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
|
70
73
|
)
|
71
74
|
|
72
75
|
|
@@ -103,7 +106,7 @@ class ResultWrapper(ABC):
|
|
103
106
|
"""
|
104
107
|
Convert a markdown string to html
|
105
108
|
"""
|
106
|
-
return
|
109
|
+
return md_to_html(description)
|
107
110
|
|
108
111
|
def _summary_tables_to_widget(self, summary: ResultSummary):
|
109
112
|
"""
|
@@ -148,8 +151,8 @@ class ResultWrapper(ABC):
|
|
148
151
|
) # table.data is an orient=records dump
|
149
152
|
|
150
153
|
if table.metadata and table.metadata.title:
|
151
|
-
tables.append(
|
152
|
-
tables.append(
|
154
|
+
tables.append(HTML(value=f"<h3>{table.metadata.title}</h3>"))
|
155
|
+
tables.append(HTML(value=summary_table))
|
153
156
|
return tables
|
154
157
|
|
155
158
|
def show(self):
|
@@ -180,9 +183,7 @@ class FailedResultWrapper(ResultWrapper):
|
|
180
183
|
return f'FailedResult(result_id="{self.result_id}")'
|
181
184
|
|
182
185
|
def to_widget(self):
|
183
|
-
return
|
184
|
-
value=f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>"
|
185
|
-
)
|
186
|
+
return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
|
186
187
|
|
187
188
|
async def log_async(self):
|
188
189
|
pass
|
@@ -216,7 +217,7 @@ class MetricResultWrapper(ResultWrapper):
|
|
216
217
|
return ""
|
217
218
|
|
218
219
|
vbox_children = [
|
219
|
-
|
220
|
+
HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
|
220
221
|
]
|
221
222
|
|
222
223
|
if self.result_metadata:
|
@@ -226,9 +227,7 @@ class MetricResultWrapper(ResultWrapper):
|
|
226
227
|
self.result_metadata[0]["text"] = metric_description
|
227
228
|
|
228
229
|
vbox_children.append(
|
229
|
-
|
230
|
-
value=self._markdown_description_to_html(metric_description)
|
231
|
-
)
|
230
|
+
HTML(value=self._markdown_description_to_html(metric_description))
|
232
231
|
)
|
233
232
|
|
234
233
|
if self.metric:
|
@@ -236,18 +235,18 @@ class MetricResultWrapper(ResultWrapper):
|
|
236
235
|
rendered_output = OutputTemplate(self.output_template).render(
|
237
236
|
value=self.metric.value
|
238
237
|
)
|
239
|
-
vbox_children.append(
|
238
|
+
vbox_children.append(HTML(rendered_output))
|
240
239
|
elif self.metric.summary:
|
241
240
|
tables = self._summary_tables_to_widget(self.metric.summary)
|
242
241
|
vbox_children.extend(tables)
|
243
242
|
|
244
243
|
if self.figures:
|
245
|
-
vbox_children.append(
|
244
|
+
vbox_children.append(HTML(value="<h3>Plots</h3>"))
|
246
245
|
plot_widgets = plot_figures(self.figures)
|
247
246
|
vbox_children.append(plot_widgets)
|
248
247
|
|
249
248
|
vbox_children.append(
|
250
|
-
|
249
|
+
HTML(
|
251
250
|
value="""
|
252
251
|
<style>
|
253
252
|
.metric-result {
|
@@ -293,12 +292,48 @@ class MetricResultWrapper(ResultWrapper):
|
|
293
292
|
)
|
294
293
|
)
|
295
294
|
|
296
|
-
return
|
295
|
+
return VBox(vbox_children)
|
297
296
|
|
298
|
-
|
297
|
+
def _get_filtered_summary(self):
|
298
|
+
"""Check if the metric summary has columns from input datasets"""
|
299
|
+
dataset_columns = set()
|
300
|
+
|
301
|
+
for input_id in self.inputs:
|
302
|
+
input_obj = input_registry.get(input_id)
|
303
|
+
if isinstance(input_obj, VMDataset):
|
304
|
+
dataset_columns.update(input_obj.columns)
|
305
|
+
|
306
|
+
for table in [*self.metric.summary.results]:
|
307
|
+
columns = set()
|
308
|
+
|
309
|
+
if isinstance(table.data, pd.DataFrame):
|
310
|
+
columns.update(table.data.columns)
|
311
|
+
elif isinstance(table.data, list):
|
312
|
+
columns.update(table.data[0].keys())
|
313
|
+
else:
|
314
|
+
raise ValueError("Invalid data type in summary table")
|
315
|
+
|
316
|
+
if bool(columns.intersection(dataset_columns)):
|
317
|
+
logger.warning(
|
318
|
+
"Sensitive data in metric summary table. Not logging to API automatically."
|
319
|
+
" Pass `unsafe=True` to result.log() method to override manually."
|
320
|
+
)
|
321
|
+
logger.warning(
|
322
|
+
f"The following columns are present in the table: {columns}"
|
323
|
+
f" and also present in the dataset: {dataset_columns}"
|
324
|
+
)
|
325
|
+
|
326
|
+
self.metric.summary.results.remove(table)
|
327
|
+
|
328
|
+
return self.metric.summary
|
329
|
+
|
330
|
+
async def log_async(self, unsafe=False):
|
299
331
|
tasks = [] # collect tasks to run in parallel (async)
|
300
332
|
|
301
333
|
if self.metric:
|
334
|
+
if self.metric.summary and not unsafe:
|
335
|
+
self.metric.summary = self._get_filtered_summary()
|
336
|
+
|
302
337
|
tasks.append(
|
303
338
|
api_client.log_metrics(
|
304
339
|
metrics=[self.metric],
|
@@ -306,8 +341,10 @@ class MetricResultWrapper(ResultWrapper):
|
|
306
341
|
output_template=self.output_template,
|
307
342
|
)
|
308
343
|
)
|
344
|
+
|
309
345
|
if self.figures:
|
310
346
|
tasks.append(api_client.log_figures(self.figures))
|
347
|
+
|
311
348
|
if hasattr(self, "result_metadata") and self.result_metadata:
|
312
349
|
description = self.result_metadata[0].get("text", "")
|
313
350
|
if isinstance(description, DescriptionFuture):
|
@@ -383,18 +420,18 @@ class ThresholdTestResultWrapper(ResultWrapper):
|
|
383
420
|
"""
|
384
421
|
)
|
385
422
|
|
386
|
-
vbox_children.append(
|
423
|
+
vbox_children.append(HTML(value="".join(description_html)))
|
387
424
|
|
388
425
|
if self.test_results.summary:
|
389
426
|
tables = self._summary_tables_to_widget(self.test_results.summary)
|
390
427
|
vbox_children.extend(tables)
|
391
428
|
|
392
429
|
if self.figures:
|
393
|
-
vbox_children.append(
|
430
|
+
vbox_children.append(HTML(value="<h3>Plots</h3>"))
|
394
431
|
plot_widgets = plot_figures(self.figures)
|
395
432
|
vbox_children.append(plot_widgets)
|
396
433
|
|
397
|
-
return
|
434
|
+
return VBox(vbox_children)
|
398
435
|
|
399
436
|
async def log_async(self):
|
400
437
|
tasks = [api_client.log_test_result(self.test_results, self.inputs)]
|
@@ -20,7 +20,7 @@ from validmind.input_registry import input_registry
|
|
20
20
|
|
21
21
|
from ..errors import MissingRequiredTestInputError
|
22
22
|
from ..logging import get_logger
|
23
|
-
from .dataset import VMDataset
|
23
|
+
from .dataset.dataset import VMDataset
|
24
24
|
from .model import VMModel
|
25
25
|
|
26
26
|
# More human readable context names for error messages
|
@@ -6,10 +6,9 @@ from dataclasses import dataclass
|
|
6
6
|
from typing import List, Optional
|
7
7
|
|
8
8
|
import ipywidgets as widgets
|
9
|
-
import mistune
|
10
|
-
from IPython.display import display
|
11
9
|
|
12
10
|
from ...logging import get_logger
|
11
|
+
from ...utils import display, md_to_html
|
13
12
|
from ..test.result_wrapper import FailedResultWrapper
|
14
13
|
from .test_suite import TestSuiteSection, TestSuiteTest
|
15
14
|
|
@@ -36,7 +35,7 @@ class TestSuiteSectionSummary:
|
|
36
35
|
self._build_summary()
|
37
36
|
|
38
37
|
def _add_description(self):
|
39
|
-
description = f'<div class="result">{
|
38
|
+
description = f'<div class="result">{md_to_html(self.description)}</div>'
|
40
39
|
self._widgets.append(widgets.HTML(value=description))
|
41
40
|
|
42
41
|
def _add_tests_summary(self):
|
@@ -101,7 +100,7 @@ class TestSuiteSummary:
|
|
101
100
|
self._widgets.append(widgets.HTML(value=results_link))
|
102
101
|
|
103
102
|
def _add_description(self):
|
104
|
-
description = f'<div class="result">{
|
103
|
+
description = f'<div class="result">{md_to_html(self.description)}</div>'
|
105
104
|
self._widgets.append(widgets.HTML(value=description))
|
106
105
|
|
107
106
|
def _add_sections_summary(self):
|
@@ -1,14 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.2.2
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
7
7
|
Author-email: andres@validmind.ai
|
8
|
-
Requires-Python: >=3.8,<3.12
|
8
|
+
Requires-Python: >=3.8.1,<3.12
|
9
9
|
Classifier: License :: Other/Proprietary License
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
@@ -26,6 +25,7 @@ Requires-Dist: evaluate (>=0.4.0,<0.5.0)
|
|
26
25
|
Requires-Dist: ipywidgets (>=8.0.6,<9.0.0)
|
27
26
|
Requires-Dist: kaleido (>=0.2.1,<0.3.0,!=0.2.1.post1)
|
28
27
|
Requires-Dist: langdetect (>=1.0.9,<2.0.0)
|
28
|
+
Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
|
29
29
|
Requires-Dist: levenshtein (>=0.21.1,<0.22.0) ; extra == "all" or extra == "llm"
|
30
30
|
Requires-Dist: llvmlite (>=0.42.0) ; python_version >= "3.12"
|
31
31
|
Requires-Dist: llvmlite ; python_version >= "3.8" and python_full_version <= "3.11.0"
|
@@ -43,6 +43,7 @@ Requires-Dist: polars (>=0.20.15,<0.21.0)
|
|
43
43
|
Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
|
44
44
|
Requires-Dist: pypmml (>=0.9.17,<0.10.0)
|
45
45
|
Requires-Dist: python-dotenv (>=0.20.0,<0.21.0)
|
46
|
+
Requires-Dist: ragas (>=0.1.7,<0.2.0)
|
46
47
|
Requires-Dist: rouge (>=1.0.1,<2.0.0)
|
47
48
|
Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
|
48
49
|
Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
|
@@ -55,6 +56,7 @@ Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
|
|
55
56
|
Requires-Dist: shap (>=0.42.0,<0.43.0)
|
56
57
|
Requires-Dist: statsmodels (>=0.13.5,<0.14.0)
|
57
58
|
Requires-Dist: tabulate (>=0.8.9,<0.9.0)
|
59
|
+
Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
|
58
60
|
Requires-Dist: textstat (>=0.7.3,<0.8.0)
|
59
61
|
Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
|
60
62
|
Requires-Dist: torchmetrics (>=1.1.1,<2.0.0) ; extra == "all" or extra == "llm"
|