validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.25.dist-info/METADATA +0 -118
  196. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
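Taken together, the list above captures the 2.6 refactor: the old `vm_models/test/*` result machinery and `test_context.py` (entries 186–194) are deleted in favor of the new unified `validmind/vm_models/result/` module (entries 170–173), a comparison-test engine is added in `validmind/tests/comparison.py`, and several tests are renamed, notably `DFGLSArch → DickeyFullerGLS` and the ragas tests `AspectCritique → AspectCritic`, `AnswerRelevance → ResponseRelevancy`, and `AnswerSimilarity → SemanticSimilarity`. A minimal migration sketch, assuming the stable `list_tests`/`run_test` entry points; the dataset input is a placeholder, not part of this diff:

import validmind as vm

# Confirm the IDs that ship in 2.6.7 after the renames:
vm.tests.list_tests(filter="ragas")

# Old IDs such as "...ragas.AspectCritique" no longer resolve. The renamed
# test runs the same way as before; "my_dataset" is a placeholder input_id
# for a dataset previously registered via vm.init_dataset(..., input_id="my_dataset").
result = vm.tests.run_test(
    "validmind.model_validation.ragas.AspectCritic",
    inputs={"dataset": "my_dataset"},
)
result.log()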
validmind/vm_models/test/result_wrapper.py (deleted)
@@ -1,488 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- """
- Result Wrappers for test and metric results
- """
- import asyncio
- import json
- from abc import ABC, abstractmethod
- from dataclasses import dataclass
- from typing import Dict, List, Optional, Union
-
- import pandas as pd
- from ipywidgets import HTML, GridBox, Layout, VBox
-
- from ... import api_client
- from ...ai.test_descriptions import AI_REVISION_NAME, DescriptionFuture
- from ...input_registry import input_registry
- from ...logging import get_logger
- from ...utils import NumpyEncoder, display, run_async, test_id_to_name
- from ..dataset import VMDataset
- from ..figure import Figure
- from .metric_result import MetricResult
- from .output_template import OutputTemplate
- from .result_summary import ResultSummary
- from .threshold_test_result import ThresholdTestResults
-
- logger = get_logger(__name__)
-
-
- async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
-     """Create or update a metadata object"""
-     parts = content_id.split("::")
-     content_id = parts[0]
-     revision_name = parts[1] if len(parts) > 1 else None
-
-     # we always want composite metric definitions to be updated
-     should_update = content_id.startswith("composite_metric_def:")
-
-     # if we are updating a metric or test description, we check if the text
-     # has changed from the last time it was logged, and only update if it has
-     if content_id.split(":", 1)[0] in ["metric_description", "test_description"]:
-         try:
-             md = await api_client.get_metadata(content_id)
-             # if there is an existing description, only update it if the new one
-             # is different and is an AI-generated description
-             should_update = (
-                 md["text"] != text if revision_name == AI_REVISION_NAME else False
-             )
-             logger.debug(f"Check if description has changed: {should_update}")
-         except Exception:
-             # if exception, assume it's not created yet TODO: don't catch all
-             should_update = True
-
-     if should_update:
-         if revision_name:
-             content_id = f"{content_id}::{revision_name}"
-
-         logger.debug(f"Updating metadata for `{content_id}`")
-
-         await api_client.log_metadata(content_id, text, _json)
-
-
- def plot_figures(figures: List[Figure]) -> GridBox:
-     """Plot figures to an ipywidgets GridBox"""
-     plots = [figure.to_widget() for figure in figures]
-     num_columns = 2 if len(figures) > 1 else 1
-
-     return GridBox(
-         plots,
-         layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
-     )
-
-
- def _summary_tables_to_widget(summary: ResultSummary):
-     """Convert summary (list of JSON tables) into ipywidgets"""
-     widgets = []
-
-     for table in summary.results:
-         if table.metadata and table.metadata.title:
-             widgets.append(HTML(f"<h4>{table.metadata.title}</h4>"))
-
-         df_html = (
-             pd.DataFrame(table.data)
-             .style.format(precision=4)
-             .hide(axis="index")
-             .set_table_styles(
-                 [
-                     {
-                         "selector": "",
-                         "props": [("width", "100%")],
-                     },
-                     {
-                         "selector": "th",
-                         "props": [("text-align", "left")],
-                     },
-                     {
-                         "selector": "tbody tr:nth-child(even)",
-                         "props": [("background-color", "#FFFFFF")],
-                     },
-                     {
-                         "selector": "tbody tr:nth-child(odd)",
-                         "props": [("background-color", "#F5F5F5")],
-                     },
-                     {
-                         "selector": "td, th",
-                         "props": [
-                             ("padding-left", "5px"),
-                             ("padding-right", "5px"),
-                         ],
-                     },
-                 ]
-             )
-             .set_properties(**{"text-align": "left"})
-             .to_html(escape=False)
-         )
-         widgets.append(HTML(df_html))
-
-     return widgets
-
-
- @dataclass
- class ResultWrapper(ABC):
-     """Base class for test suite results"""
-
-     name: str = "ResultWrapper"
-     # id of the result, can be set by the subclass. This helps
-     # looking up results later on
-     result_id: str = None
-     # Text description from test or metric (docstring usually)
-     result_description: str = None
-     # Text metadata about the result, can include description, etc.
-     result_metadata: List[dict] = None
-     # Output template to use for rendering the result
-     output_template: Optional[str] = None
-
-     def __str__(self) -> str:
-         """May be overridden by subclasses"""
-         return self.__class__.__name__
-
-     @abstractmethod
-     def to_widget(self):
-         """Create an ipywidget representation of the result... Must be overridden by subclasses"""
-         raise NotImplementedError
-
-     def render(self, output_template=None):
-         """Helper method that lets the user try out output templates"""
-         if output_template:
-             self.output_template = output_template
-
-         return self.to_widget()
-
-     def _validate_section_id_for_block(self, section_id: str, position: int = None):
-         """
-         Validate that the section_id exists on the template before logging. We validate
-         that the section exists and that the user-provided position is within the bounds
-         of the section. When the position is None, we assume it goes at the end of the section.
-         """
-         if section_id is None:
-             return
-
-         api_client.reload()
-         found = False
-         client_config = api_client.client_config
-
-         for section in client_config.documentation_template["sections"]:
-             if section["id"] == section_id:
-                 found = True
-                 break
-
-         if not found:
-             raise ValueError(
-                 f"Section with id {section_id} not found in the model's document"
-             )
-
-         # Check if the block already exists in the section
-         block_definition = {
-             "content_id": self.result_id,
-             "content_type": (
-                 "metric" if isinstance(self, MetricResultWrapper) else "test"
-             ),
-         }
-         blocks = section.get("contents", [])
-         for block in blocks:
-             if (
-                 block["content_id"] == block_definition["content_id"]
-                 and block["content_type"] == block_definition["content_type"]
-             ):
-                 logger.info(
-                     f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
-                 )
-                 return
-
-         # Validate that the position is within the bounds of the section
-         if position is not None:
-             num_blocks = len(blocks)
-             if position < 0 or position > num_blocks:
-                 raise ValueError(
-                     f"Invalid position {position}. Must be between 0 and {num_blocks}"
-                 )
-
-     def show(self):
-         """Display the result... May be overridden by subclasses"""
-         display(self.to_widget())
-
-     @abstractmethod
-     async def log_async(self):
-         """Log the result... Must be overridden by subclasses"""
-         raise NotImplementedError
-
-     def log(self, section_id: str = None, position: int = None):
-         """Log the result... May be overridden by subclasses"""
-
-         self._validate_section_id_for_block(section_id, position)
-         run_async(self.log_async, section_id=section_id, position=position)
-
-
- @dataclass
- class FailedResultWrapper(ResultWrapper):
-     """
-     Result wrapper for test suites that fail to load or run properly
-     """
-
-     name: str = "Failed"
-     error: Exception = None
-     message: str = None
-
-     def __repr__(self) -> str:
-         return f'FailedResult(result_id="{self.result_id}")'
-
-     def to_widget(self):
-         return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
-
-     async def log_async(self):
-         pass
-
-
- @dataclass
- class MetricResultWrapper(ResultWrapper):
-     """
-     Result wrapper for metrics that run as part of a test suite
-     """
-
-     name: str = "Metric"
-     scalar: Optional[Union[int, float]] = None
-     metric: Optional[MetricResult] = None
-     figures: Optional[List[Figure]] = None
-     inputs: List[str] = None  # List of input ids
-     params: Dict = None
-
-     def __repr__(self) -> str:
-         if self.metric:
-             return f'{self.__class__.__name__}(result_id="{self.result_id}", metric, figures)'
-         else:
-             return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
-
-     def to_widget(self):
-         if self.metric and self.metric.key == "dataset_description":
-             return ""
-
-         vbox_children = [
-             HTML(f"<h1>{test_id_to_name(self.result_id)}</h1>"),
-         ]
-
-         if self.result_metadata:
-             metric_description = self.result_metadata[0].get("text", "")
-             if isinstance(metric_description, DescriptionFuture):
-                 metric_description = metric_description.get_description()
-                 self.result_metadata[0]["text"] = metric_description
-
-             vbox_children.append(HTML(metric_description))
-
-         if self.scalar is not None:
-             vbox_children.append(
-                 HTML(
-                     "<h3>Unit Metrics</h3>"
-                     f"<p>{test_id_to_name(self.result_id)} "
-                     f"(<i>{self.result_id}</i>): "
-                     f"<code>{self.scalar}</code></p>"
-                 )
-             )
-
-         if self.metric:
-             vbox_children.append(HTML("<h3>Tables</h3>"))
-             if self.output_template:
-                 vbox_children.append(
-                     HTML(
-                         OutputTemplate(self.output_template).render(
-                             value=self.metric.value
-                         )
-                     )
-                 )
-             elif self.metric.summary:
-                 vbox_children.extend(_summary_tables_to_widget(self.metric.summary))
-
-         if self.figures:
-             vbox_children.append(HTML("<h3>Plots</h3>"))
-             plot_widgets = plot_figures(self.figures)
-             vbox_children.append(plot_widgets)
-
-         return VBox(vbox_children)
-
-     def _get_filtered_summary(self):
-         """Check if the metric summary has columns from input datasets with matching row counts."""
-         dataset_columns = self._get_dataset_columns()
-         filtered_results = []
-
-         for table in self.metric.summary.results:
-             table_columns = self._get_table_columns(table)
-             sensitive_columns = self._find_sensitive_columns(
-                 dataset_columns, table_columns
-             )
-
-             if sensitive_columns:
-                 self._log_sensitive_data_warning(sensitive_columns)
-             else:
-                 filtered_results.append(table)
-
-         self.metric.summary.results = filtered_results
-         return self.metric.summary
-
-     def _get_dataset_columns(self):
-         dataset_columns = {}
-         for input_item in self.inputs:
-             input_id = (
-                 input_item if isinstance(input_item, str) else input_item.input_id
-             )
-             input_obj = input_registry.get(input_id)
-             if isinstance(input_obj, VMDataset):
-                 dataset_columns.update(
-                     {col: len(input_obj.df) for col in input_obj.columns}
-                 )
-         return dataset_columns
-
-     def _get_table_columns(self, table):
-         if isinstance(table.data, pd.DataFrame):
-             return {col: len(table.data) for col in table.data.columns}
-         elif isinstance(table.data, list) and table.data:
-             return {col: len(table.data) for col in table.data[0].keys()}
-         else:
-             raise ValueError("Invalid data type in summary table")
-
-     def _find_sensitive_columns(self, dataset_columns, table_columns):
-         return [
-             col
-             for col, row_count in table_columns.items()
-             if col in dataset_columns and row_count == dataset_columns[col]
-         ]
-
-     def _log_sensitive_data_warning(self, sensitive_columns):
-         logger.warning(
-             "Sensitive data in metric summary table. Not logging to API automatically. "
-             "Pass `unsafe=True` to result.log() method to override manually."
-         )
-         logger.warning(
-             f"The following columns are present in the table with matching row counts: {sensitive_columns}"
-         )
-
-     async def log_async(
-         self, section_id: str = None, position: int = None, unsafe=False
-     ):
-         tasks = []  # collect tasks to run in parallel (async)
-
-         if self.scalar is not None:
-             # scalars (unit metrics) are logged as key-value pairs associated with the inventory model
-             tasks.append(
-                 api_client.alog_metric(
-                     key=self.result_id,
-                     value=self.scalar,
-                     inputs=self.inputs,
-                     params=self.params,
-                 )
-             )
-
-         if self.metric:
-             if self.metric.summary and not unsafe:
-                 self.metric.summary = self._get_filtered_summary()
-
-             tasks.append(
-                 api_client.log_metric_result(
-                     metric=self.metric,
-                     inputs=self.inputs,
-                     output_template=self.output_template,
-                     section_id=section_id,
-                     position=position,
-                 )
-             )
-
-         if self.figures:
-             tasks.extend([api_client.log_figure(figure) for figure in self.figures])
-
-         if hasattr(self, "result_metadata") and self.result_metadata:
-             description = self.result_metadata[0].get("text", "")
-             if isinstance(description, DescriptionFuture):
-                 description = description.get_description()
-                 self.result_metadata[0]["text"] = description
-
-             for metadata in self.result_metadata:
-                 tasks.append(
-                     update_metadata(
-                         content_id=metadata["content_id"],
-                         text=metadata.get("text", ""),
-                         _json=metadata.get("json"),
-                     )
-                 )
-
-         return await asyncio.gather(*tasks)
-
-
- @dataclass
- class ThresholdTestResultWrapper(ResultWrapper):
-     """
-     Result wrapper for test results produced by the tests that run as part of a test suite
-     """
-
-     name: str = "Threshold Test"
-     figures: Optional[List[Figure]] = None
-     test_results: ThresholdTestResults = None
-     inputs: List[str] = None
-
-     def __repr__(self) -> str:
-         if self.test_results:
-             return (
-                 f'{self.__class__.__name__}(result_id="{self.result_id}", test_results)'
-             )
-         else:
-             return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
-
-     def to_widget(self):
-         vbox_children = []
-         description_html = []
-
-         description_html.append(
-             f"""
-             <h1>{test_id_to_name(self.test_results.test_name)} {"✅" if self.test_results.passed else "❌"}</h1>
-             """
-         )
-
-         if self.result_metadata:
-             metric_description = self.result_metadata[0].get("text", "")
-             if isinstance(metric_description, DescriptionFuture):
-                 metric_description = metric_description.get_description()
-                 self.result_metadata[0]["text"] = metric_description
-
-             description_html.append(metric_description)
-
-         test_params = json.dumps(self.test_results.params, cls=NumpyEncoder, indent=2)
-         description_html.append(
-             f"""
-             <h4>Test Parameters</h4>
-             <pre>{test_params}</pre>
-             """
-         )
-
-         vbox_children.append(HTML("".join(description_html)))
-
-         if self.test_results.summary:
-             vbox_children.append(HTML("<h3>Tables</h3>"))
-             vbox_children.extend(_summary_tables_to_widget(self.test_results.summary))
-
-         if self.figures:
-             vbox_children.append(HTML("<h3>Plots</h3>"))
-             plot_widgets = plot_figures(self.figures)
-             vbox_children.append(plot_widgets)
-
-         return VBox(vbox_children)
-
-     async def log_async(self, section_id: str = None, position: int = None):
-         tasks = [
-             api_client.log_test_result(
-                 self.test_results, self.inputs, section_id, position
-             )
-         ]
-
-         if self.figures:
-             tasks.extend([api_client.log_figure(figure) for figure in self.figures])
-
-         if hasattr(self, "result_metadata") and self.result_metadata:
-             description = self.result_metadata[0].get("text", "")
-             if isinstance(description, DescriptionFuture):
-                 description = description.get_description()
-                 self.result_metadata[0]["text"] = description
-
-             for metadata in self.result_metadata:
-                 tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
-
-         await asyncio.gather(*tasks)
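One behavior in the removed wrapper deserves a callout for anyone migrating: `_get_filtered_summary` silently withheld any summary table that shared a column name with an input dataset while having the same row count, treating it as raw (potentially sensitive) data unless `unsafe=True` was passed to `log()`. A self-contained sketch of that heuristic, using illustrative names rather than the library's own classes:

import pandas as pd

def find_sensitive_columns(dataset_columns: dict, table: pd.DataFrame) -> list:
    """A column is flagged when it exists in an input dataset and the summary
    table has exactly as many rows as that dataset -- a sign the table is raw
    data rather than an aggregate."""
    return [
        col
        for col in table.columns
        if col in dataset_columns and len(table) == dataset_columns[col]
    ]

# toy input dataset with 3 rows -> {column name: row count}
dataset_columns = {"age": 3, "income": 3}

raw = pd.DataFrame({"age": [25, 40, 61], "score": [0.1, 0.7, 0.4]})
agg = pd.DataFrame({"age": [42.0]})  # a 1-row aggregate passes the check

print(find_sensitive_columns(dataset_columns, raw))  # ['age'] -> table withheld
print(find_sensitive_columns(dataset_columns, agg))  # []      -> table logged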
validmind/vm_models/test/test.py (deleted)
@@ -1,103 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- """Base class for Metric, ThresholdTest and any other test type"""
-
- from abc import abstractmethod
- from dataclasses import dataclass
- from inspect import getdoc
- from typing import ClassVar, List
- from uuid import uuid4
-
- from ..test_context import TestUtils
- from .result_wrapper import ResultWrapper
-
-
- @dataclass
- class Test(TestUtils):
-     # Class Variables
-     name: ClassVar[str] = ""  # should be overridden by leaf classes
-     test_type: ClassVar[str]  # should be overridden by parent classes
-     tasks: List[str] = None  # should be overridden by leaf classes
-     tags: List[str] = None  # should be overridden by leaf classes
-
-     required_inputs: ClassVar[List[str]] = None  # should be overridden by leaf classes
-     default_params: ClassVar[dict] = None  # should be overridden by leaf classes
-
-     # Instance Variables
-     _ref_id: str = None  # unique identifier (populated at init)
-     _section_id: str = None  # which section of template this test belongs to
-     test_id: str = None  # populated when loading tests from suites
-     result: ResultWrapper = None  # type should be overridden by parent classes
-
-     params: dict = None  # populated by test suite from user-passed config
-
-     output_template: str = None  # optional output template
-
-     generate_description: bool = (
-         True  # whether to generate a description when caching result
-     )
-
-     def __post_init__(self):
-         """
-         Set default params if not provided
-         """
-         if not self.test_id:
-             raise Exception(
-                 "test_id is missing. It must be passed when initializing the test"
-             )
-
-         self._ref_id = str(uuid4())
-         self.key = (
-             self.test_id
-         )  # for backwards compatibility - figures really should get keyed automatically
-
-         # TODO: add validation for required inputs
-         if self.default_params is None:
-             self.default_params = {}
-         if self.required_inputs is None:
-             self.required_inputs = []
-         if self.tags is None:
-             self.tags = []
-         if self.tasks is None:
-             self.tasks = []
-
-         self.params = {
-             **(self.default_params or {}),
-             **(self.params if self.params is not None else {}),
-         }
-
-     def description(self):
-         """
-         Return the test description. May be overridden by subclasses. Defaults
-         to returning the class' docstring.
-         """
-         return getdoc(self).strip()
-
-     @abstractmethod
-     def summary(self, *args, **kwargs):
-         """
-         Return the summary. Should be overridden by subclasses.
-         """
-         raise NotImplementedError("base class method should not be called")
-
-     @abstractmethod
-     def run(self, *args, **kwargs):
-         """
-         Run the calculation and cache its results.
-         """
-         raise NotImplementedError("base class method should not be called")
-
-     @abstractmethod
-     def cache_results(self, *args, **kwargs):
-         """
-         Cache the results of the calculation.
-         """
-         raise NotImplementedError("base class method should not be called")
-
-     def log(self):
-         """
-         Log the test results to ValidMind.
-         """
-         return self.result.log()
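The main contract this base class enforced, and which survives conceptually in the 2.6 functional test runner, was the key-by-key merge of user-passed `params` over class-level `default_params`. A standalone sketch of that merge behavior (a toy dataclass, not the removed `Test` itself):

from dataclasses import dataclass
from typing import ClassVar

@dataclass
class MiniTest:
    # class-level defaults, as leaf test classes declared them
    default_params: ClassVar[dict] = {"threshold": 0.5, "bins": 10}
    params: dict = None  # user-passed config, possibly partial

    def __post_init__(self):
        # user values win key-by-key; untouched defaults survive
        self.params = {**self.default_params, **(self.params or {})}

assert MiniTest(params={"threshold": 0.8}).params == {"threshold": 0.8, "bins": 10}
assert MiniTest().params == {"threshold": 0.5, "bins": 10}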
validmind/vm_models/test/threshold_test.py (deleted)
@@ -1,106 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- """
- (Threshold)Test class wrapper. Our API exposes the concept of a
- Test (as test_results) but we'll refer to it as a ThresholdTest to
- avoid confusion with "tests" in the general data science/modeling sense.
- """
-
- from dataclasses import dataclass
- from typing import ClassVar, List, Optional
-
- from ...ai.test_descriptions import get_description_metadata
- from ..figure import Figure
- from .result_summary import ResultSummary, ResultTable
- from .result_wrapper import ThresholdTestResultWrapper
- from .test import Test
- from .threshold_test_result import ThresholdTestResult, ThresholdTestResults
-
-
- @dataclass
- class ThresholdTest(Test):
-     """
-     A threshold test is a combination of a metric/plot we track and a
-     corresponding set of parameters and threshold values that allow
-     us to determine whether the metric/plot passes or fails.
-     """
-
-     # Class Variables
-     test_type: ClassVar[str] = "ThresholdTest"
-     category: ClassVar[str]  # should be overridden by test classes
-
-     # Instance Variables
-     result: ThresholdTestResults = None  # populated by cache_results() method
-
-     def summary(self, results: Optional[List[ThresholdTestResult]], all_passed: bool):
-         """
-         Return the threshold test summary. May be overridden by subclasses. Defaults to showing
-         a table with test_name (optional), column and passed.
-
-         The test summary allows renderers (e.g. Word and the ValidMind UI) to display a
-         short summary of the test results.
-         """
-         if results is None:
-             return None
-
-         results_table = []
-         for test_result in results:
-             result_object = {
-                 "passed": test_result.passed,
-             }
-
-             if test_result.test_name is not None:
-                 result_object["test_name"] = test_result.test_name
-             if test_result.column is not None:
-                 result_object["column"] = test_result.column
-
-             results_table.append(result_object)
-
-         return ResultSummary(results=[ResultTable(data=results_table)])
-
-     def cache_results(
-         self,
-         test_results_list: List[ThresholdTestResult],
-         passed: bool,
-         figures: Optional[List[Figure]] = None,
-     ):
-         """
-         Cache the individual results of the threshold test as a list of ThresholdTestResult objects
-
-         Args:
-             test_results_list (List[ThresholdTestResult]): The results of the threshold test
-             passed (bool): Whether the threshold test passed or failed
-
-         Returns:
-             TestSuiteResult: The test suite result object
-         """
-         result_summary = self.summary(test_results_list, passed)
-
-         self.result = ThresholdTestResultWrapper(
-             result_id=self.test_id,
-             result_description=self.description(),
-             result_metadata=[
-                 get_description_metadata(
-                     test_id=self.test_id,
-                     default_description=self.description(),
-                     summary=result_summary.serialize(),
-                     figures=figures,
-                     prefix="test_description",
-                     should_generate=self.generate_description,
-                 )
-             ],
-             inputs=self.get_accessed_inputs(),
-             test_results=ThresholdTestResults(
-                 test_name=self.test_id,
-                 ref_id=self._ref_id,
-                 params=self.params,
-                 passed=passed,
-                 results=test_results_list,
-                 summary=result_summary,
-             ),
-             figures=figures,
-         )
-
-         return self.result
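For reference, the pass/fail table that `summary` assembled (one row per `ThresholdTestResult`, with `test_name` and `column` included only when set) is easy to reproduce standalone; in 2.6 this responsibility moves to the unified result classes under `validmind/vm_models/result/`. A sketch with stand-in result objects, not the library's own classes:

import pandas as pd

class StubResult:
    """Stand-in for the removed ThresholdTestResult (illustrative only)."""
    def __init__(self, passed, column=None, test_name=None):
        self.passed, self.column, self.test_name = passed, column, test_name

def summary_rows(results):
    rows = []
    for r in results:
        row = {"passed": r.passed}  # always present
        if r.test_name is not None:
            row["test_name"] = r.test_name
        if r.column is not None:
            row["column"] = r.column
        rows.append(row)
    return rows

rows = summary_rows([StubResult(True, column="age"), StubResult(False, column="income")])
print(pd.DataFrame(rows))
#    passed  column
# 0    True     age
# 1   False  income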