validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl

This diff compares the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.8.dist-info/METADATA +137 -0
  179. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.25.dist-info/METADATA +0 -118
  196. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -3,25 +3,19 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  """
- ValidMind’s Python Library is a library of developer tools and methods designed to automate
- the documentation and validation of your models.
+ The ValidMind Library is a suite of developer tools and methods designed to automate the documentation and validation of your models.

- The Library is designed to be model agnostic. If your model is built in Python, ValidMind's
- Python library will provide all the standard functionality without requiring your developers to rewrite any functions.
+ Designed to be model agnostic, the ValidMind Library provides all the standard functionality without requiring you to rewrite any functions as long as your model is built in Python.

- The Library provides a rich suite of documentation tools and test suites, from documenting
- descriptions of your dataset to testing your models for weak spots and overfit areas. The Library
- helps you automate the generation of model documentation by feeding the ValidMind platform with documentation
- artifacts and test results to the ValidMind platform.
+ With a rich array of documentation tools and test suites, from documenting descriptions of your datasets to testing your models for weak spots and overfit areas, the ValidMind Library helps you automate model documentation by feeding the ValidMind Platform with documentation artifacts and test results.

- To install the client library:
+ To install the ValidMind Library:

  ```bash
  pip install validmind
  ```

- To initialize the client library, paste the code snippet with the client integration details directly into your
- development source code, replacing this example with your own:
+ To initialize the ValidMind Library, paste the code snippet with the model identifier credentials directly into your development source code, replacing this example with your own:

  ```python
  import validmind as vm
@@ -34,9 +28,7 @@ vm.init(
  )
  ```

- After you have pasted the code snippet into your development source code and executed the code, the Python client
- library will register with ValidMind. You can now use the Library to document and test your models,
- and to upload to the ValidMind Platform.
+ After you have pasted the code snippet into your development source code and executed the code, the Python Library API will register with ValidMind. You can now use the ValidMind Library to document and test your models, and to upload to the ValidMind Platform.
  """
  import warnings

@@ -57,11 +49,11 @@ from .client import ( # noqa: E402
      run_documentation_tests,
      run_test_suite,
  )
- from .tests.decorator import metric, tags, tasks, test
+ from .tests.decorator import tags, tasks, test

  __all__ = [ # noqa
      "__version__",
-     # Framework High Level API
+     # Python Library API
      "datasets",
      "errors",
      "get_test_suite",
@@ -69,7 +61,6 @@ __all__ = [ # noqa
      "init_dataset",
      "init_model",
      "init_r_model",
-     "metric",
      "preview_template",
      "reload",
      "run_documentation_tests",
validmind/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "2.5.25"
+ __version__ = "2.6.8"
validmind/ai/test_descriptions.py CHANGED
@@ -2,17 +2,20 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ import json
  import os
  import re
  from concurrent.futures import ThreadPoolExecutor
- from typing import Union
+ from typing import List, Optional, Union

  from jinja2 import Template

- from validmind.utils import md_to_html
-
  from ..client_config import client_config
  from ..logging import get_logger
+ from ..utils import NumpyEncoder, md_to_html, test_id_to_name
+ from ..vm_models.figure import Figure
+ from ..vm_models.result import ResultTable
+ from .utils import DescriptionFuture, get_client_and_model

  __executor = ThreadPoolExecutor()
  __prompt = None
@@ -20,10 +23,6 @@ __prompt = None
  logger = get_logger(__name__)


- AI_REVISION_NAME = "Generated by ValidMind AI"
- DEFAULT_REVISION_NAME = "Default Description"
-
-
  def _load_prompt():
      global __prompt
@@ -55,7 +54,6 @@ def prompt_to_message(role, prompt):
          if start > last_index:
              content.append({"type": "text", "text": prompt[last_index:start]})

-         # Image
          content.append({"type": "image_url", "image_url": {"url": match.group(1)}})

          last_index = end
@@ -67,79 +65,66 @@ def prompt_to_message(role, prompt):
      return {"role": role, "content": content}


- class DescriptionFuture:
-     """This will be immediately returned from generate_description so that
-     the tests can continue to be run in parallel while the description is
-     retrieved asynchronously.
-
-     The value will be retrieved later and if its not ready yet, it should
-     block until it is.
-     """
-
-     def __init__(self, future):
-         self._future = future
-
-     def get_description(self):
-         if isinstance(self._future, str):
-             description = self._future
-         else:
-             # This will block until the future is completed
-             description = self._future.result()
-
-         return md_to_html(description, mathml=True)
-
-
  def generate_description(
      test_id: str,
      test_description: str,
-     test_summary: str,
+     tables: List[ResultTable] = None,
      metric: Union[float, int] = None,
-     figures: list = None,
+     figures: List[Figure] = None,
+     title: Optional[str] = None,
  ):
      """Generate the description for the test results"""
-     if not test_summary and not figures and not metric:
+     if not tables and not figures and not metric:
          raise ValueError(
-             "No summary, unit metric or figures provided - cannot generate description"
+             "No tables, unit metric or figures provided - cannot generate description"
          )

-     # TODO: fix circular import
-     from validmind.ai.utils import get_client_and_model
+     # # TODO: fix circular import
+     # from validmind.ai.utils import get_client_and_model

      client, model = get_client_and_model()

      # get last part of test id
-     test_name = test_id.split(".")[-1]
-     # truncate the test description to save time
-     test_description = (
-         f"{test_description[:500]}..."
-         if len(test_description) > 500
-         else test_description
-     )
-
-     if metric:
-         metric_summary = f"**Metric Value**: {metric}"
-         if test_summary:
-             test_summary = metric_summary + "\n" + test_summary
-         else:
-             test_summary = metric_summary
+     test_name = title or test_id.split(".")[-1]
+
+     # TODO: fully support metrics
+     if metric is not None:
+         tables = [] if not tables else tables
+         tables.append(
+             ResultTable(
+                 data=[
+                     {"Metric": test_id_to_name(test_id), "Value": metric},
+                 ],
+             )
+         )

-     figures = [] if test_summary else figures
+     if tables:
+         summary = "\n---\n".join(
+             [
+                 json.dumps(table.serialize(), cls=NumpyEncoder, separators=(",", ":"))
+                 for table in tables
+             ]
+         )
+     else:
+         summary = None

      input_data = {
          "test_name": test_name,
          "test_description": test_description,
-         "summary": test_summary,
-         "figures": [figure._get_b64_url() for figure in figures],
+         "title": title,
+         "summary": summary,
+         "figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
      }
      system, user = _load_prompt()

+     messages = [
+         prompt_to_message("system", system.render(input_data)),
+         prompt_to_message("user", user.render(input_data)),
+     ]
      response = client.chat.completions.create(
          model=model,
          temperature=0.0,
-         messages=[
-             prompt_to_message("system", system.render(input_data)),
-             prompt_to_message("user", user.render(input_data)),
-         ],
+         messages=messages,
      )

      return response.choices[0].message.content
@@ -148,18 +133,20 @@ def generate_description(
  def background_generate_description(
      test_id: str,
      test_description: str,
-     test_summary: str,
-     figures: list = None,
+     tables: List[ResultTable] = None,
+     figures: List[Figure] = None,
      metric: Union[int, float] = None,
+     title: Optional[str] = None,
  ):
      def wrapped():
          try:
              return generate_description(
                  test_id=test_id,
                  test_description=test_description,
-                 test_summary=test_summary,
+                 tables=tables,
                  figures=figures,
                  metric=metric,
+                 title=title,
              )
          except Exception as e:
              logger.error(f"Failed to generate description: {e}")
@@ -169,14 +156,14 @@ def background_generate_description(
      return DescriptionFuture(__executor.submit(wrapped))


- def get_description_metadata(
-     test_id,
-     default_description,
-     summary=None,
-     figures=None,
-     metric=None,
-     prefix="metric_description",
-     should_generate=True,
+ def get_result_description(
+     test_id: str,
+     test_description: str,
+     tables: List[ResultTable] = None,
+     figures: List[Figure] = None,
+     metric: Union[int, float] = None,
+     should_generate: bool = True,
+     title: Optional[str] = None,
  ):
      """Get Metadata Dictionary for a Test or Metric Result
@@ -184,24 +171,23 @@
      description and returns a metadata object that can be logged with the test results.

      By default, the description is generated by an LLM that will interpret the test
-     results and provide a human-readable description. If the summary or figures are
+     results and provide a human-readable description. If the tables or figures are
      not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
      set to `0` or `false` or no LLM has been configured, the default description will
      be used as the test result description.

-     Note: Either the summary or figures must be provided to generate the description.
+     Note: Either the tables or figures must be provided to generate the description.

      Args:
          test_id (str): The test ID
-         default_description (str): The default description for the test
-         summary (Any): The test summary or results to interpret
+         test_description (str): The default description for the test
+         tables (Any): The test tables or results to interpret
          figures (List[Figure]): The figures to attach to the test suite result
          metric (Union[int, float]): Unit metrics attached to the test result
-         prefix (str): The prefix to use for the content ID (Default: "metric_description")
          should_generate (bool): Whether to generate the description or not (Default: True)

      Returns:
-         dict: The metadata object to be logged with the test results
+         str: The description to be logged with the test results
      """
      # Check the feature flag first, then the environment variable
      llm_descriptions_enabled = (
@@ -214,27 +200,22 @@
      if (
          should_generate
-         and (summary or figures)
+         and (tables or figures)
          and llm_descriptions_enabled
          and is_configured()
      ):
-         revision_name = AI_REVISION_NAME
-
          # get description future and set it as the description in the metadata
          # this will lazily retrieved so it can run in the background in parallel
          description = background_generate_description(
              test_id=test_id,
-             test_description=default_description,
-             test_summary=summary,
+             test_description=test_description,
+             tables=tables,
              figures=figures,
              metric=metric,
+             title=title,
          )

      else:
-         revision_name = DEFAULT_REVISION_NAME
-         description = md_to_html(default_description, mathml=True)
+         description = md_to_html(test_description, mathml=True)

-     return {
-         "content_id": f"{prefix}:{test_id}::{revision_name}",
-         "text": description,
-     }
+     return description
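Note: `generate_description` now builds the LLM prompt's `summary` from `ResultTable` objects rather than accepting a pre-rendered summary string, serializing each table to compact JSON and joining them with `---` separators. A stand-alone sketch of just that serialization step, with made-up rows standing in for `ResultTable.serialize()` output:

```python
import json

# made-up stand-ins for what ResultTable.serialize() returns
tables = [
    [{"Metric": "Accuracy", "Value": 0.91}],
    [{"Metric": "F1", "Value": 0.87}],
]

summary = "\n---\n".join(
    json.dumps(table, separators=(",", ":")) for table in tables
)
print(summary)
# [{"Metric":"Accuracy","Value":0.91}]
# ---
# [{"Metric":"F1","Value":0.87}]
```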
validmind/ai/test_result_description/context.py CHANGED
@@ -37,11 +37,11 @@ class Context:
          pass

      def load(self, input_data):
-         # this task can accept a dict or a test result object from the library
+         # this task can accept a dict or a test result object from the ValidMind Library
          if isinstance(input_data, dict):
              return input_data

-         # we are likely running outside of the library and need to convert
+         # we are likely running outside of the ValidMind Library and need to convert
          # the test result object to a dictionary
          test_result = input_data
validmind/ai/utils.py CHANGED
@@ -7,8 +7,8 @@ from urllib.parse import urljoin

  from openai import AzureOpenAI, Client, OpenAI

- from ..api_client import get_ai_key, get_api_host
  from ..logging import get_logger
+ from ..utils import md_to_html

  logger = get_logger(__name__)

@@ -19,6 +19,28 @@
  __ack = None


+ class DescriptionFuture:
+     """This will be immediately returned from generate_description so that
+     the tests can continue to be run in parallel while the description is
+     retrieved asynchronously.
+
+     The value will be retrieved later and if its not ready yet, it should
+     block until it is.
+     """
+
+     def __init__(self, future):
+         self._future = future
+
+     def get_description(self):
+         if isinstance(self._future, str):
+             description = self._future
+         else:
+             # This will block until the future is completed
+             description = self._future.result()
+
+         return md_to_html(description, mathml=True)
+
+
  def get_client_and_model():
      """Get model and client to use for generating interpretations

@@ -58,6 +80,9 @@

      else:
          try:
+             # TODO: fix circular import
+             from ..api_client import get_ai_key, get_api_host
+
              response = get_ai_key()
              __client = Client(
                  base_url=(
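Note: `DescriptionFuture` moves here from `test_descriptions.py` so that module can import it (together with `get_client_and_model`) without the previous circular-import workaround. The class is a thin blocking wrapper around `concurrent.futures`; a self-contained sketch of the same pattern, independent of ValidMind internals:

```python
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor()


class LazyText:
    """Toy analogue of DescriptionFuture: hold a literal string or a Future,
    and block for the Future's result only when the value is first read."""

    def __init__(self, value_or_future):
        self._value = value_or_future

    def get(self):
        if isinstance(self._value, str):
            return self._value  # already materialized
        return self._value.result()  # blocks until the worker finishes


def slow_description():
    return "generated in the background"


lazy = LazyText(executor.submit(slow_description))
# ...other work continues in parallel here...
print(lazy.get())  # blocks only if the worker has not finished yet
```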
validmind/api_client.py CHANGED
@@ -23,9 +23,7 @@ from .client_config import client_config
  from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
  from .logging import get_logger, init_sentry, send_single_error
  from .utils import NumpyEncoder, run_async
- from .vm_models import Figure, MetricResult, ThresholdTestResults
-
- # TODO: can't import types from vm_models because of circular dependency
+ from .vm_models import Figure

  logger = get_logger(__name__)

@@ -50,6 +48,14 @@
              loop.create_task(__api_session.close())
          else:
              loop.run_until_complete(__api_session.close())
+     except RuntimeError as e:
+         # ignore RuntimeError when closing the session from the main thread
+         if "no current event loop in thread" in str(e):
+             pass
+         elif "Event loop is closed" in str(e):
+             pass
+         else:
+             raise e
      except Exception as e:
          logger.exception("Error closing aiohttp session at exit: %s", e)

@@ -187,7 +193,7 @@ def init(
      api_secret: Optional[str] = None,
      api_host: Optional[str] = None,
      model: Optional[str] = None,
-     monitoring=False,
+     monitoring: bool = False,
  ):
      """
      Initializes the API client instances and calls the /ping endpoint to ensure
@@ -202,7 +208,7 @@
          api_key (str, optional): The API key. Defaults to None.
          api_secret (str, optional): The API secret. Defaults to None.
          api_host (str, optional): The API host. Defaults to None.
-         monitoring (str, optional): The ongoing monitoring flag. Defaults to False.
+         monitoring (bool): The ongoing monitoring flag. Defaults to False.

      Raises:
          ValueError: If the API key and secret are not provided
@@ -212,7 +218,7 @@
      if api_key == "...":
          # special case to detect when running a notebook placeholder (...)
          # will override with environment variables for easier local development
-         api_host = api_key = api_secret = project = None
+         api_host = api_key = api_secret = project = model = None

      _model_cuid = project or model or os.getenv("VM_API_MODEL")
      if _model_cuid is None:
@@ -244,30 +250,7 @@
          raise e


- async def log_figure(figure: Figure) -> Dict[str, Any]:
-     """Logs a figure
-
-     Args:
-         figure (Figure): The Figure object wrapper
-
-     Raises:
-         Exception: If the API call fails
-
-     Returns:
-         dict: The response from the API
-     """
-     try:
-         return await _post(
-             "log_figure",
-             data=figure.serialize(),
-             files=figure.serialize_files(),
-         )
-     except Exception as e:
-         logger.error("Error logging figure to ValidMind API")
-         raise e
-
-
- async def get_metadata(content_id: str) -> Dict[str, Any]:
+ async def aget_metadata(content_id: str) -> Dict[str, Any]:
      """Gets a metadata object from ValidMind API.

      Args:
@@ -279,11 +262,10 @@ async def get_metadata(content_id: str) -> Dict[str, Any]:
      Returns:
          dict: Metadata object
      """
-     # TODO: add a more accurate type hint/documentation
      return await _get(f"get_metadata/{content_id}")


- async def log_metadata(
+ async def alog_metadata(
      content_id: str,
      text: Optional[str] = None,
      _json: Optional[Dict[str, Any]] = None,
@@ -317,21 +299,11 @@
          raise e


- async def log_metric_result(
-     metric: MetricResult,
-     inputs: List[str],
-     output_template: str = None,
-     section_id: str = None,
-     position: int = None,
- ) -> Dict[str, Any]:
-     """Logs metrics to ValidMind API.
+ async def alog_figure(figure: Figure) -> Dict[str, Any]:
+     """Logs a figure

      Args:
-         metric (MetricResult): A MetricResult object
-         inputs (list): A list of input keys (names) that were used to run the test
-         output_template (str): The optional output template for the test
-         section_id (str): The section ID add a test driven block to the documentation
-         position (int): The position in the section to add the test driven block
+         figure (Figure): The Figure object wrapper

      Raises:
          Exception: If the API call fails
@@ -339,33 +311,19 @@
      Returns:
          dict: The response from the API
      """
-     request_params = {}
-     if section_id:
-         request_params["section_id"] = section_id
-     if position is not None:
-         request_params["position"] = position
-
-     metric_data = {
-         **metric.serialize(),
-         "inputs": inputs,
-     }
-     if output_template:
-         metric_data["output_template"] = output_template
-
      try:
          return await _post(
-             "log_metrics",
-             params=request_params,
-             data=json.dumps([metric_data], cls=NumpyEncoder, allow_nan=False),
+             "log_figure",
+             data=figure.serialize(),
+             files=figure.serialize_files(),
          )
      except Exception as e:
-         logger.error("Error logging metrics to ValidMind API")
+         logger.error("Error logging figure to ValidMind API")
          raise e


- async def log_test_result(
-     result: ThresholdTestResults,
-     inputs: List[str],
+ async def alog_test_result(
+     result: Dict[str, Any],
      section_id: str = None,
      position: int = None,
  ) -> Dict[str, Any]:
@@ -375,8 +333,7 @@
      can also be called directly if the user wants to run tests on their own.

      Args:
-         result (validmind.ThresholdTestResults): A ThresholdTestResults object
-         inputs (list): A list of input keys (names) that were used to run the test
+         result (dict): A dictionary representing the test result
          section_id (str, optional): The section ID add a test driven block to the documentation
          position (int): The position in the section to add the test driven block

@@ -391,16 +348,12 @@
          request_params["section_id"] = section_id
      if position is not None:
          request_params["position"] = position
-
      try:
          return await _post(
              "log_test_results",
              params=request_params,
              data=json.dumps(
-                 {
-                     **result.serialize(),
-                     "inputs": inputs,
-                 },
+                 result,
                  cls=NumpyEncoder,
                  allow_nan=False,
              ),
@@ -410,7 +363,9 @@
          raise e


- def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+ async def alog_input(
+     input_id: str, type: str, metadata: Dict[str, Any]
+ ) -> Dict[str, Any]:
      """Logs input information - internal use for now (don't expose via public API)

      Args:
@@ -425,8 +380,7 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
          dict: The response from the API
      """
      try:
-         return run_async(
-             _post,
+         return await _post(
              "log_input",
              data=json.dumps(
                  {
@@ -443,9 +397,13 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
          raise e


+ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+     return run_async(alog_input, input_id, type, metadata)
+
+
  async def alog_metric(
      key: str,
-     value: float,
+     value: Union[int, float],
      inputs: Optional[List[str]] = None,
      params: Optional[Dict[str, Any]] = None,
      recorded_at: Optional[str] = None,
@@ -454,8 +412,14 @@
      if not key or not isinstance(key, str):
          raise ValueError("`key` must be a non-empty string")

-     if not value or not isinstance(value, (int, float)):
-         raise ValueError("`value` must be a scalar (int or float)")
+     if value is None:
+         raise ValueError("Must provide a value for the metric")
+
+     if not isinstance(value, (int, float)):
+         try:
+             value = float(value)
+         except (ValueError, TypeError):
+             raise ValueError("`value` must be a scalar (int or float)")

      try:
          return await _post(
@@ -489,7 +453,7 @@ def log_metric(
      Unit metrics are key-value pairs where the key is the metric name and the value is
      a scalar (int or float). These key-value pairs are associated with the currently
      selected model (inventory model in the ValidMind Platform) and keys can be logged
-     to over time to create a history of the metric. On the platform, these metrics
+     to over time to create a history of the metric. On the ValidMind Platform, these metrics
      will be used to create plots/visualizations for documentation and dashboards etc.

      Args:
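Note: the loosened validation in `alog_metric` means falsy-but-valid values such as `0` and `0.0` are no longer rejected, and non-numeric inputs are coerced with `float()` before the POST. A hedged usage sketch, assuming `vm.init(...)` has already configured the API client; the metric key is made up:

```python
from validmind.api_client import log_metric

# 0.0 was rejected by the old `not value` check; it is now accepted
log_metric(key="false_positive_rate", value=0.0)

# anything float() can coerce (e.g. a numeric string or NumPy scalar)
# is converted before being posted; otherwise ValueError is raised
log_metric(key="false_positive_rate", value="0.25")
```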