validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
validmind/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.5.6"
1
+ __version__ = "2.5.15"
validmind/ai/test_descriptions.py CHANGED
@@ -4,9 +4,11 @@
4
4
 
5
5
  import os
6
6
  from concurrent.futures import ThreadPoolExecutor
7
+ from typing import Union
7
8
 
8
9
  from validmind.utils import md_to_html
9
10
 
11
+ from ..client_config import client_config
10
12
  from ..logging import get_logger
11
13
 
12
14
  __executor = ThreadPoolExecutor()
@@ -110,10 +112,11 @@ def generate_description(
110
112
  test_id: str,
111
113
  test_description: str,
112
114
  test_summary: str,
115
+ metric: Union[float, int] = None,
113
116
  figures: list = None,
114
117
  ):
115
118
  """Generate the description for the test results"""
116
- if not test_summary and not figures:
119
+ if not test_summary and not figures and not metric:
117
120
  raise ValueError("No summary or figures provided - cannot generate description")
118
121
 
119
122
  # TODO: fix circular import
@@ -130,6 +133,13 @@ def generate_description(
130
133
  else test_description
131
134
  )
132
135
 
136
+ if metric:
137
+ metric_summary = f"**Metric Value**: {metric}"
138
+ if test_summary:
139
+ test_summary = metric_summary + "\n" + test_summary
140
+ else:
141
+ test_summary = metric_summary
142
+
133
143
  if test_summary:
134
144
  logger.debug(
135
145
  f"Generating description for test {test_name} with stringified summary"
@@ -198,11 +208,16 @@ def background_generate_description(
198
208
  test_description: str,
199
209
  test_summary: str,
200
210
  figures: list = None,
211
+ metric: Union[int, float] = None,
201
212
  ):
202
213
  def wrapped():
203
214
  try:
204
215
  return generate_description(
205
- test_id, test_description, test_summary, figures
216
+ test_id=test_id,
217
+ test_description=test_description,
218
+ test_summary=test_summary,
219
+ figures=figures,
220
+ metric=metric,
206
221
  )
207
222
  except Exception as e:
208
223
  logger.error(f"Failed to generate description: {e}")
@@ -217,6 +232,7 @@ def get_description_metadata(
217
232
  default_description,
218
233
  summary=None,
219
234
  figures=None,
235
+ metric=None,
220
236
  prefix="metric_description",
221
237
  should_generate=True,
222
238
  ):
@@ -238,16 +254,18 @@ def get_description_metadata(
238
254
  default_description (str): The default description for the test
239
255
  summary (Any): The test summary or results to interpret
240
256
  figures (List[Figure]): The figures to attach to the test suite result
257
+ metric (Union[int, float]): Unit metrics attached to the test result
241
258
  prefix (str): The prefix to use for the content ID (Default: "metric_description")
242
259
  should_generate (bool): Whether to generate the description or not (Default: True)
243
260
 
244
261
  Returns:
245
262
  dict: The metadata object to be logged with the test results
246
263
  """
247
- env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
248
- "0",
249
- "false",
250
- ]
264
+ # Check the feature flag first, then the environment variable
265
+ llm_descriptions_enabled = (
266
+ client_config.can_generate_llm_test_descriptions()
267
+ and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") not in ["0", "false"]
268
+ )
251
269
 
252
270
  # TODO: fix circular import
253
271
  from validmind.ai.utils import is_configured
@@ -255,7 +273,7 @@ def get_description_metadata(
255
273
  if (
256
274
  should_generate
257
275
  and (summary or figures)
258
- and not env_disabled
276
+ and llm_descriptions_enabled
259
277
  and is_configured()
260
278
  ):
261
279
  revision_name = AI_REVISION_NAME
@@ -267,6 +285,7 @@ def get_description_metadata(
267
285
  test_description=default_description,
268
286
  test_summary=summary,
269
287
  figures=figures,
288
+ metric=metric,
270
289
  )
271
290
 
272
291
  else:
validmind/api_client.py CHANGED
@@ -186,12 +186,24 @@ def __ping() -> Dict[str, Any]:
186
186
  client_config.project = client_info["project"]
187
187
  client_config.documentation_template = client_info.get("documentation_template", {})
188
188
  client_config.feature_flags = client_info.get("feature_flags", {})
189
+ client_config.model = client_info.get("model", {})
190
+ client_config.document_type = client_info.get(
191
+ "document_type", "model_documentation"
192
+ )
189
193
 
190
194
  if ack_connected:
191
- logger.info(
192
- f"Connected to ValidMind... Current Model: {client_config.project['name']}"
193
- f" ({client_config.project['cuid']})"
194
- )
195
+ if client_config.model:
196
+ logger.info(
197
+ f"🎉 Connected to ValidMind!\n"
198
+ f"📊 Model: {client_config.model.get('name', 'N/A')} "
199
+ f"(ID: {client_config.model.get('cuid', 'N/A')})\n"
200
+ f"📁 Document Type: {client_config.document_type}"
201
+ )
202
+ else:
203
+ logger.info(
204
+ f"Connected to ValidMind... Current Model: {client_config.project['name']}"
205
+ f" ({client_config.project['cuid']})"
206
+ )
195
207
 
196
208
 
197
209
  def reload():
@@ -331,32 +343,6 @@ async def log_figures(figures: List[Figure]) -> Dict[str, Any]:
331
343
  Returns:
332
344
  dict: The response from the API
333
345
  """
334
- # this actually slows things down - better to log them in parallel
335
- # if client_config.can_log_figures(): # check if the backend supports batch logging
336
- # try:
337
- # data = {}
338
- # files = {}
339
- # for figure in figures:
340
- # data.update(
341
- # {f"{k}-{figure.key}": v for k, v in figure.serialize().items()}
342
- # )
343
- # files.update(
344
- # {
345
- # f"{k}-{figure.key}": v
346
- # for k, v in figure.serialize_files().items()
347
- # }
348
- # )
349
-
350
- # return await _post(
351
- # "log_figures",
352
- # data=data,
353
- # files=files,
354
- # )
355
- # except Exception as e:
356
- # logger.error("Error logging figures to ValidMind API")
357
- # raise e
358
-
359
- # else:
360
346
  return await asyncio.gather(*[log_figure(figure) for figure in figures])
361
347
 
362
348
 
@@ -416,11 +402,11 @@ async def log_metrics(
416
402
  Returns:
417
403
  dict: The response from the API
418
404
  """
419
- params = {}
405
+ request_params = {}
420
406
  if section_id:
421
- params["section_id"] = section_id
407
+ request_params["section_id"] = section_id
422
408
  if position is not None:
423
- params["position"] = position
409
+ request_params["position"] = position
424
410
 
425
411
  data = []
426
412
 
@@ -430,7 +416,7 @@ async def log_metrics(
430
416
  "inputs": inputs,
431
417
  }
432
418
 
433
- if output_template and client_config.can_log_output_template():
419
+ if output_template:
434
420
  metric_data["output_template"] = output_template
435
421
 
436
422
  data.append(metric_data)
@@ -438,7 +424,7 @@ async def log_metrics(
438
424
  try:
439
425
  return await _post(
440
426
  "log_metrics",
441
- params=params,
427
+ params=request_params,
442
428
  data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
443
429
  )
444
430
  except Exception as e:
@@ -469,16 +455,16 @@ async def log_test_result(
469
455
  Returns:
470
456
  dict: The response from the API
471
457
  """
472
- params = {}
458
+ request_params = {}
473
459
  if section_id:
474
- params["section_id"] = section_id
460
+ request_params["section_id"] = section_id
475
461
  if position is not None:
476
- params["position"] = position
462
+ request_params["position"] = position
477
463
 
478
464
  try:
479
465
  return await _post(
480
466
  "log_test_results",
481
- params=params,
467
+ params=request_params,
482
468
  data=json.dumps(
483
469
  {
484
470
  **result.serialize(),
@@ -503,7 +489,7 @@ def log_test_results(
503
489
 
504
490
  Args:
505
491
  results (list): A list of ThresholdTestResults objects
506
- inputs (list): A list of input keys (names) that were used to run the test
492
+ inputs (list): A list of input IDs that were used to run the test
507
493
 
508
494
  Raises:
509
495
  Exception: If the API call fails
@@ -522,11 +508,11 @@ def log_test_results(
522
508
  return responses
523
509
 
524
510
 
525
- def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
511
+ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
526
512
  """Logs input information - internal use for now (don't expose via public API)
527
513
 
528
514
  Args:
529
- name (str): The name of the input
515
+ input_id (str): The input_id of the input
530
516
  type (str): The type of the input
531
517
  metadata (dict): The metadata of the input
532
518
 
@@ -542,7 +528,7 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
542
528
  "log_input",
543
529
  data=json.dumps(
544
530
  {
545
- "name": name,
531
+ "name": input_id,
546
532
  "type": type,
547
533
  "metadata": metadata,
548
534
  },
@@ -555,6 +541,66 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
555
541
  raise e
556
542
 
557
543
 
544
+ async def alog_metric(
545
+ key: str,
546
+ value: float,
547
+ inputs: Optional[List[str]] = None,
548
+ params: Optional[Dict[str, Any]] = None,
549
+ recorded_at: Optional[str] = None,
550
+ ) -> None:
551
+ """See log_metric for details"""
552
+ if not key or not isinstance(key, str):
553
+ raise ValueError("`key` must be a non-empty string")
554
+
555
+ if not value or not isinstance(value, (int, float)):
556
+ raise ValueError("`value` must be a scalar (int or float)")
557
+
558
+ try:
559
+ return await _post(
560
+ "log_unit_metric",
561
+ data=json.dumps(
562
+ {
563
+ "key": key,
564
+ "value": value,
565
+ "inputs": inputs or [],
566
+ "params": params or {},
567
+ "recorded_at": recorded_at,
568
+ },
569
+ cls=NumpyEncoder,
570
+ allow_nan=False,
571
+ ),
572
+ )
573
+ except Exception as e:
574
+ logger.error("Error logging metric to ValidMind API")
575
+ raise e
576
+
577
+
578
+ def log_metric(
579
+ key: str,
580
+ value: float,
581
+ inputs: Optional[List[str]] = None,
582
+ params: Optional[Dict[str, Any]] = None,
583
+ recorded_at: Optional[str] = None,
584
+ ) -> None:
585
+ """Logs a unit metric
586
+
587
+ Unit metrics are key-value pairs where the key is the metric name and the value is
588
+ a scalar (int or float). These key-value pairs are associated with the currently
589
+ selected model (inventory model in the ValidMind Platform) and keys can be logged
590
+ to over time to create a history of the metric. On the platform, these metrics
591
+ will be used to create plots/visualizations for documentation and dashboards etc.
592
+
593
+ Args:
594
+ key (str): The metric key
595
+ value (float): The metric value
596
+ inputs (list, optional): A list of input IDs that were used to compute the metric.
597
+ params (dict, optional): Dictionary of parameters used to compute the metric.
598
+ recorded_at (str, optional): The timestamp of the metric. Server will use
599
+ current time if not provided.
600
+ """
601
+ run_async(alog_metric, key, value, inputs, params, recorded_at)
602
+
603
+
558
604
  def start_run() -> str:
559
605
  """Starts a new test run
560
606
 
validmind/client.py CHANGED
@@ -164,7 +164,7 @@ def init_dataset(
164
164
 
165
165
  if __log:
166
166
  log_input(
167
- name=input_id,
167
+ input_id=input_id,
168
168
  type="dataset",
169
169
  metadata=get_dataset_info(vm_dataset),
170
170
  )
@@ -265,7 +265,7 @@ def init_model(
265
265
 
266
266
  if __log:
267
267
  log_input(
268
- name=input_id,
268
+ input_id=input_id,
269
269
  type="model",
270
270
  metadata=metadata,
271
271
  )
validmind/client_config.py CHANGED
@@ -18,7 +18,9 @@ class ClientConfig:
18
18
  """
19
19
 
20
20
  project: object
21
+ model: object
21
22
  feature_flags: dict
23
+ document_type: str
22
24
  documentation_template: object
23
25
  running_on_colab: bool = False
24
26
 
@@ -34,21 +36,16 @@ class ClientConfig:
34
36
  except ImportError:
35
37
  self.running_on_colab = False
36
38
 
37
- def is_json_plots_enabled(self):
38
- """
39
- Returns True if the JSON plots feature flag is enabled on the backend
40
- """
41
- return self.feature_flags.get("generate_json_plots", False)
42
-
43
- def can_log_figures(self):
44
- """Returns True if the client can log figures to the API"""
45
- return self.feature_flags.get("log_figures", False)
46
-
47
- def can_log_output_template(self):
48
- """Returns True if the client can log output templates to the API"""
49
- return self.feature_flags.get("output_templates", False)
39
+ def can_generate_llm_test_descriptions(self):
40
+ """Returns True if the client can generate LLM based test descriptions"""
41
+ return self.feature_flags.get("llm_test_descriptions", True)
50
42
 
51
43
 
52
44
  client_config = ClientConfig(
53
- project=None, feature_flags={}, documentation_template=None
45
+ project=None,
46
+ model=None,
47
+ feature_flags={},
48
+ document_type="model_documentation",
49
+ documentation_template=None,
50
+ running_on_colab=False,
54
51
  )
validmind/datasets/regression/fred_timeseries.py CHANGED
@@ -68,65 +68,56 @@ def convert_to_levels(diff_df, original_df, target_column):
68
68
  return levels_df
69
69
 
70
70
 
71
- def get_demo_test_config(test_suite=None):
71
+ def get_demo_test_config():
72
72
 
73
73
  default_config = {}
74
74
 
75
75
  default_config["validmind.data_validation.TimeSeriesDescription"] = {
76
- "inputs": {
77
- "dataset": "raw_ds",
76
+ "input_grid": {
77
+ "dataset": [
78
+ "raw_ds",
79
+ "train_diff_ds",
80
+ "test_diff_ds",
81
+ "train_ds",
82
+ "test_ds",
83
+ ],
78
84
  }
79
85
  }
80
86
  default_config["validmind.data_validation.TimeSeriesLinePlot"] = {
81
- "inputs": {
82
- "dataset": "raw_ds",
87
+ "input_grid": {
88
+ "dataset": ["raw_ds"],
83
89
  }
84
90
  }
85
91
  default_config["validmind.data_validation.TimeSeriesMissingValues"] = {
86
- "inputs": {
87
- "dataset": "raw_ds",
92
+ "input_grid": {
93
+ "dataset": [
94
+ "raw_ds",
95
+ "train_diff_ds",
96
+ "test_diff_ds",
97
+ "train_ds",
98
+ "test_ds",
99
+ ],
88
100
  }
89
101
  }
90
102
  default_config["validmind.data_validation.SeasonalDecompose"] = {
91
- "inputs": {
92
- "dataset": "raw_ds",
103
+ "input_grid": {
104
+ "dataset": ["raw_ds"],
93
105
  }
94
106
  }
95
- default_config[
96
- "validmind.data_validation.TimeSeriesDescriptiveStatistics:train_diff_data"
97
- ] = {
98
- "inputs": {
99
- "dataset": "train_diff_ds",
107
+ default_config["validmind.data_validation.TimeSeriesDescriptiveStatistics"] = {
108
+ "input_grid": {
109
+ "dataset": ["train_diff_ds", "test_diff_ds"],
100
110
  }
101
111
  }
102
- default_config[
103
- "validmind.data_validation.TimeSeriesDescriptiveStatistics:test_diff_data"
104
- ] = {
105
- "inputs": {
106
- "dataset": "test_diff_ds",
107
- }
108
- }
109
- default_config["validmind.data_validation.TimeSeriesOutliers:train_diff_data"] = {
110
- "inputs": {
111
- "dataset": "train_diff_ds",
112
+ default_config["validmind.data_validation.TimeSeriesOutliers"] = {
113
+ "input_grid": {
114
+ "dataset": ["train_diff_ds", "test_diff_ds"],
112
115
  },
113
116
  "params": {"zscore_threshold": 4},
114
117
  }
115
- default_config["validmind.data_validation.TimeSeriesOutliers:test_diff_data"] = {
116
- "inputs": {
117
- "dataset": "test_diff_ds",
118
- },
119
- "params": {"zscore_threshold": 4},
120
- }
121
- default_config["validmind.data_validation.TimeSeriesHistogram:train_diff_data"] = {
122
- "inputs": {
123
- "dataset": "train_diff_ds",
124
- },
125
- "params": {"nbins": 100},
126
- }
127
- default_config["validmind.data_validation.TimeSeriesHistogram:test_diff_data"] = {
128
- "inputs": {
129
- "dataset": "test_diff_ds",
118
+ default_config["validmind.data_validation.TimeSeriesHistogram"] = {
119
+ "input_grid": {
120
+ "dataset": ["train_diff_ds", "test_diff_ds"],
130
121
  },
131
122
  "params": {"nbins": 100},
132
123
  }
@@ -135,57 +126,37 @@ def get_demo_test_config(test_suite=None):
135
126
  "datasets": ["train_diff_ds", "test_diff_ds"],
136
127
  }
137
128
  }
138
- default_config["validmind.model_validation.ModelMetadataComparison"] = {
139
- "inputs": {
140
- "models": ["random_forests_model", "gradient_boosting_model"],
141
- }
142
- }
143
- default_config[
144
- "validmind.model_validation.sklearn.RegressionErrorsComparison:train_data"
145
- ] = {
146
- "inputs": {
147
- "datasets": ["train_ds", "train_ds"],
148
- "models": ["random_forests_model", "gradient_boosting_model"],
129
+ default_config["validmind.model_validation.ModelMetadata"] = {
130
+ "input_grid": {
131
+ "model": ["random_forests_model", "gradient_boosting_model"],
149
132
  }
150
133
  }
151
- default_config[
152
- "validmind.model_validation.sklearn.RegressionErrorsComparison:test_data"
153
- ] = {
154
- "inputs": {
155
- "datasets": ["test_ds", "test_ds"],
156
- "models": ["random_forests_model", "gradient_boosting_model"],
134
+ default_config["validmind.model_validation.sklearn.RegressionErrors"] = {
135
+ "input_grid": {
136
+ "dataset": ["train_ds", "test_ds"],
137
+ "model": ["random_forests_model", "gradient_boosting_model"],
157
138
  }
158
139
  }
159
- default_config[
160
- "validmind.model_validation.sklearn.RegressionR2SquareComparison:train_data"
161
- ] = {
162
- "inputs": {
163
- "datasets": ["train_ds", "train_ds"],
164
- "models": ["random_forests_model", "gradient_boosting_model"],
165
- }
166
- }
167
- default_config[
168
- "validmind.model_validation.sklearn.RegressionR2SquareComparison:test_data"
169
- ] = {
170
- "inputs": {
171
- "datasets": ["test_ds", "test_ds"],
172
- "models": ["random_forests_model", "gradient_boosting_model"],
140
+ default_config["validmind.model_validation.sklearn.RegressionR2Square"] = {
141
+ "input_grid": {
142
+ "dataset": ["train_ds", "test_ds"],
143
+ "model": ["random_forests_model", "gradient_boosting_model"],
173
144
  }
174
145
  }
175
146
  default_config[
176
147
  "validmind.model_validation.TimeSeriesR2SquareBySegments:train_data"
177
148
  ] = {
178
- "inputs": {
179
- "datasets": ["train_ds", "train_ds"],
180
- "models": ["random_forests_model", "gradient_boosting_model"],
149
+ "input_grid": {
150
+ "dataset": ["train_ds"],
151
+ "model": ["random_forests_model", "gradient_boosting_model"],
181
152
  }
182
153
  }
183
154
  default_config[
184
155
  "validmind.model_validation.TimeSeriesR2SquareBySegments:test_data"
185
156
  ] = {
186
- "inputs": {
187
- "datasets": ["test_ds", "test_ds"],
188
- "models": ["random_forests_model", "gradient_boosting_model"],
157
+ "input_grid": {
158
+ "dataset": ["test_ds"],
159
+ "model": ["random_forests_model", "gradient_boosting_model"],
189
160
  },
190
161
  "params": {
191
162
  "segments": {
@@ -194,78 +165,36 @@ def get_demo_test_config(test_suite=None):
194
165
  }
195
166
  },
196
167
  }
197
- default_config[
198
- "validmind.model_validation.TimeSeriesPredictionsPlot:train_data"
199
- ] = {
200
- "inputs": {
201
- "datasets": ["train_ds", "train_ds"],
202
- "models": ["random_forests_model", "gradient_boosting_model"],
203
- }
204
- }
205
- default_config["validmind.model_validation.TimeSeriesPredictionsPlot:test_data"] = {
206
- "inputs": {
207
- "datasets": ["test_ds", "test_ds"],
208
- "models": ["random_forests_model", "gradient_boosting_model"],
209
- }
210
- }
211
- default_config[
212
- "validmind.model_validation.TimeSeriesPredictionWithCI:random_forests_model"
213
- ] = {
214
- "inputs": {
215
- "dataset": "test_ds",
216
- "model": "random_forests_model",
217
- }
218
- }
219
- default_config[
220
- "validmind.model_validation.TimeSeriesPredictionWithCI:gradient_boosting_model"
221
- ] = {
222
- "inputs": {
223
- "dataset": "test_ds",
224
- "model": "gradient_boosting_model",
225
- }
226
- }
227
- default_config["validmind.model_validation.ModelPredictionResiduals:train_data"] = {
228
- "inputs": {
229
- "datasets": ["train_ds", "train_ds"],
230
- "models": ["random_forests_model", "gradient_boosting_model"],
168
+ default_config["validmind.model_validation.TimeSeriesPredictionsPlot"] = {
169
+ "input_grid": {
170
+ "dataset": ["train_ds", "test_ds"],
171
+ "model": ["random_forests_model", "gradient_boosting_model"],
231
172
  }
232
173
  }
233
- default_config["validmind.model_validation.ModelPredictionResiduals:test_data"] = {
234
- "inputs": {
235
- "datasets": ["test_ds", "test_ds"],
236
- "models": ["random_forests_model", "gradient_boosting_model"],
237
- }
238
- }
239
- default_config[
240
- "validmind.model_validation.sklearn.FeatureImportanceComparison:train_data"
241
- ] = {
242
- "inputs": {
243
- "datasets": ["train_ds", "train_ds"],
244
- "models": ["random_forests_model", "gradient_boosting_model"],
174
+ default_config["validmind.model_validation.TimeSeriesPredictionWithCI"] = {
175
+ "input_grid": {
176
+ "dataset": ["train_ds", "test_ds"],
177
+ "model": ["random_forests_model", "gradient_boosting_model"],
245
178
  }
246
179
  }
247
- default_config[
248
- "validmind.model_validation.sklearn.FeatureImportanceComparison:test_data"
249
- ] = {
250
- "inputs": {
251
- "datasets": ["test_ds", "test_ds"],
252
- "models": ["random_forests_model", "gradient_boosting_model"],
180
+ default_config["validmind.model_validation.ModelPredictionResiduals"] = {
181
+ "input_grid": {
182
+ "dataset": ["train_ds", "test_ds"],
183
+ "model": ["random_forests_model", "gradient_boosting_model"],
253
184
  }
254
185
  }
255
- default_config[
256
- "validmind.model_validation.sklearn.PermutationFeatureImportance:random_forests_model"
257
- ] = {
258
- "inputs": {
259
- "dataset": "test_ds",
260
- "model": "random_forests_model",
186
+ default_config["validmind.model_validation.sklearn.FeatureImportance"] = {
187
+ "input_grid": {
188
+ "dataset": ["train_ds", "test_ds"],
189
+ "model": ["random_forests_model", "gradient_boosting_model"],
261
190
  }
262
191
  }
263
192
  default_config[
264
- "validmind.model_validation.sklearn.PermutationFeatureImportance:gradient_boosting_model"
193
+ "validmind.model_validation.sklearn.PermutationFeatureImportance"
265
194
  ] = {
266
- "inputs": {
267
- "dataset": "test_ds",
268
- "model": "gradient_boosting_model",
195
+ "input_grid": {
196
+ "dataset": ["train_ds", "test_ds"],
197
+ "model": ["random_forests_model", "gradient_boosting_model"],
269
198
  }
270
199
  }
271
200
 
validmind/template.py CHANGED
@@ -18,6 +18,7 @@ logger = get_logger(__name__)
18
18
  CONTENT_TYPE_MAP = {
19
19
  "test": "Threshold Test",
20
20
  "metric": "Metric",
21
+ "unit_metric": "Unit Metric",
21
22
  "metadata_text": "Metadata Text",
22
23
  "dynamic": "Dynamic Content",
23
24
  "text": "Text",
@@ -42,7 +42,6 @@ from .time_series import (
42
42
  TimeSeriesDataset,
43
43
  TimeSeriesModelValidation,
44
44
  TimeSeriesMultivariate,
45
- TimeSeriesSensitivity,
46
45
  TimeSeriesUnivariate,
47
46
  )
48
47
 
@@ -78,7 +77,6 @@ core_test_suites = {
78
77
  TimeSeriesDataset.suite_id: TimeSeriesDataset,
79
78
  TimeSeriesModelValidation.suite_id: TimeSeriesModelValidation,
80
79
  TimeSeriesMultivariate.suite_id: TimeSeriesMultivariate,
81
- TimeSeriesSensitivity.suite_id: TimeSeriesSensitivity,
82
80
  TimeSeriesUnivariate.suite_id: TimeSeriesUnivariate,
83
81
  }
84
82