validmind 2.2.6__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. validmind/__version__.py +1 -1
  2. validmind/{ai.py → ai/test_descriptions.py} +74 -82
  3. validmind/ai/utils.py +104 -0
  4. validmind/api_client.py +58 -19
  5. validmind/client.py +5 -5
  6. validmind/models/foundation.py +10 -6
  7. validmind/models/function.py +3 -1
  8. validmind/models/metadata.py +1 -1
  9. validmind/test_suites/__init__.py +1 -7
  10. validmind/test_suites/regression.py +0 -16
  11. validmind/test_suites/statsmodels_timeseries.py +1 -1
  12. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  13. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  14. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  15. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  16. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  17. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  18. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  19. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  20. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  21. validmind/tests/data_validation/ScatterPlot.py +1 -1
  22. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  23. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  24. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  25. validmind/tests/data_validation/WOEBinTable.py +1 -1
  26. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  27. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  28. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  29. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  30. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  31. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  32. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  33. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  34. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  35. validmind/tests/decorator.py +1 -1
  36. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  37. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  38. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  39. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  40. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  41. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  42. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  43. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  44. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  45. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  46. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  47. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  48. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  49. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  50. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  51. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  52. validmind/tests/model_validation/ragas/utils.py +35 -9
  53. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  54. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  55. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  56. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  57. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  58. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  59. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  60. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  61. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  62. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  63. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  64. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  65. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  66. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  67. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  68. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  69. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  70. validmind/tests/prompt_validation/Bias.py +14 -11
  71. validmind/tests/prompt_validation/Clarity.py +14 -11
  72. validmind/tests/prompt_validation/Conciseness.py +14 -11
  73. validmind/tests/prompt_validation/Delimitation.py +14 -11
  74. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  75. validmind/tests/prompt_validation/Robustness.py +11 -11
  76. validmind/tests/prompt_validation/Specificity.py +14 -11
  77. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  78. validmind/unit_metrics/composite.py +2 -1
  79. validmind/utils.py +4 -63
  80. validmind/vm_models/dataset/dataset.py +17 -3
  81. validmind/vm_models/dataset/utils.py +2 -2
  82. validmind/vm_models/model.py +1 -1
  83. validmind/vm_models/test/metric.py +1 -8
  84. validmind/vm_models/test/result_wrapper.py +2 -2
  85. validmind/vm_models/test/test.py +3 -0
  86. validmind/vm_models/test/threshold_test.py +1 -1
  87. validmind/vm_models/test_suite/runner.py +7 -4
  88. {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
  89. {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/RECORD +92 -101
  90. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  91. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  92. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  93. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  94. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  95. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  96. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  97. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  98. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  99. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  100. {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
  101. {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
  102. {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
validmind/tests/prompt_validation/NegativeInstruction.py CHANGED
@@ -7,6 +7,7 @@ from typing import List
 
 import pandas as pd
 
+from validmind.errors import MissingRequiredTestInputError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -15,11 +16,16 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import AIPoweredTest
+from .ai_powered_test import (
+    call_model,
+    get_explanation,
+    get_score,
+    missing_prompt_message,
+)
 
 
 @dataclass
-class NegativeInstruction(ThresholdTest, AIPoweredTest):
+class NegativeInstruction(ThresholdTest):
     """
     Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
 
@@ -96,12 +102,6 @@ Prompt:
 """
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         result = results[0]
         results_table = [
@@ -125,14 +125,17 @@ Prompt:
         )
 
     def run(self):
-        response = self.call_model(
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
+        response = call_model(
             system_prompt=self.system_prompt,
             user_prompt=self.user_prompt.format(
                 prompt_to_test=self.inputs.model.prompt.template
             ),
         )
-        score = self.get_score(response)
-        explanation = self.get_explanation(response)
+        score = get_score(response)
+        explanation = get_explanation(response)
 
         passed = score > self.params["min_threshold"]
         results = [
validmind/tests/prompt_validation/Robustness.py CHANGED
@@ -7,7 +7,7 @@ from typing import List
 
 import pandas as pd
 
-from validmind.errors import SkipTestError
+from validmind.errors import MissingRequiredTestInputError, SkipTestError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -16,11 +16,11 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import AIPoweredTest
+from .ai_powered_test import call_model, missing_prompt_message
 
 
 @dataclass
-class Robustness(ThresholdTest, AIPoweredTest):
+class Robustness(ThresholdTest):
     """
     Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts.
 
@@ -94,12 +94,6 @@ Prompt:
 Input:
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         results_table = [
             {
@@ -122,8 +116,14 @@ Input:
         )
 
     def run(self):
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
         # TODO: add support for multi-variable prompts
-        if len(self.inputs.model.prompt.variables) > 1:
+        if (
+            not self.inputs.model.prompt.variables
+            or len(self.inputs.model.prompt.variables) > 1
+        ):
             raise SkipTestError(
                 "Robustness only supports single-variable prompts for now"
             )
@@ -138,7 +138,7 @@ Input:
         results = []
 
         for _ in range(self.params["num_tests"]):
-            response = self.call_model(
+            response = call_model(
                 system_prompt=self.system_prompt,
                 user_prompt=self.user_prompt.format(
                     variables="\n".join(self.inputs.model.prompt.variables),
validmind/tests/prompt_validation/Specificity.py CHANGED
@@ -7,6 +7,7 @@ from typing import List
 
 import pandas as pd
 
+from validmind.errors import MissingRequiredTestInputError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -15,11 +16,16 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import AIPoweredTest
+from .ai_powered_test import (
+    call_model,
+    get_explanation,
+    get_score,
+    missing_prompt_message,
+)
 
 
 @dataclass
-class Specificity(ThresholdTest, AIPoweredTest):
+class Specificity(ThresholdTest):
     """
     Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity,
     detail, and relevance.
@@ -91,12 +97,6 @@ Prompt:
 """
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         result = results[0]
         results_table = [
@@ -120,14 +120,17 @@ Prompt:
         )
 
     def run(self):
-        response = self.call_model(
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
+        response = call_model(
             system_prompt=self.system_prompt,
             user_prompt=self.user_prompt.format(
                 prompt_to_test=self.inputs.model.prompt.template
             ),
         )
-        score = self.get_score(response)
-        explanation = self.get_explanation(response)
+        score = get_score(response)
+        explanation = get_explanation(response)
 
         passed = score > self.params["min_threshold"]
         results = [
validmind/tests/prompt_validation/ai_powered_test.py CHANGED
@@ -2,90 +2,68 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-import os
 import re
 
-from openai import AzureOpenAI, OpenAI
+from validmind.ai.utils import get_client_and_model
+
+missing_prompt_message = """
+Cannot run prompt validation tests on a model with no prompt.
+You can set a prompt when creating a vm_model object like this:
+my_vm_model = vm.init_model(
+    predict_fn=call_model,
+    prompt=Prompt(
+        template="<your-prompt-here>",
+        variables=[],
+    ),
+    input_id="my_llm_model",
+)
+"""
+
+
+def call_model(
+    system_prompt: str, user_prompt: str, temperature: float = 0.0, seed: int = 42
+):
+    """Call LLM with the given prompts and return the response"""
+    client, model = get_client_and_model()
+
+    return (
+        client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=temperature,
+            seed=seed,
+        )
+        .choices[0]
+        .message.content
+    )
 
 
-class AIPoweredTest:
-    """
-    Base class for tests powered by an LLM
-    """
+def get_score(response: str):
+    """Get just the score from the response string
+    TODO: use json response mode instead of this
 
-    api_key = None
-    client = None
-    endpoint = None
-    model_name = None
-
-    def __init__(self, *args, **kwargs):
-        if "OPENAI_API_KEY" in os.environ:
-            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-            self.model_name = os.getenv("VM_OPENAI_MODEL", "gpt-3.5-turbo")
-
-        elif "AZURE_OPENAI_KEY" in os.environ:
-            if "AZURE_OPENAI_ENDPOINT" not in os.environ:
-                raise ValueError(
-                    "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
-                )
-
-            if "AZURE_OPENAI_MODEL" not in os.environ:
-                raise ValueError(
-                    "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
-                )
-
-            self.client = AzureOpenAI(
-                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
-                api_key=os.getenv("AZURE_OPENAI_KEY"),
-                api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
-            )
-            self.model_name = os.getenv("AZURE_OPENAI_MODEL")
-
-        else:
-            raise ValueError(
-                "OPENAI_API_KEY or AZURE_OPENAI_KEY must be set to run LLM tests"
-            )
-
-    def call_model(self, user_prompt: str, system_prompt: str = None):
-        """
-        Call an LLM with the passed prompts and return the response. We're using GPT4 for now.
-        """
-        return (
-            self.client.chat.completions.create(
-                model=self.model_name,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt},
-                ],
-                temperature=0.0,
-                seed=42,
-            )
-            .choices[0]
-            .message.content
-        )
-
-    def get_score(self, response: str):
-        """
-        Get just the numeric data in the response string and convert it to an int
+    e.g. "Score: 8\nExplanation: <some-explanation>" -> 8
+    """
+    score = re.search(r"Score: (\d+)", response)
 
-        e.g. "Score: 8\nExplanation: <some-explanation>" -> 8
-        """
-        score = re.search(r"Score: (\d+)", response)
+    if not score:
+        raise ValueError("Could not find score in response")
 
-        if not score:
-            raise ValueError("Could not find score in response")
+    return int(score.group(1))
 
-        return int(score.group(1))
 
-    def get_explanation(self, response: str):
-        """
-        Get just the explanation from the response string
+def get_explanation(response: str):
+    """Get just the explanation from the response string
+    TODO: use json response mode instead of this
 
-        e.g. "Score: 8\nExplanation: <some-explanation>" -> "<some-explanation>"
-        """
-        explanation = re.search(r"Explanation: (.+)", response, re.DOTALL)
+    e.g. "Score: 8\nExplanation: <some-explanation>" -> "<some-explanation>"
+    """
+    explanation = re.search(r"Explanation: (.+)", response, re.DOTALL)
 
-        if not explanation:
-            raise ValueError("Could not find explanation in response")
+    if not explanation:
+        raise ValueError("Could not find explanation in response")
 
-        return explanation.group(1)
+    return explanation.group(1).strip().strip("`")
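
Note: with this change the prompt-validation helpers become plain module-level functions instead of methods on an AIPoweredTest mixin. A minimal usage sketch, assuming the import path shown in the file list above and an OpenAI or Azure OpenAI client already configured for get_client_and_model(); the prompt strings are placeholders:

# Sketch only: the helpers from the hunk above, used outside a test class.
from validmind.tests.prompt_validation.ai_powered_test import (
    call_model,
    get_explanation,
    get_score,
)

response = call_model(
    system_prompt="Grade the prompt. Reply as 'Score: <1-10>\nExplanation: <reason>'.",  # placeholder
    user_prompt="Prompt to grade: 'Summarize the document in three bullet points.'",  # placeholder
)
score = get_score(response)  # parses "Score: 8" -> 8, raises ValueError if absent
explanation = get_explanation(response)  # text after "Explanation:", backticks stripped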
validmind/unit_metrics/composite.py CHANGED
@@ -6,9 +6,10 @@ from dataclasses import dataclass
 from typing import List, Tuple, Union
 from uuid import uuid4
 
+from ..ai.test_descriptions import get_description_metadata
 from ..logging import get_logger
 from ..tests.decorator import _inspect_signature
-from ..utils import get_description_metadata, run_async, test_id_to_name
+from ..utils import run_async, test_id_to_name
 from ..vm_models.test.metric import Metric
 from ..vm_models.test.metric_result import MetricResult
 from ..vm_models.test.result_summary import ResultSummary, ResultTable
validmind/utils.py CHANGED
@@ -6,7 +6,6 @@ import asyncio
 import difflib
 import json
 import math
-import os
 import re
 import sys
 from platform import python_version
@@ -26,11 +25,8 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
 from numpy import ndarray
 from tabulate import tabulate
 
-from .ai import background_generate_description, is_configured
 from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
-
-AI_REVISION_NAME = "Generated by ValidMind AI"
-DEFAULT_REVISION_NAME = "Default Description"
+from .logging import get_logger
 
 DEFAULT_BIG_NUMBER_DECIMALS = 2
 DEFAULT_SMALL_NUMBER_DECIMALS = 4
@@ -53,6 +49,8 @@ params = {
 pylab.rcParams.update(params)
 #################################
 
+logger = get_logger(__name__)
+
 
 def is_notebook() -> bool:
     """
@@ -310,7 +308,7 @@ def run_async_check(func, *args, **kwargs):
             if task.get_name() == name:
                 return task
 
-        return run_async(func, name=name, *args, **kwargs)
+        return run_async(func, name=name, *args, **kwargs)  # noqa B026
 
     except RuntimeError:
         pass
@@ -460,60 +458,3 @@ def md_to_html(md: str, mathml=False) -> str:
     )
 
     return html
-
-
-def get_description_metadata(
-    test_id,
-    default_description,
-    summary=None,
-    figures=None,
-    prefix="metric_description",
-):
-    """Get Metadata Dictionary for a Test or Metric Result
-
-    Generates an LLM interpretation of the test results or uses the default
-    description and returns a metadata object that can be logged with the test results.
-
-    By default, the description is generated by an LLM that will interpret the test
-    results and provide a human-readable description. If the summary or figures are
-    not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
-    set to `0` or `false` or no LLM has been configured, the default description will
-    be used as the test result description.
-
-    Note: Either the summary or figures must be provided to generate the description.
-
-    Args:
-        test_id (str): The test ID
-        default_description (str): The default description for the test
-        summary (Any): The test summary or results to interpret
-        figures (List[Figure]): The figures to attach to the test suite result
-        prefix (str): The prefix to use for the content ID (Default: "metric_description")
-
-    Returns:
-        dict: The metadata object to be logged with the test results
-    """
-    env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
-        "0",
-        "false",
-    ]
-
-    if (summary or figures) and not env_disabled and is_configured():
-        revision_name = AI_REVISION_NAME
-
-        # get description future and set it as the description in the metadata
-        # this will lazily retrieved so it can run in the background in parallel
-        description = background_generate_description(
-            test_id=test_id,
-            test_description=default_description,
-            test_summary=summary,
-            figures=figures,
-        )
-
-    else:
-        revision_name = DEFAULT_REVISION_NAME
-        description = md_to_html(default_description, mathml=True)
-
-    return {
-        "content_id": f"{prefix}:{test_id}::{revision_name}",
-        "text": description,
-    }
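
Note: get_description_metadata now lives in validmind/ai/test_descriptions.py (see the import changes in composite.py, metric.py, and threshold_test.py). A hedged sketch of the adjusted import, assuming the relocated function keeps the signature shown in the removed code above; the test ID and description are illustrative:

# Old import (removed from validmind/utils.py in 2.3.1):
# from validmind.utils import get_description_metadata

# New import path:
from validmind.ai.test_descriptions import get_description_metadata

# With no summary/figures, or VALIDMIND_LLM_DESCRIPTIONS_ENABLED=0, the removed
# implementation fell back to the default description rather than calling the LLM.
metadata = get_description_metadata(
    test_id="validmind.data_validation.ADF",  # illustrative test ID
    default_description="Augmented Dickey-Fuller test for stationarity.",
)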
validmind/vm_models/dataset/dataset.py CHANGED
@@ -195,7 +195,19 @@ class VMDataset:
         probability_column: str = None,
         probability_values: list = None,
         prediction_probabilities: list = None,  # DEPRECATED: use probability_values
+        **kwargs,
     ):
+        """Assign predictions and probabilities to the dataset.
+
+        Args:
+            model (VMModel): The model used to generate the predictions.
+            prediction_column (str, optional): The name of the column containing the predictions. Defaults to None.
+            prediction_values (list, optional): The values of the predictions. Defaults to None.
+            probability_column (str, optional): The name of the column containing the probabilities. Defaults to None.
+            probability_values (list, optional): The values of the probabilities. Defaults to None.
+            prediction_probabilities (list, optional): DEPRECATED: The values of the probabilities. Defaults to None.
+            kwargs: Additional keyword arguments that will get passed through to the model's `predict` method.
+        """
         if prediction_probabilities is not None:
             warnings.warn(
                 "The `prediction_probabilities` argument is deprecated. Use `probability_values` instead.",
@@ -226,7 +238,9 @@
 
         if prediction_values is None:
             X = self.df if isinstance(model, (FunctionModel, PipelineModel)) else self.x
-            probability_values, prediction_values = compute_predictions(model, X)
+            probability_values, prediction_values = compute_predictions(
+                model, X, **kwargs
+            )
 
         prediction_column = prediction_column or f"{model.input_id}_prediction"
         self._add_column(prediction_column, prediction_values)
@@ -356,8 +370,8 @@
         return as_df(self.df[self.probability_column(model)])
 
     def target_classes(self):
-        """Returns the unique number of target classes for the target (Y) variable"""
-        return [str(i) for i in np.unique(self.y)]
+        """Returns the target class labels or unique values of the target column."""
+        return self.target_class_labels or [str(i) for i in np.unique(self.y)]
 
     def __str__(self):
         return (
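
Note: the new **kwargs on assign_predictions is forwarded to compute_predictions and, per the dataset/utils.py hunk below, on to model.predict. A rough usage sketch; the init_model/init_dataset arguments and the batch_size keyword are illustrative, not taken from this diff:

import validmind as vm

# `model` and `df` stand in for an already-trained model and a pandas DataFrame.
vm_model = vm.init_model(model, input_id="my_model")
vm_ds = vm.init_dataset(dataset=df, input_id="my_ds", target_column="target")

# Extra keyword arguments now pass through to model.predict(X, **kwargs);
# batch_size is a hypothetical option that the underlying predict function would accept.
vm_ds.assign_predictions(model=vm_model, batch_size=32)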
validmind/vm_models/dataset/utils.py CHANGED
@@ -94,7 +94,7 @@ def _is_probabilties(output):
     return np.all((output >= 0) & (output <= 1)) and np.any((output > 0) & (output < 1))
 
 
-def compute_predictions(model, X) -> tuple:
+def compute_predictions(model, X, **kwargs) -> tuple:
     probability_values = None
 
     try:
@@ -108,7 +108,7 @@ def compute_predictions(model, X) -> tuple:
 
     try:
         logger.info("Running predict()... This may take a while")
-        prediction_values = model.predict(X)
+        prediction_values = model.predict(X, **kwargs)
         logger.info("Done running predict()")
     except MissingOrInvalidModelPredictFnError:
         raise MissingOrInvalidModelPredictFnError(
validmind/vm_models/model.py CHANGED
@@ -114,7 +114,7 @@ class VMModel(ABC):
 
         self.__post_init__()
 
-    def __post_init__(self):
+    def __post_init__(self):  # noqa: B027
        """Allows child classes to add their own post-init logic"""
        pass
 
validmind/vm_models/test/metric.py CHANGED
@@ -12,8 +12,8 @@ from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
+from ...ai.test_descriptions import get_description_metadata
 from ...errors import MissingCacheResultsArgumentsError
-from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -36,13 +36,6 @@ class Metric(Test):
     # Instance Variables
     result: MetricResultWrapper = None  # populated by cache_results() method
 
-    @property
-    def key(self):
-        """
-        Keep the key for compatibility reasons
-        """
-        return self._key if hasattr(self, "_key") else self.name
-
     @abstractmethod
     def summary(self, metric_value: Optional[Union[dict, list, pd.DataFrame]] = None):
         """
validmind/vm_models/test/result_wrapper.py CHANGED
@@ -15,10 +15,10 @@ import pandas as pd
 from ipywidgets import HTML, GridBox, Layout, VBox
 
 from ... import api_client
-from ...ai import DescriptionFuture
+from ...ai.test_descriptions import AI_REVISION_NAME, DescriptionFuture
 from ...input_registry import input_registry
 from ...logging import get_logger
-from ...utils import AI_REVISION_NAME, NumpyEncoder, display, run_async, test_id_to_name
+from ...utils import NumpyEncoder, display, run_async, test_id_to_name
 from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
validmind/vm_models/test/test.py CHANGED
@@ -52,6 +52,9 @@ class Test(TestUtils):
                 "test_id is missing. It must be passed when initializing the test"
             )
         self._ref_id = str(uuid4())
+        self.key = (
+            self.test_id
+        )  # for backwards compatibility - figures really should get keyed automatically
 
         # TODO: add validation for required inputs
         if self.default_params is None:
validmind/vm_models/test/threshold_test.py CHANGED
@@ -11,7 +11,7 @@ avoid confusion with the "tests" in the general data science/modeling sense.
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...utils import get_description_metadata
+from ...ai.test_descriptions import get_description_metadata
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
validmind/vm_models/test_suite/runner.py CHANGED
@@ -83,11 +83,14 @@ class TestSuiteRunner:
                 test_configs = test_configs.get("params", {})
             else:
                 if (test_configs) and ("params" not in test_configs):
-                    """[DEPRECATED] Deprecated method for setting test parameters directly in the 'config' parameter"""
-                    logger.info(
-                        "Setting test parameters directly in the 'config' parameter of the run_documentation_tests() method is deprecated. "
-                        'Instead, use the new format of the config: config = {"test_id": {"params": {...}, "inputs": {...}}}'
+                    # [DEPRECATED] This is the old way of setting test parameters
+                    msg = (
+                        "Setting test parameters directly in the 'config' parameter"
+                        " of the run_documentation_tests() method is deprecated. "
+                        "Instead, use the new format of the config: "
+                        'config = {"test_id": {"params": {...}, "inputs": {...}}}'
                     )
+                    logger.warning(msg)
 
             test.load(inputs=inputs, context=self.context, config=test_configs)
{validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 2.2.6
+Version: 2.3.1
 Summary: ValidMind Developer Framework
 License: Commercial License
 Author: Andres Rodriguez