validmind 2.2.5__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (105)
  1. validmind/__version__.py +1 -1
  2. validmind/{ai.py → ai/test_descriptions.py} +127 -69
  3. validmind/ai/utils.py +104 -0
  4. validmind/api_client.py +70 -31
  5. validmind/client.py +5 -5
  6. validmind/logging.py +38 -32
  7. validmind/models/foundation.py +10 -6
  8. validmind/models/function.py +3 -1
  9. validmind/models/metadata.py +1 -1
  10. validmind/test_suites/__init__.py +1 -7
  11. validmind/test_suites/regression.py +0 -16
  12. validmind/test_suites/statsmodels_timeseries.py +1 -1
  13. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  14. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  15. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  16. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  17. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  18. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  19. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  20. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  21. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  22. validmind/tests/data_validation/ScatterPlot.py +1 -1
  23. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  24. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  25. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  26. validmind/tests/data_validation/WOEBinTable.py +1 -1
  27. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  28. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  29. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  30. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  31. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  32. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  33. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  34. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  35. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  36. validmind/tests/decorator.py +1 -1
  37. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  38. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  39. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  40. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  41. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  42. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  43. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  44. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  45. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  46. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  47. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  48. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  49. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  50. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  51. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  52. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  53. validmind/tests/model_validation/ragas/utils.py +35 -9
  54. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  55. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  56. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  57. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  58. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  59. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  60. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  61. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  62. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  63. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  64. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  65. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  66. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  67. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  68. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  69. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  70. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  71. validmind/tests/prompt_validation/Bias.py +14 -11
  72. validmind/tests/prompt_validation/Clarity.py +14 -11
  73. validmind/tests/prompt_validation/Conciseness.py +14 -11
  74. validmind/tests/prompt_validation/Delimitation.py +14 -11
  75. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  76. validmind/tests/prompt_validation/Robustness.py +11 -11
  77. validmind/tests/prompt_validation/Specificity.py +14 -11
  78. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  79. validmind/unit_metrics/composite.py +2 -1
  80. validmind/utils.py +4 -49
  81. validmind/vm_models/dataset/dataset.py +17 -3
  82. validmind/vm_models/dataset/utils.py +2 -2
  83. validmind/vm_models/model.py +1 -1
  84. validmind/vm_models/test/metric.py +1 -8
  85. validmind/vm_models/test/result_wrapper.py +27 -34
  86. validmind/vm_models/test/test.py +3 -0
  87. validmind/vm_models/test/threshold_test.py +1 -1
  88. validmind/vm_models/test_suite/runner.py +12 -6
  89. validmind/vm_models/test_suite/summary.py +18 -7
  90. validmind/vm_models/test_suite/test.py +13 -20
  91. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
  92. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/RECORD +95 -104
  93. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  94. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  95. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  96. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  97. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  98. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  99. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  100. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  101. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  102. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  103. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
  104. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
  105. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/ragas/AnswerRelevance.py

@@ -11,7 +11,7 @@ from ragas.metrics import answer_relevancy

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "rag_performance")
@@ -108,8 +108,7 @@ def AnswerRelevance(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[answer_relevancy],
+ Dataset.from_pandas(df), metrics=[answer_relevancy], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["answer_relevancy"].to_list(), nbins=10)
@@ -117,7 +116,9 @@ def AnswerRelevance(

  return (
  {
- "Scores": result_df[["question", "contexts", "answer", "answer_relevancy"]],
+ "Scores (will not be uploaded to UI)": result_df[
+ ["question", "contexts", "answer", "answer_relevancy"]
+ ],
  "Aggregate Scores": [
  {
  "Mean Score": result_df["answer_relevancy"].mean(),

validmind/tests/model_validation/ragas/AnswerSimilarity.py

@@ -11,7 +11,7 @@ from ragas.metrics import answer_similarity

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm")
@@ -93,8 +93,7 @@ def AnswerSimilarity(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[answer_similarity],
+ Dataset.from_pandas(df), metrics=[answer_similarity], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["answer_similarity"].to_list(), nbins=10)
@@ -102,7 +101,9 @@ def AnswerSimilarity(

  return (
  {
- "Scores": result_df[["answer", "ground_truth", "answer_similarity"]],
+ "Scores (will not be uploaded to UI)": result_df[
+ ["answer", "ground_truth", "answer_similarity"]
+ ],
  "Aggregate Scores": [
  {
  "Mean Score": result_df["answer_similarity"].mean(),
validmind/tests/model_validation/ragas/AspectCritique.py

@@ -18,7 +18,7 @@ from ragas.metrics.critique import (

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns

  aspect_map = {
  "coherence": coherence,
@@ -36,14 +36,14 @@ def AspectCritique(
  question_column="question",
  answer_column="answer",
  contexts_column="contexts",
- aspects: list = [
+ aspects: list = [  # noqa: B006 this is fine as immutable default since it never gets modified
  "coherence",
  "conciseness",
  "correctness",
  "harmfulness",
  "maliciousness",
  ],
- additional_aspects: list = [],
+ additional_aspects: list = None,
  ):
  """
  Evaluates generations against the following aspects: harmfulness, maliciousness,
@@ -131,13 +131,19 @@ def AspectCritique(
  df = get_renamed_columns(dataset.df, required_columns)

  built_in_aspects = [aspect_map[aspect] for aspect in aspects]
- custom_aspects = [
- _AspectCritique(name=name, definition=description)
- for name, description in additional_aspects
- ]
+ custom_aspects = (
+ [
+ _AspectCritique(name=name, definition=description)
+ for name, description in additional_aspects
+ ]
+ if additional_aspects
+ else []
+ )
  all_aspects = [*built_in_aspects, *custom_aspects]

- result_df = evaluate(Dataset.from_pandas(df), metrics=all_aspects).to_pandas()
+ result_df = evaluate(
+ Dataset.from_pandas(df), metrics=all_aspects, **get_ragas_config()
+ ).to_pandas()

  df_melted = result_df.melt(
  id_vars=["question", "answer", "contexts"],
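
Note that additional_aspects now defaults to None instead of a mutable [], and the comprehension above unpacks each entry into a (name, definition) pair. A minimal usage sketch under those assumptions; the test runner call, test ID, and input_id are illustrative and not taken from this diff:

    import validmind as vm

    vm.tests.run_test(
        "validmind.model_validation.ragas.AspectCritique",
        inputs={"dataset": "rag_eval_ds"},  # hypothetical registered dataset input_id
        params={
            "aspects": ["coherence", "conciseness"],
            # each extra aspect unpacks to (name, definition), per the comprehension above
            "additional_aspects": [
                ("politeness", "Is the answer polite and professional in tone?"),
            ],
        },
    )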
validmind/tests/model_validation/ragas/ContextEntityRecall.py

@@ -11,7 +11,7 @@ from ragas.metrics import context_entity_recall

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "retrieval_performance")
@@ -99,8 +99,7 @@ def ContextEntityRecall(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[context_entity_recall],
+ Dataset.from_pandas(df), metrics=[context_entity_recall], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(
@@ -110,7 +109,7 @@ def ContextEntityRecall(

  return (
  {
- "Scores": result_df[
+ "Scores (will not be uploaded to UI)": result_df[
  [
  "contexts",
  "ground_truth",

validmind/tests/model_validation/ragas/ContextPrecision.py

@@ -11,7 +11,7 @@ from ragas.metrics import context_precision

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "retrieval_performance")
@@ -21,7 +21,7 @@ def ContextPrecision(
  question_column: str = "question",
  contexts_column: str = "contexts",
  ground_truth_column: str = "ground_truth",
- ):
+ ):  # noqa: B950
  """
  Context Precision is a metric that evaluates whether all of the ground-truth
  relevant items present in the contexts are ranked higher or not. Ideally all the
@@ -95,8 +95,7 @@ def ContextPrecision(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[context_precision],
+ Dataset.from_pandas(df), metrics=[context_precision], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["context_precision"].to_list(), nbins=10)
@@ -104,7 +103,7 @@ def ContextPrecision(

  return (
  {
- "Scores": result_df[
+ "Scores (will not be uploaded to UI)": result_df[
  ["question", "contexts", "ground_truth", "context_precision"]
  ],
  "Aggregate Scores": [
validmind/tests/model_validation/ragas/ContextRecall.py

@@ -11,7 +11,7 @@ from ragas.metrics import context_recall

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "retrieval_performance")
@@ -95,8 +95,7 @@ def ContextRecall(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[context_recall],
+ Dataset.from_pandas(df), metrics=[context_recall], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["context_recall"].to_list(), nbins=10)
@@ -104,7 +103,7 @@ def ContextRecall(

  return (
  {
- "Scores": result_df[
+ "Scores (will not be uploaded to UI)": result_df[
  ["question", "contexts", "ground_truth", "context_recall"]
  ],
  "Aggregate Scores": [

validmind/tests/model_validation/ragas/ContextRelevancy.py

@@ -11,7 +11,7 @@ from ragas.metrics import context_relevancy

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "retrieval_performance")
@@ -88,8 +88,7 @@ def ContextRelevancy(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[context_relevancy],
+ Dataset.from_pandas(df), metrics=[context_relevancy], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["context_relevancy"].to_list(), nbins=10)
@@ -97,7 +96,9 @@ def ContextRelevancy(

  return (
  {
- "Scores": result_df[["question", "contexts", "context_relevancy"]],
+ "Scores (will not be uploaded to UI)": result_df[
+ ["question", "contexts", "context_relevancy"]
+ ],
  "Aggregate Scores": [
  {
  "Mean Score": result_df["context_relevancy"].mean(),
validmind/tests/model_validation/ragas/Faithfulness.py

@@ -11,7 +11,7 @@ from ragas.metrics import faithfulness

  from validmind import tags, tasks

- from .utils import get_renamed_columns
+ from .utils import get_ragas_config, get_renamed_columns


  @tags("ragas", "llm", "rag_performance")
@@ -20,7 +20,7 @@ def Faithfulness(
  dataset,
  answer_column="answer",
  contexts_column="contexts",
- ):
+ ):  # noqa
  """
  Evaluates the faithfulness of the generated answers with respect to retrieved contexts.

@@ -93,8 +93,7 @@ def Faithfulness(
  df = get_renamed_columns(dataset.df, required_columns)

  result_df = evaluate(
- Dataset.from_pandas(df),
- metrics=[faithfulness],
+ Dataset.from_pandas(df), metrics=[faithfulness], **get_ragas_config()
  ).to_pandas()

  fig_histogram = px.histogram(x=result_df["faithfulness"].to_list(), nbins=10)
@@ -102,7 +101,9 @@ def Faithfulness(

  return (
  {
- "Scores": result_df[["contexts", "answer", "faithfulness"]],
+ "Scores (will not be uploaded to UI)": result_df[
+ ["contexts", "answer", "faithfulness"]
+ ],
  "Aggregate Scores": [
  {
  "Mean Score": result_df["faithfulness"].mean(),
validmind/tests/model_validation/ragas/utils.py

@@ -2,17 +2,42 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ import os

- def _udf_get_sub_col(x, root_col, sub_col):
- if not isinstance(x, dict):
- raise TypeError(f"Expected a dictionary in column '{root_col}', got {type(x)}.")
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings

- if sub_col not in x:
- raise KeyError(
- f"Sub-column '{sub_col}' not found in dictionary in column '{root_col}'."
- )
+ from validmind.ai.utils import get_client_and_model

- return x[sub_col]
+ EMBEDDINGS_MODEL = "text-embedding-3-small"
+
+
+ def get_ragas_config():
+ client, model = get_client_and_model()
+ os.environ["OPENAI_API_BASE"] = str(client.base_url)
+
+ return {
+ "llm": ChatOpenAI(api_key=client.api_key, model=model),
+ "embeddings": OpenAIEmbeddings(api_key=client.api_key, model=EMBEDDINGS_MODEL),
+ }
+
+
+ def make_sub_col_udf(root_col, sub_col):
+ """Create a udf that extracts sub-column values from a dictionary."""
+
+ def _udf_get_sub_col(x):
+ if not isinstance(x, dict):
+ raise TypeError(
+ f"Expected a dictionary in column '{root_col}', got {type(x)}."
+ )
+
+ if sub_col not in x:
+ raise KeyError(
+ f"Sub-column '{sub_col}' not found in dictionary in column '{root_col}'."
+ )
+
+ return x[sub_col]
+
+ return _udf_get_sub_col


  def get_renamed_columns(df, column_map):
@@ -34,6 +59,7 @@ def get_renamed_columns(df, column_map):
  Returns:
  pd.DataFrame: The DataFrame with columns renamed.
  """
+
  new_df = df.copy()

  for new_name, source in column_map.items():
@@ -50,7 +76,7 @@ def get_renamed_columns(df, column_map):

  if root_col in new_df.columns:
  new_df[new_name] = new_df[root_col].apply(
- lambda x: _udf_get_sub_col(x, root_col, sub_col)
+ make_sub_col_udf(root_col, sub_col)
  )

  else:
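
Taken together, get_ragas_config() routes every ragas evaluate() call through the LLM and embeddings client resolved by validmind.ai.utils.get_client_and_model(), and make_sub_col_udf() replaces the old three-argument lambda with a closure that pandas can apply directly. A minimal sketch of how the ragas tests above consume these helpers; the DataFrame and column names are illustrative:

    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import faithfulness

    from validmind.tests.model_validation.ragas.utils import (
        get_ragas_config,
        get_renamed_columns,
    )

    # map the ragas-expected names onto whatever the evaluation DataFrame uses
    required_columns = {"answer": "model_answer", "contexts": "retrieved_contexts"}
    df = get_renamed_columns(eval_df, required_columns)  # eval_df: your pandas DataFrame

    # llm + embeddings come from the ValidMind-configured OpenAI client, so the
    # scores are produced by the same client/model that get_client_and_model() resolves
    result_df = evaluate(
        Dataset.from_pandas(df), metrics=[faithfulness], **get_ragas_config()
    ).to_pandas()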
validmind/tests/model_validation/sklearn/ClusterPerformance.py

@@ -66,7 +66,7 @@ class ClusterPerformance(Metric):
  y_true_test = y_true_test.astype(y_pred_test.dtype).flatten()
  results = []
  for metric_name, metric_fcn in metric_info.items():
- for sample in samples:
+ for _ in samples:
  train_value = metric_fcn(list(y_true_train), y_pred_train)
  test_value = metric_fcn(list(y_true_test), y_pred_test)
  results.append(
@@ -85,7 +85,7 @@ class ClusterPerformance(Metric):
  """
  table_records = []
  for result in raw_results:
- for key, value in result.items():
+ for key, _ in result.items():
  table_records.append(
  {
  "Metric": key,

validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py

@@ -123,7 +123,7 @@ class ClusterPerformanceMetrics(ClusterPerformance):
  """
  table_records = []
  for result in raw_results:
- for key, value in result.items():
+ for key, _ in result.items():
  table_records.append(
  {
  "Metric": key,
validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py

@@ -52,7 +52,7 @@ class ModelsPerformanceComparison(ClassifierPerformance):
  """

  name = "models_performance_comparison"
- required_inputs = ["model", "models", "dataset"]
+ required_inputs = ["dataset", "models"]
  metadata = {
  "task_types": ["classification", "text_classification"],
  "tags": [
@@ -70,12 +70,12 @@ class ModelsPerformanceComparison(ClassifierPerformance):
  """
  results = []
  prf_table = []
- classes = {str(i) for i in unique(self.y_true())}
+ classes = {str(i) for i in unique(self.inputs.dataset.y)}

  for class_name in classes:
  prf_dict = {}
  prf_dict["Class"] = class_name
- for m, m_v in metric_value.items():
+ for m, _ in metric_value.items():
  prf_dict[f"Precision- {m}"] = metric_value[m][class_name]["precision"]
  prf_dict[f"Recall- {m}"] = metric_value[m][class_name]["recall"]
  prf_dict[f"F1- {m}"] = metric_value[m][class_name]["f1-score"]
@@ -85,7 +85,7 @@ class ModelsPerformanceComparison(ClassifierPerformance):
  for class_name in avg_metrics:
  avg_dict = {}
  avg_dict["Class"] = class_name
- for m, m_v in metric_value.items():
+ for m, _ in metric_value.items():
  avg_dict[f"Precision- {m}"] = metric_value[m][class_name]["precision"]
  avg_dict[f"Recall- {m}"] = metric_value[m][class_name]["recall"]
  avg_dict[f"F1- {m}"] = metric_value[m][class_name]["f1-score"]
@@ -103,7 +103,7 @@ class ModelsPerformanceComparison(ClassifierPerformance):
  for metric_name in ["accuracy", "roc_auc"]:
  acc_roc_auc_dict = {}
  acc_roc_auc_dict["Metric"] = metric_name
- for m, m_v in metric_value.items():
+ for m, _ in metric_value.items():
  acc_roc_auc_dict[f"accuracy- {m}"] = metric_value[m]["accuracy"]
  acc_roc_auc_dict[f"roc_auc- {m}"] = metric_value[m]["roc_auc"]
  acc_roc_auc_table.append(acc_roc_auc_dict)
@@ -122,10 +122,8 @@ class ModelsPerformanceComparison(ClassifierPerformance):
  "List of models must be provided as a `models` parameter to compare performance"
  )

- all_models = [self.inputs.model]
+ all_models = self.inputs.models

- if self.inputs.models is not None:
- all_models.extend(self.inputs.models)
  results = {}
  for idx, model in enumerate(all_models):
  y_true = self.inputs.dataset.y
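
With required_inputs now ["dataset", "models"], the comparison no longer takes a separate base model: every model to be compared goes into the models list. A hedged sketch of what a call site might look like after this change; the input IDs are illustrative:

    import validmind as vm

    # "champion", "challenger" and "test_ds" are hypothetical input_ids registered
    # earlier via vm.init_model(...) / vm.init_dataset(...)
    vm.tests.run_test(
        "validmind.model_validation.sklearn.ModelsPerformanceComparison",
        inputs={
            "dataset": "test_ds",
            "models": ["champion", "challenger"],  # all models, including the former base model
        },
    )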
validmind/tests/model_validation/sklearn/RegressionErrors.py

@@ -57,7 +57,7 @@ class RegressionErrors(Metric):
  """
  table_records = []
  for result in raw_results:
- for key, value in result.items():
+ for key, _ in result.items():
  table_records.append(
  {
  "Metric": key,
validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py

@@ -9,8 +9,11 @@ import numpy as np
  from sklearn.metrics import mean_absolute_error, mean_squared_error

  from validmind.errors import SkipTestError
+ from validmind.logging import get_logger
  from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata

+ logger = get_logger(__name__)
+

  @dataclass
  class RegressionModelsPerformanceComparison(Metric):
@@ -56,7 +59,7 @@ class RegressionModelsPerformanceComparison(Metric):
  """

  name = "models_performance_comparison"
- required_inputs = ["model", "dataset"]
+ required_inputs = ["dataset", "models"]

  metadata = {
  "task_types": ["regression"],
@@ -76,8 +79,14 @@ class RegressionModelsPerformanceComparison(Metric):
  results["Mean Squared Error (MSE)"] = mse_test
  results["Root Mean Squared Error (RMSE)"] = np.sqrt(mse_test)

- mape_test = np.mean(np.abs((y_true_test - y_pred_test) / y_true_test)) * 100
- results["Mean Absolute Percentage Error (MAPE)"] = mape_test
+ if np.any(y_true_test == 0):
+ logger.warning(
+ "y_true_test contains zero values. Skipping MAPE calculation to avoid division by zero."
+ )
+ results["Mean Absolute Percentage Error (MAPE)"] = None
+ else:
+ mape_test = np.mean(np.abs((y_true_test - y_pred_test) / y_true_test)) * 100
+ results["Mean Absolute Percentage Error (MAPE)"] = mape_test

  mbd_test = np.mean(y_pred_test - y_true_test)
  results["Mean Bias Deviation (MBD)"] = mbd_test
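
The MAPE change above avoids a division by zero (and the resulting inf/nan) when any true value is zero. The same logic in isolation, as a standalone sketch rather than the Metric class:

    import numpy as np

    def safe_mape(y_true, y_pred):
        """Return MAPE in percent, or None when y_true contains zeros."""
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        if np.any(y_true == 0):
            return None  # mirrors the diff: skip rather than divide by zero
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    safe_mape([100, 110, 0], [98, 110, 5])      # None: y_true contains a zero
    safe_mape([100, 110, 120], [98, 121, 114])  # ~5.67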
@@ -94,7 +103,7 @@ class RegressionModelsPerformanceComparison(Metric):
  for metric_name in metrics:
  errors_dict = {}
  errors_dict["Errors"] = metric_name
- for m, m_v in metric_value.items():
+ for m, _ in metric_value.items():
  for metric in metrics:
  res = re.findall(r"\(.*?\)", metric)
  res[0][1:-1]
@@ -117,10 +126,7 @@ class RegressionModelsPerformanceComparison(Metric):
  "List of models must be provided as a `models` parameter to compare performance"
  )

- all_models = [self.inputs.model]
-
- if self.inputs.models is not None:
- all_models.extend(self.inputs.models)
+ all_models = self.inputs.models

  results = {}

validmind/tests/model_validation/sklearn/RegressionR2Square.py

@@ -57,7 +57,7 @@ class RegressionR2Square(Metric):
  """
  table_records = []
  for result in raw_results:
- for key, value in result.items():
+ for key, _ in result.items():
  table_records.append(
  {
  "Metric": key,
validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py

@@ -53,7 +53,7 @@ class DurbinWatsonTest(Metric):
  """
  Calculates DB for each of the dataset features
  """
- x_train = self.train_ds.df
+ x_train = self.inputs.dataset.df
  dw_values = {}
  for col in x_train.columns:
  dw_values[col] = durbin_watson(x_train[col].values)

validmind/tests/model_validation/statsmodels/GINITable.py

@@ -80,7 +80,7 @@ class GINITable(Metric):
  metrics_dict = {"Dataset": [], "AUC": [], "GINI": [], "KS": []}

  # Iterate over each dataset in the inputs
- for i, dataset in enumerate(self.inputs.datasets):
+ for _, dataset in enumerate(self.inputs.datasets):
  dataset_label = (
  dataset.input_id
  )  # Use input_id as the label for each dataset

validmind/tests/model_validation/statsmodels/JarqueBera.py

@@ -59,7 +59,7 @@ class JarqueBera(Metric):
  """
  Calculates JB for each of the dataset features
  """
- x_train = self.inputs.dataset.df
+ x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]

  jb_values = {}
  for col in x_train.columns:

validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py

@@ -87,7 +87,7 @@ class KolmogorovSmirnov(Metric):
  if data_distribution not in ["norm" or "exp"]:
  InvalidTestParametersError("Dist parameter must be either 'norm' or 'exp'")

- x_train = self.inputs.dataset.df
+ x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
  ks_values = {}
  for col in x_train.columns:
  ks_stat, p_value = kstest_normal(x_train[col].values, data_distribution)
validmind/tests/model_validation/statsmodels/LJungBox.py

@@ -54,7 +54,7 @@ class LJungBox(Metric):
  """
  Calculates Ljung-Box test for each of the dataset features
  """
- x_train = self.train_ds.df
+ x_train = self.inputs.dataset.df

  ljung_box_values = {}
  for col in x_train.columns:

validmind/tests/model_validation/statsmodels/Lilliefors.py

@@ -70,7 +70,7 @@ class Lilliefors(Metric):
  """
  Calculates Lilliefors test for each of the dataset features
  """
- x_train = self.train_ds.df
+ x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]

  lilliefors_values = {}
  for col in x_train.columns:
validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py

@@ -8,6 +8,7 @@ import pandas as pd
  import plotly.graph_objects as go
  from scipy import stats

+ from validmind.errors import SkipTestError
  from validmind.vm_models import Figure, Metric


@@ -115,6 +116,9 @@ class RegressionCoeffsPlot(Metric):
  all_models.extend(self.inputs.models)

  for i, model in enumerate(all_models):
+ if model.library != "statsmodels":
+ raise SkipTestError("Only statsmodels are supported for this metric")
+
  model_name = f"Model {i+1}"

  fig, metric_values = self.plot_coefficients_with_ci(model, model_name)
validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py

@@ -7,6 +7,7 @@ from dataclasses import dataclass
  import matplotlib.pyplot as plt
  import seaborn as sns

+ from validmind.errors import SkipTestError
  from validmind.logging import get_logger
  from validmind.vm_models import Figure, Metric

@@ -82,10 +83,14 @@ class RegressionFeatureSignificance(Metric):
  # Initialize a list to store figures
  figures = []

- for i, fitted_model in enumerate(model_list):
+ for i, model in enumerate(model_list):
+
+ if model.library != "statsmodels":
+ raise SkipTestError("Only statsmodels are supported for this metric")
+
  # Get the coefficients and p-values from the model
- coefficients = fitted_model.model.params
- pvalues = fitted_model.model.pvalues
+ coefficients = model.model.params
+ pvalues = model.model.pvalues

  # Sort the variables by p-value in ascending order
  sorted_idx = pvalues.argsort()
@@ -122,7 +127,7 @@ class RegressionFeatureSignificance(Metric):
  for_object=self,
  key=f"{self.key}:{i}",
  figure=fig,
- metadata={"model": str(fitted_model.model)},
+ metadata={"model": str(model.model)},
  )
  )
  plt.close("all")
validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py

@@ -73,9 +73,9 @@ class RegressionModelsCoeffs(Metric):
  raise ValueError("List of models must be provided in the models parameter")

  for model in self.inputs.models:
- if model.class_ != "statsmodels" and model.class_ != "R":
+ if model.library != "statsmodels":
  raise SkipTestError(
- "Only statsmodels and R models are supported for this metric"
+ "Only statsmodels models are supported for this metric"
  )

  coefficients = [m.regression_coefficients() for m in self.inputs.models]

validmind/tests/model_validation/statsmodels/RunsTest.py

@@ -59,7 +59,7 @@ class RunsTest(Metric):
  """
  Calculates the run test for each of the dataset features
  """
- x_train = self.inputs.dataset.df
+ x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]

  runs_test_values = {}
  for col in x_train.columns:

validmind/tests/model_validation/statsmodels/ShapiroWilk.py

@@ -53,7 +53,7 @@ class ShapiroWilk(Metric):
  """
  Calculates Shapiro-Wilk test for each of the dataset features.
  """
- x_train = self.inputs.dataset.df
+ x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
  sw_values = {}
  for col in x_train.columns:
  sw_stat, sw_pvalue = stats.shapiro(x_train[col].values)
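
Several of the statsmodels tests above (JarqueBera, KolmogorovSmirnov, Lilliefors, RunsTest, ShapiroWilk) now run only on the dataset's numeric feature columns instead of the full DataFrame. A small sketch of the practical effect, assuming feature_columns_numeric excludes non-numeric columns; the column names are made up:

    import pandas as pd

    df = pd.DataFrame(
        {
            "income": [40_000, 52_000, 61_000],    # numeric feature
            "region": ["north", "south", "east"],  # non-numeric feature
        }
    )

    # before: x_train = self.inputs.dataset.df
    #         -> stats.shapiro() would also be fed "region" and fail on strings
    # after:  x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
    #         -> only numeric columns such as "income" are tested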