validmind 2.4.4__py3-none-any.whl → 2.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +16 -9
- validmind/tests/data_validation/IQROutliersTable.py +13 -6
- validmind/tests/run.py +37 -13
- {validmind-2.4.4.dist-info → validmind-2.4.6.dist-info}/METADATA +2 -2
- {validmind-2.4.4.dist-info → validmind-2.4.6.dist-info}/RECORD +9 -9
- {validmind-2.4.4.dist-info → validmind-2.4.6.dist-info}/LICENSE +0 -0
- {validmind-2.4.4.dist-info → validmind-2.4.6.dist-info}/WHEEL +0 -0
- {validmind-2.4.4.dist-info → validmind-2.4.6.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.4.4"
|
1
|
+
__version__ = "2.4.6"
|
@@ -4,7 +4,6 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
import plotly.graph_objects as go
|
9
8
|
|
10
9
|
from validmind.vm_models import Figure, Metric
|
@@ -62,22 +61,27 @@ class IQROutliersBarPlot(Metric):
|
|
62
61
|
|
63
62
|
name = "iqr_outliers_bar_plot"
|
64
63
|
required_inputs = ["dataset"]
|
65
|
-
default_params = {"threshold": 1.5, "
|
64
|
+
default_params = {"threshold": 1.5, "fig_width": 800}
|
66
65
|
tasks = ["classification", "regression"]
|
67
66
|
tags = ["tabular_data", "visualization", "numerical_data"]
|
68
67
|
|
69
68
|
def run(self):
|
70
69
|
df = self.inputs.dataset.df
|
71
|
-
|
70
|
+
|
71
|
+
# Select numerical features
|
72
|
+
features = self.inputs.dataset.feature_columns_numeric
|
73
|
+
|
74
|
+
# Select non-binary features
|
75
|
+
features = [
|
76
|
+
feature
|
77
|
+
for feature in features
|
78
|
+
if len(self.inputs.dataset.df[feature].unique()) > 2
|
79
|
+
]
|
80
|
+
|
72
81
|
threshold = self.params["threshold"]
|
73
82
|
fig_width = self.params["fig_width"]
|
74
83
|
|
75
|
-
|
76
|
-
# Otherwise, only use the columns provided in num_features.
|
77
|
-
if num_features is None:
|
78
|
-
df = df.select_dtypes(include=[np.number])
|
79
|
-
else:
|
80
|
-
df = df[num_features]
|
84
|
+
df = df[features]
|
81
85
|
|
82
86
|
return self.detect_and_visualize_outliers(df, threshold, fig_width)
|
83
87
|
|
@@ -98,6 +102,9 @@ class IQROutliersBarPlot(Metric):
|
|
98
102
|
# Compute outliers
|
99
103
|
outliers = self.compute_outliers(df[col], threshold)
|
100
104
|
|
105
|
+
if outliers.empty:
|
106
|
+
continue # Skip plotting if there are no outliers
|
107
|
+
|
101
108
|
Q1_count = outliers[
|
102
109
|
(outliers >= 0) & (outliers < outliers.quantile(0.25))
|
103
110
|
].count()
|
@@ -4,7 +4,6 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
import pandas as pd
|
9
8
|
|
10
9
|
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
@@ -53,12 +52,22 @@ class IQROutliersTable(Metric):
|
|
53
52
|
|
54
53
|
name = "iqr_outliers_table"
|
55
54
|
required_inputs = ["dataset"]
|
56
|
-
default_params = {"
|
55
|
+
default_params = {"threshold": 1.5}
|
57
56
|
tasks = ["classification", "regression"]
|
58
57
|
tags = ["tabular_data", "numerical_data"]
|
59
58
|
|
60
59
|
def run(self):
|
61
|
-
|
60
|
+
|
61
|
+
# Select numerical features
|
62
|
+
features = self.inputs.dataset.feature_columns_numeric
|
63
|
+
|
64
|
+
# Select non-binary features
|
65
|
+
features = [
|
66
|
+
feature
|
67
|
+
for feature in features
|
68
|
+
if len(self.inputs.dataset.df[feature].unique()) > 2
|
69
|
+
]
|
70
|
+
|
62
71
|
threshold = self.params["threshold"]
|
63
72
|
|
64
73
|
df = self.inputs.dataset.df
|
@@ -80,9 +89,7 @@ class IQROutliersTable(Metric):
|
|
80
89
|
upper_bound = Q3 + threshold * IQR
|
81
90
|
return series[(series < lower_bound) | (series > upper_bound)]
|
82
91
|
|
83
|
-
def detect_and_analyze_outliers(self, df, features
|
84
|
-
if features is None:
|
85
|
-
features = df.select_dtypes(include=[np.number]).columns.tolist()
|
92
|
+
def detect_and_analyze_outliers(self, df, features, threshold=1.5):
|
86
93
|
|
87
94
|
outliers_summary = []
|
88
95
|
for feature in features:
|
validmind/tests/run.py
CHANGED
@@ -89,22 +89,24 @@ def _update_plotly_titles(figures, input_groups, title_template):
|
|
89
89
|
for i, figure in enumerate(figures):
|
90
90
|
figure.figure.layout.title.text = title_template.format(
|
91
91
|
current_title=f"{current_title} " if current_title else "",
|
92
|
-
input_description="
|
93
|
-
f"{k}
|
92
|
+
input_description=" and ".join(
|
93
|
+
f"{k}: {v if isinstance(v, str) else ', '.join(item.input_id for item in v) if isinstance(v, list) and all(hasattr(item, 'input_id') for item in v) else v.input_id}"
|
94
94
|
for k, v in input_groups[i].items()
|
95
95
|
),
|
96
96
|
)
|
97
97
|
|
98
98
|
|
99
99
|
def _update_matplotlib_titles(figures, input_groups, title_template):
|
100
|
-
current_title =
|
100
|
+
current_title = (
|
101
|
+
figures[0].figure._suptitle.get_text() if figures[0].figure._suptitle else ""
|
102
|
+
)
|
101
103
|
|
102
104
|
for i, figure in enumerate(figures):
|
103
105
|
figure.figure.suptitle(
|
104
106
|
title_template.format(
|
105
107
|
current_title=f"{current_title} " if current_title else "",
|
106
108
|
input_description=" and ".join(
|
107
|
-
f"{k}: {v if isinstance(v, str) else v.input_id}"
|
109
|
+
f"{k}: {v if isinstance(v, str) else ', '.join(item.input_id for item in v) if isinstance(v, list) and all(hasattr(item, 'input_id') for item in v) else v.input_id}"
|
108
110
|
for k, v in input_groups[i].items()
|
109
111
|
),
|
110
112
|
)
|
@@ -139,10 +141,20 @@ def metric_comparison(
|
|
139
141
|
"""Build a comparison result for multiple metric results"""
|
140
142
|
ref_id = str(uuid4())
|
141
143
|
|
142
|
-
input_group_strings = [
|
143
|
-
|
144
|
-
|
145
|
-
|
144
|
+
input_group_strings = []
|
145
|
+
|
146
|
+
for group in input_groups:
|
147
|
+
new_group = {}
|
148
|
+
for k, v in group.items():
|
149
|
+
if isinstance(v, str):
|
150
|
+
new_group[k] = v
|
151
|
+
elif hasattr(v, "input_id"):
|
152
|
+
new_group[k] = v.input_id
|
153
|
+
elif isinstance(v, list) and all(hasattr(item, "input_id") for item in v):
|
154
|
+
new_group[k] = ", ".join([item.input_id for item in v])
|
155
|
+
else:
|
156
|
+
raise ValueError(f"Unsupported type for value: {v}")
|
157
|
+
input_group_strings.append(new_group)
|
146
158
|
|
147
159
|
merged_summary = _combine_summaries(
|
148
160
|
[
|
@@ -173,9 +185,11 @@ def metric_comparison(
|
|
173
185
|
),
|
174
186
|
],
|
175
187
|
inputs=[
|
176
|
-
|
188
|
+
item.input_id if hasattr(item, "input_id") else item
|
177
189
|
for group in input_groups
|
178
190
|
for input in group.values()
|
191
|
+
for item in (input if isinstance(input, list) else [input])
|
192
|
+
if hasattr(item, "input_id") or isinstance(item, str)
|
179
193
|
],
|
180
194
|
output_template=output_template,
|
181
195
|
metric=MetricResult(
|
@@ -198,10 +212,20 @@ def threshold_test_comparison(
|
|
198
212
|
"""Build a comparison result for multiple threshold test results"""
|
199
213
|
ref_id = str(uuid4())
|
200
214
|
|
201
|
-
input_group_strings = [
|
202
|
-
|
203
|
-
|
204
|
-
|
215
|
+
input_group_strings = []
|
216
|
+
|
217
|
+
for group in input_groups:
|
218
|
+
new_group = {}
|
219
|
+
for k, v in group.items():
|
220
|
+
if isinstance(v, str):
|
221
|
+
new_group[k] = v
|
222
|
+
elif hasattr(v, "input_id"):
|
223
|
+
new_group[k] = v.input_id
|
224
|
+
elif isinstance(v, list) and all(hasattr(item, "input_id") for item in v):
|
225
|
+
new_group[k] = ", ".join([item.input_id for item in v])
|
226
|
+
else:
|
227
|
+
raise ValueError(f"Unsupported type for value: {v}")
|
228
|
+
input_group_strings.append(new_group)
|
205
229
|
|
206
230
|
merged_summary = _combine_summaries(
|
207
231
|
[
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.4.4
|
3
|
+
Version: 2.4.6
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
@@ -32,7 +32,7 @@ Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
|
32
32
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
33
33
|
Requires-Dist: numba (<0.59.0)
|
34
34
|
Requires-Dist: numpy
|
35
|
-
Requires-Dist: openai (>=1) ; extra == "all"
|
35
|
+
Requires-Dist: openai (>=1) ; extra == "all"
|
36
36
|
Requires-Dist: pandas (>=1.1,<2)
|
37
37
|
Requires-Dist: plotly
|
38
38
|
Requires-Dist: plotly-express
|
@@ -1,5 +1,5 @@
|
|
1
1
|
validmind/__init__.py,sha256=UfmzPwUCdUWbWq3zPqqmq4jw0_kfl3hX4U72p_seE4I,3700
|
2
|
-
validmind/__version__.py,sha256=
|
2
|
+
validmind/__version__.py,sha256=jbv3GfuSUTWNtlhuY3Dgm_vD4Uuj9i1AVwV8tthwnik,22
|
3
3
|
validmind/ai/test_descriptions.py,sha256=Q1Ftus4x5eiVLKWJu7hqPLukBQZzhy-dARqq_6_JWtk,9464
|
4
4
|
validmind/ai/utils.py,sha256=TEXII_S5CpkpczzSyHwTlqLcPMLnPBJWEBR6QFMKh1U,3421
|
5
5
|
validmind/api_client.py,sha256=xr9VNqCmA_WFf8rVm-0M0pmzVyLAPFOnfEe4dAog1LA,17144
|
@@ -110,8 +110,8 @@ validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=zSrHZ1kuZ
|
|
110
110
|
validmind/tests/data_validation/HeatmapFeatureCorrelations.py,sha256=yGA7douxs7jga6RaS8wIBs9JEee_67w0uN8mQP-7_to,5671
|
111
111
|
validmind/tests/data_validation/HighCardinality.py,sha256=HE3lH5cL2Gc9HNR3F0kx4Jhk4WJiXtWfdoCG1mlWS1A,5071
|
112
112
|
validmind/tests/data_validation/HighPearsonCorrelation.py,sha256=n4zPGj-ydV1pZIyDidFybC3kItDJrxZgOPHessJcwU0,5702
|
113
|
-
validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=
|
114
|
-
validmind/tests/data_validation/IQROutliersTable.py,sha256=
|
113
|
+
validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=BAFGtEA5ySKMuxQcjDZl1uV1bIZek5J10jF5fTVYqhw,6370
|
114
|
+
validmind/tests/data_validation/IQROutliersTable.py,sha256=URpkGCP_OWkumDMiRbrKGs5ePcmAAQtTEna2OjvDbUA,5947
|
115
115
|
validmind/tests/data_validation/IsolationForestOutliers.py,sha256=fYTM4H8JWgpJH-G8Sxxj5cKmJ8_EVxKn4lm64frGp4A,4841
|
116
116
|
validmind/tests/data_validation/KPSS.py,sha256=Ob9UVSlwbxGND0B0qJKmaBskJmGCKxpctpnqosLFK-8,5114
|
117
117
|
validmind/tests/data_validation/LaggedCorrelationHeatmap.py,sha256=iAXQU4KJKX1Rum3vJiJmIQsOZ2a-bFqSy4xfTKlfFBs,6008
|
@@ -266,7 +266,7 @@ validmind/tests/prompt_validation/Robustness.py,sha256=fBdkYnO9yoBazz4wD-l62tT8D
|
|
266
266
|
validmind/tests/prompt_validation/Specificity.py,sha256=h3gKRTTi2rfnGWmGC1YnSt2s_VbZU4KX0iY7LciZ3PU,6068
|
267
267
|
validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
268
268
|
validmind/tests/prompt_validation/ai_powered_test.py,sha256=7TTeIR5GotQosm7oVT8Y3KnwPB3XkVT1Fzhckpr-SgE,1963
|
269
|
-
validmind/tests/run.py,sha256=
|
269
|
+
validmind/tests/run.py,sha256=24E5pRg6p0dUHoK6shB9KeKHWTOEJa5HymT6tD8Ozl4,15574
|
270
270
|
validmind/tests/test_providers.py,sha256=47xe5eb5ufvj1jmhdRsbSvDQTXSDpFDFNeXg3xtXwhw,5320
|
271
271
|
validmind/tests/utils.py,sha256=kNrxfUYbj4DwmkZtpp_1rG4GMUGxYEhvqnYR_A7qAKM,471
|
272
272
|
validmind/unit_metrics/__init__.py,sha256=AlFnWA9pmzVf8xysPxYpQ3kBTQ81-YVxRTJpgC0Q41w,7344
|
@@ -307,8 +307,8 @@ validmind/vm_models/test_suite/runner.py,sha256=aewxadRfoOPH48jes2Gtb3Ju_FWFfVM_
|
|
307
307
|
validmind/vm_models/test_suite/summary.py,sha256=GQRNe2ZvvqjQN0yKmaN7ohAUjRFQIN4YYUYxfOuWN6M,4682
|
308
308
|
validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
|
309
309
|
validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
|
310
|
-
validmind-2.4.4.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
|
311
|
-
validmind-2.4.
|
312
|
-
validmind-2.4.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
313
|
-
validmind-2.4.4.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
|
314
|
-
validmind-2.4.4.dist-info/RECORD,,
|
310
|
+
validmind-2.4.6.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
|
311
|
+
validmind-2.4.6.dist-info/METADATA,sha256=fcDV-5tH1o8RdJpJD07ablIH3yDsar6sWfWeEuXo0Fg,4222
|
312
|
+
validmind-2.4.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
313
|
+
validmind-2.4.6.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
|
314
|
+
validmind-2.4.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|