validmind 2.4.4__py3-none-any.whl → 2.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +16 -9
- validmind/tests/data_validation/IQROutliersTable.py +13 -6
- {validmind-2.4.4.dist-info → validmind-2.4.5.dist-info}/METADATA +2 -2
- {validmind-2.4.4.dist-info → validmind-2.4.5.dist-info}/RECORD +8 -8
- {validmind-2.4.4.dist-info → validmind-2.4.5.dist-info}/LICENSE +0 -0
- {validmind-2.4.4.dist-info → validmind-2.4.5.dist-info}/WHEEL +0 -0
- {validmind-2.4.4.dist-info → validmind-2.4.5.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.4.4"
|
1
|
+
__version__ = "2.4.5"
|
@@ -4,7 +4,6 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
import plotly.graph_objects as go
|
9
8
|
|
10
9
|
from validmind.vm_models import Figure, Metric
|
@@ -62,22 +61,27 @@ class IQROutliersBarPlot(Metric):
|
|
62
61
|
|
63
62
|
name = "iqr_outliers_bar_plot"
|
64
63
|
required_inputs = ["dataset"]
|
65
|
-
default_params = {"threshold": 1.5, "
|
64
|
+
default_params = {"threshold": 1.5, "fig_width": 800}
|
66
65
|
tasks = ["classification", "regression"]
|
67
66
|
tags = ["tabular_data", "visualization", "numerical_data"]
|
68
67
|
|
69
68
|
def run(self):
|
70
69
|
df = self.inputs.dataset.df
|
71
|
-
|
70
|
+
|
71
|
+
# Select numerical features
|
72
|
+
features = self.inputs.dataset.feature_columns_numeric
|
73
|
+
|
74
|
+
# Select non-binary features
|
75
|
+
features = [
|
76
|
+
feature
|
77
|
+
for feature in features
|
78
|
+
if len(self.inputs.dataset.df[feature].unique()) > 2
|
79
|
+
]
|
80
|
+
|
72
81
|
threshold = self.params["threshold"]
|
73
82
|
fig_width = self.params["fig_width"]
|
74
83
|
|
75
|
-
|
76
|
-
# Otherwise, only use the columns provided in num_features.
|
77
|
-
if num_features is None:
|
78
|
-
df = df.select_dtypes(include=[np.number])
|
79
|
-
else:
|
80
|
-
df = df[num_features]
|
84
|
+
df = df[features]
|
81
85
|
|
82
86
|
return self.detect_and_visualize_outliers(df, threshold, fig_width)
|
83
87
|
|
@@ -98,6 +102,9 @@ class IQROutliersBarPlot(Metric):
|
|
98
102
|
# Compute outliers
|
99
103
|
outliers = self.compute_outliers(df[col], threshold)
|
100
104
|
|
105
|
+
if outliers.empty:
|
106
|
+
continue # Skip plotting if there are no outliers
|
107
|
+
|
101
108
|
Q1_count = outliers[
|
102
109
|
(outliers >= 0) & (outliers < outliers.quantile(0.25))
|
103
110
|
].count()
|
@@ -4,7 +4,6 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
import pandas as pd
|
9
8
|
|
10
9
|
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
@@ -53,12 +52,22 @@ class IQROutliersTable(Metric):
|
|
53
52
|
|
54
53
|
name = "iqr_outliers_table"
|
55
54
|
required_inputs = ["dataset"]
|
56
|
-
default_params = {"
|
55
|
+
default_params = {"threshold": 1.5}
|
57
56
|
tasks = ["classification", "regression"]
|
58
57
|
tags = ["tabular_data", "numerical_data"]
|
59
58
|
|
60
59
|
def run(self):
|
61
|
-
|
60
|
+
|
61
|
+
# Select numerical features
|
62
|
+
features = self.inputs.dataset.feature_columns_numeric
|
63
|
+
|
64
|
+
# Select non-binary features
|
65
|
+
features = [
|
66
|
+
feature
|
67
|
+
for feature in features
|
68
|
+
if len(self.inputs.dataset.df[feature].unique()) > 2
|
69
|
+
]
|
70
|
+
|
62
71
|
threshold = self.params["threshold"]
|
63
72
|
|
64
73
|
df = self.inputs.dataset.df
|
@@ -80,9 +89,7 @@ class IQROutliersTable(Metric):
|
|
80
89
|
upper_bound = Q3 + threshold * IQR
|
81
90
|
return series[(series < lower_bound) | (series > upper_bound)]
|
82
91
|
|
83
|
-
def detect_and_analyze_outliers(self, df, features
|
84
|
-
if features is None:
|
85
|
-
features = df.select_dtypes(include=[np.number]).columns.tolist()
|
92
|
+
def detect_and_analyze_outliers(self, df, features, threshold=1.5):
|
86
93
|
|
87
94
|
outliers_summary = []
|
88
95
|
for feature in features:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 2.4.4
|
3
|
+
Version: 2.4.5
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
License: Commercial License
|
6
6
|
Author: Andres Rodriguez
|
@@ -32,7 +32,7 @@ Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
|
32
32
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
33
33
|
Requires-Dist: numba (<0.59.0)
|
34
34
|
Requires-Dist: numpy
|
35
|
-
Requires-Dist: openai (>=1) ; extra == "all"
|
35
|
+
Requires-Dist: openai (>=1) ; extra == "all"
|
36
36
|
Requires-Dist: pandas (>=1.1,<2)
|
37
37
|
Requires-Dist: plotly
|
38
38
|
Requires-Dist: plotly-express
|
@@ -1,5 +1,5 @@
|
|
1
1
|
validmind/__init__.py,sha256=UfmzPwUCdUWbWq3zPqqmq4jw0_kfl3hX4U72p_seE4I,3700
|
2
|
-
validmind/__version__.py,sha256=
|
2
|
+
validmind/__version__.py,sha256=zcb2VyQ6EBDMkktW4mmZ-qJGw-tYB_UjmzqDEz6RI9w,22
|
3
3
|
validmind/ai/test_descriptions.py,sha256=Q1Ftus4x5eiVLKWJu7hqPLukBQZzhy-dARqq_6_JWtk,9464
|
4
4
|
validmind/ai/utils.py,sha256=TEXII_S5CpkpczzSyHwTlqLcPMLnPBJWEBR6QFMKh1U,3421
|
5
5
|
validmind/api_client.py,sha256=xr9VNqCmA_WFf8rVm-0M0pmzVyLAPFOnfEe4dAog1LA,17144
|
@@ -110,8 +110,8 @@ validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=zSrHZ1kuZ
|
|
110
110
|
validmind/tests/data_validation/HeatmapFeatureCorrelations.py,sha256=yGA7douxs7jga6RaS8wIBs9JEee_67w0uN8mQP-7_to,5671
|
111
111
|
validmind/tests/data_validation/HighCardinality.py,sha256=HE3lH5cL2Gc9HNR3F0kx4Jhk4WJiXtWfdoCG1mlWS1A,5071
|
112
112
|
validmind/tests/data_validation/HighPearsonCorrelation.py,sha256=n4zPGj-ydV1pZIyDidFybC3kItDJrxZgOPHessJcwU0,5702
|
113
|
-
validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=
|
114
|
-
validmind/tests/data_validation/IQROutliersTable.py,sha256=
|
113
|
+
validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=BAFGtEA5ySKMuxQcjDZl1uV1bIZek5J10jF5fTVYqhw,6370
|
114
|
+
validmind/tests/data_validation/IQROutliersTable.py,sha256=URpkGCP_OWkumDMiRbrKGs5ePcmAAQtTEna2OjvDbUA,5947
|
115
115
|
validmind/tests/data_validation/IsolationForestOutliers.py,sha256=fYTM4H8JWgpJH-G8Sxxj5cKmJ8_EVxKn4lm64frGp4A,4841
|
116
116
|
validmind/tests/data_validation/KPSS.py,sha256=Ob9UVSlwbxGND0B0qJKmaBskJmGCKxpctpnqosLFK-8,5114
|
117
117
|
validmind/tests/data_validation/LaggedCorrelationHeatmap.py,sha256=iAXQU4KJKX1Rum3vJiJmIQsOZ2a-bFqSy4xfTKlfFBs,6008
|
@@ -307,8 +307,8 @@ validmind/vm_models/test_suite/runner.py,sha256=aewxadRfoOPH48jes2Gtb3Ju_FWFfVM_
|
|
307
307
|
validmind/vm_models/test_suite/summary.py,sha256=GQRNe2ZvvqjQN0yKmaN7ohAUjRFQIN4YYUYxfOuWN6M,4682
|
308
308
|
validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
|
309
309
|
validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
|
310
|
-
validmind-2.4.
|
311
|
-
validmind-2.4.
|
312
|
-
validmind-2.4.
|
313
|
-
validmind-2.4.
|
314
|
-
validmind-2.4.
|
310
|
+
validmind-2.4.5.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
|
311
|
+
validmind-2.4.5.dist-info/METADATA,sha256=Dj-H444StTjuG_j6I4KG3nYtmKDJClmReWm1morfegc,4222
|
312
|
+
validmind-2.4.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
313
|
+
validmind-2.4.5.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
|
314
|
+
validmind-2.4.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|