validmind 2.0.1__py3-none-any.whl → 2.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +4 -1
- validmind/__version__.py +1 -1
- validmind/ai.py +197 -0
- validmind/api_client.py +16 -4
- validmind/client.py +23 -3
- validmind/datasets/classification/customer_churn.py +2 -2
- validmind/datasets/nlp/__init__.py +5 -0
- validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- validmind/errors.py +11 -1
- validmind/models/huggingface.py +2 -2
- validmind/models/pytorch.py +3 -3
- validmind/models/sklearn.py +4 -4
- validmind/tests/__init__.py +47 -9
- validmind/tests/data_validation/DatasetDescription.py +0 -1
- validmind/tests/data_validation/nlp/StopWords.py +1 -6
- validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind/tests/decorator.py +189 -0
- validmind/tests/model_validation/MeteorScore.py +92 -0
- validmind/tests/model_validation/RegardHistogram.py +5 -6
- validmind/tests/model_validation/RegardScore.py +3 -5
- validmind/tests/model_validation/RougeMetrics.py +6 -4
- validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +3 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +30 -4
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -3
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- validmind/unit_metrics/__init__.py +0 -2
- validmind/unit_metrics/composite.py +275 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +39 -0
- validmind/unit_metrics/regression/HuberLoss.py +27 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +36 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +22 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +22 -0
- validmind/unit_metrics/regression/QuantileLoss.py +25 -0
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +27 -0
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +22 -0
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +22 -0
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +22 -0
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +23 -0
- validmind/unit_metrics/sklearn/classification/Accuracy.py +2 -0
- validmind/unit_metrics/sklearn/classification/F1.py +2 -0
- validmind/unit_metrics/sklearn/classification/Precision.py +2 -0
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +2 -0
- validmind/unit_metrics/sklearn/classification/Recall.py +2 -0
- validmind/utils.py +17 -1
- validmind/vm_models/dataset.py +376 -21
- validmind/vm_models/figure.py +52 -17
- validmind/vm_models/test/metric.py +33 -30
- validmind/vm_models/test/output_template.py +0 -27
- validmind/vm_models/test/result_wrapper.py +57 -24
- validmind/vm_models/test/test.py +2 -1
- validmind/vm_models/test/threshold_test.py +24 -13
- validmind/vm_models/test_context.py +7 -0
- validmind/vm_models/test_suite/runner.py +1 -1
- validmind/vm_models/test_suite/test.py +1 -1
- {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/METADATA +9 -13
- {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/RECORD +65 -44
- validmind-2.0.7.dist-info/entry_points.txt +3 -0
- {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/LICENSE +0 -0
- {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class KolmogorovSmirnovStatistic(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the two-sample Kolmogorov-Smirnov statistic.

        The statistic is the largest absolute gap between the empirical CDF
        of the dataset's target values and the empirical CDF of the model's
        predictions for that dataset.

        Returns:
            Whatever ``self.cache_results`` returns when called with the
            statistic as ``metric_value``.
        """
        y_true = np.sort(np.asarray(self.inputs.dataset.y).ravel())
        y_pred = np.sort(
            np.asarray(
                self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
            ).ravel()
        )

        # Each empirical CDF is a step function that only changes at observed
        # sample points, so the maximum gap is attained somewhere on the
        # pooled sample. Comparing the two rank sequences position by
        # position instead would always yield zero for equal-length inputs.
        pooled = np.concatenate([y_true, y_pred])

        # side="right" counts the values <= each pooled point, i.e. the ECDF.
        cdf_true = np.searchsorted(y_true, pooled, side="right") / len(y_true)
        cdf_pred = np.searchsorted(y_pred, pooled, side="right") / len(y_pred)

        ks_statistic = np.max(np.abs(cdf_true - cdf_pred))

        return self.cache_results(metric_value=ks_statistic)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class MeanAbsolutePercentageError(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the mean absolute percentage error of the predictions.

        The absolute error of each prediction is expressed relative to its
        target value, averaged, and scaled to a percentage.

        Returns:
            Whatever ``self.cache_results`` returns when called with the
            percentage as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        # NOTE(review): a zero target makes this ratio undefined (inf/nan
        # under NumPy semantics) - confirm callers guarantee non-zero targets.
        relative_error = (actual - predicted) / actual
        percentage_error = np.mean(np.abs(relative_error)) * 100

        return self.cache_results(metric_value=percentage_error)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class MeanBiasDeviation(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the mean signed difference between predictions and targets.

        A positive value means the predictions are, on average, larger than
        the targets; a negative value means they are smaller.

        Returns:
            Whatever ``self.cache_results`` returns when called with the mean
            bias as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        signed_error = predicted - actual
        mean_bias = np.mean(signed_error)

        return self.cache_results(metric_value=mean_bias)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class QuantileLoss(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the mean quantile (pinball) loss of the predictions.

        The quantile is taken from ``self.params["quantile"]`` when present
        and defaults to 0.5.

        Returns:
            Whatever ``self.cache_results`` returns when called with the loss
            as ``metric_value``.

        Raises:
            ValueError: If the configured quantile is outside ``[0, 1]``.
        """
        y_true = self.inputs.dataset.y
        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        # Tolerate a missing or unset params mapping so the default path
        # behaves exactly as it did when the quantile was hard-coded.
        params = getattr(self, "params", None) or {}
        quantile = params.get("quantile", 0.5)
        if not 0 <= quantile <= 1:
            raise ValueError(
                f"quantile must be between 0 and 1, got {quantile!r}"
            )

        error = y_true - y_pred
        # Under-predictions are weighted by `quantile`, over-predictions by
        # `1 - quantile`; the maximum picks the applicable branch per sample.
        value = np.mean(np.maximum(quantile * error, (quantile - 1) * error))

        return self.cache_results(metric_value=value)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import sklearn.metrics as metrics
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class AdjustedRSquaredScore(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute an R-squared score adjusted for the number of features.

        The plain R-squared from scikit-learn is penalised using the dataset's
        row count and the number of feature columns it reports.

        Returns:
            Whatever ``self.cache_results`` returns when called with the
            adjusted score as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        feature_columns = self.inputs.dataset.get_features_columns()
        n_rows = len(actual)
        n_features = len(feature_columns)

        r_squared = metrics.r2_score(actual, predicted)

        # NOTE(review): the textbook adjustment divides by (n - p - 1); this
        # divides by (n - p), which agrees only if the feature count already
        # includes an intercept column - confirm the intended convention.
        penalty = (1 - r_squared) * (n_rows - 1) / (n_rows - n_features)
        adjusted = 1 - penalty

        return self.cache_results(metric_value=adjusted)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
from sklearn.metrics import mean_absolute_error
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class MeanAbsoluteError(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the mean absolute error of the predictions.

        Entries in ``self.params`` are forwarded as keyword arguments to
        scikit-learn's ``mean_absolute_error``.

        Returns:
            Whatever ``self.cache_results`` returns when called with the error
            as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        mae = mean_absolute_error(actual, predicted, **self.params)

        return self.cache_results(metric_value=mae)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
from sklearn.metrics import mean_squared_error
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class MeanSquaredError(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the mean squared error of the predictions.

        Entries in ``self.params`` are forwarded as keyword arguments to
        scikit-learn's ``mean_squared_error``.

        Returns:
            Whatever ``self.cache_results`` returns when called with the error
            as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        mse = mean_squared_error(actual, predicted, **self.params)

        return self.cache_results(metric_value=mse)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import sklearn.metrics as metrics
|
8
|
+
|
9
|
+
from validmind.vm_models import UnitMetric
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class RSquaredScore(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the R-squared score of the predictions.

        The score is delegated to scikit-learn's ``r2_score``.

        Returns:
            Whatever ``self.cache_results`` returns when called with the score
            as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        r_squared = metrics.r2_score(actual, predicted)

        return self.cache_results(metric_value=r_squared)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
from sklearn.metrics import mean_squared_error
|
9
|
+
|
10
|
+
from validmind.vm_models import UnitMetric
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class RootMeanSquaredError(UnitMetric):
    # Inputs that `run` reads from `self.inputs`.
    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the root mean squared error of the predictions.

        Entries in ``self.params`` are forwarded as keyword arguments to
        scikit-learn's ``mean_squared_error``; the square root of its result
        is the reported value.

        Returns:
            Whatever ``self.cache_results`` returns when called with the error
            as ``metric_value``.
        """
        actual = self.inputs.dataset.y
        predicted = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        mse = mean_squared_error(actual, predicted, **self.params)
        rmse = np.sqrt(mse)

        return self.cache_results(metric_value=rmse)
|
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
|
|
11
11
|
|
12
12
|
@dataclass
|
13
13
|
class Accuracy(UnitMetric):
|
14
|
+
required_inputs = ["dataset", "model"]
|
15
|
+
|
14
16
|
def run(self):
|
15
17
|
y_true = self.inputs.dataset.y
|
16
18
|
y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
|
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
|
|
11
11
|
|
12
12
|
@dataclass
|
13
13
|
class Precision(UnitMetric):
|
14
|
+
required_inputs = ["dataset", "model"]
|
15
|
+
|
14
16
|
def run(self):
|
15
17
|
y_true = self.inputs.dataset.y
|
16
18
|
y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
|
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
|
|
11
11
|
|
12
12
|
@dataclass
|
13
13
|
class ROC_AUC(UnitMetric):
|
14
|
+
required_inputs = ["dataset", "model"]
|
15
|
+
|
14
16
|
def run(self):
|
15
17
|
y_true = self.inputs.dataset.y
|
16
18
|
y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
|
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
|
|
11
11
|
|
12
12
|
@dataclass
|
13
13
|
class Recall(UnitMetric):
|
14
|
+
required_inputs = ["dataset", "model"]
|
15
|
+
|
14
16
|
def run(self):
|
15
17
|
y_true = self.inputs.dataset.y
|
16
18
|
y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
|
validmind/utils.py
CHANGED
@@ -85,6 +85,8 @@ def nan_to_none(obj):
|
|
85
85
|
|
86
86
|
class NumpyEncoder(json.JSONEncoder):
|
87
87
|
def default(self, obj):
|
88
|
+
if isinstance(obj, pd.Interval):
|
89
|
+
return f"[{obj.left}, {obj.right}]"
|
88
90
|
if isinstance(obj, np.integer):
|
89
91
|
return int(obj)
|
90
92
|
if isinstance(obj, np.floating):
|
@@ -253,7 +255,21 @@ def clean_docstring(docstring: str) -> str:
|
|
253
255
|
# Join paragraphs with double newlines for markdown
|
254
256
|
description = "\n\n".join(paragraphs)
|
255
257
|
|
256
|
-
|
258
|
+
lines = description.split("\n")
|
259
|
+
in_bullet_list = False
|
260
|
+
for i, line in enumerate([line for line in lines]):
|
261
|
+
if line.strip().startswith("-") and not in_bullet_list:
|
262
|
+
if lines[i - 1] != "":
|
263
|
+
lines[i] = "\n" + line
|
264
|
+
|
265
|
+
in_bullet_list = True
|
266
|
+
continue
|
267
|
+
elif line.strip().startswith("-") and in_bullet_list:
|
268
|
+
continue
|
269
|
+
elif line.strip() == "" and in_bullet_list:
|
270
|
+
in_bullet_list = False
|
271
|
+
|
272
|
+
return "\n".join(lines)
|
257
273
|
|
258
274
|
|
259
275
|
def format_number(number):
|