validmind 2.0.1__py3-none-any.whl → 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. validmind/__init__.py +4 -1
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +197 -0
  4. validmind/api_client.py +16 -4
  5. validmind/client.py +23 -3
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/nlp/__init__.py +5 -0
  8. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  9. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  10. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  11. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  12. validmind/errors.py +11 -1
  13. validmind/models/huggingface.py +2 -2
  14. validmind/models/pytorch.py +3 -3
  15. validmind/models/sklearn.py +4 -4
  16. validmind/tests/__init__.py +47 -9
  17. validmind/tests/data_validation/DatasetDescription.py +0 -1
  18. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  19. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  20. validmind/tests/decorator.py +189 -0
  21. validmind/tests/model_validation/MeteorScore.py +92 -0
  22. validmind/tests/model_validation/RegardHistogram.py +5 -6
  23. validmind/tests/model_validation/RegardScore.py +3 -5
  24. validmind/tests/model_validation/RougeMetrics.py +6 -4
  25. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  26. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  27. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +3 -1
  28. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +30 -4
  29. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -3
  30. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  31. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  32. validmind/unit_metrics/__init__.py +0 -2
  33. validmind/unit_metrics/composite.py +275 -0
  34. validmind/unit_metrics/regression/GiniCoefficient.py +39 -0
  35. validmind/unit_metrics/regression/HuberLoss.py +27 -0
  36. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +36 -0
  37. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +22 -0
  38. validmind/unit_metrics/regression/MeanBiasDeviation.py +22 -0
  39. validmind/unit_metrics/regression/QuantileLoss.py +25 -0
  40. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +27 -0
  41. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +22 -0
  42. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +22 -0
  43. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +22 -0
  44. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +23 -0
  45. validmind/unit_metrics/sklearn/classification/Accuracy.py +2 -0
  46. validmind/unit_metrics/sklearn/classification/F1.py +2 -0
  47. validmind/unit_metrics/sklearn/classification/Precision.py +2 -0
  48. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +2 -0
  49. validmind/unit_metrics/sklearn/classification/Recall.py +2 -0
  50. validmind/utils.py +17 -1
  51. validmind/vm_models/dataset.py +376 -21
  52. validmind/vm_models/figure.py +52 -17
  53. validmind/vm_models/test/metric.py +33 -30
  54. validmind/vm_models/test/output_template.py +0 -27
  55. validmind/vm_models/test/result_wrapper.py +57 -24
  56. validmind/vm_models/test/test.py +2 -1
  57. validmind/vm_models/test/threshold_test.py +24 -13
  58. validmind/vm_models/test_context.py +7 -0
  59. validmind/vm_models/test_suite/runner.py +1 -1
  60. validmind/vm_models/test_suite/test.py +1 -1
  61. {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/METADATA +9 -13
  62. {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/RECORD +65 -44
  63. validmind-2.0.7.dist-info/entry_points.txt +3 -0
  64. {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/LICENSE +0 -0
  65. {validmind-2.0.1.dist-info → validmind-2.0.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,36 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class KolmogorovSmirnovStatistic(UnitMetric):
    """Two-sample Kolmogorov-Smirnov statistic between targets and predictions.

    The statistic is the largest absolute gap between the empirical CDF of
    the dataset's true values and the empirical CDF of the model's
    predictions, so it always lies in ``[0, 1]``.
    """

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the KS statistic and pass it to ``cache_results``.

        Returns:
            Whatever ``self.cache_results`` returns for the computed value.
        """
        # Flatten both samples so a column-vector input cannot change how
        # sorting or the CDF evaluation below behaves.
        y_true = np.sort(np.ravel(self.inputs.dataset.y))
        y_pred = np.sort(
            np.ravel(self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id))
        )

        # Both empirical CDFs must be evaluated on the SAME grid of values.
        # Comparing arange(1..n)/n with itself (rank against rank) is always
        # zero, so the pooled sample is used as the evaluation grid instead.
        grid = np.concatenate([y_true, y_pred])

        # side="right" counts the observations <= each grid point, which is
        # the definition of the empirical CDF.
        cdf_true = np.searchsorted(y_true, grid, side="right") / y_true.size
        cdf_pred = np.searchsorted(y_pred, grid, side="right") / y_pred.size

        # The KS statistic is the supremum of the absolute CDF difference.
        ks_statistic = np.max(np.abs(cdf_true - cdf_pred))

        return self.cache_results(metric_value=ks_statistic)
@@ -0,0 +1,22 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class MeanAbsolutePercentageError(UnitMetric):
    """Mean absolute percentage error of the model's predictions, in percent."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute MAPE (as a percentage) and pass it to ``cache_results``.

        Returns:
            Whatever ``self.cache_results`` returns for the computed value.
        """
        # Flatten both arrays so a (n,) target and a (n, 1) prediction are
        # compared element-wise instead of broadcasting to an (n, n) matrix.
        y_true = np.ravel(self.inputs.dataset.y)
        y_pred = np.ravel(
            self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
        )

        # Clamp the denominator to machine epsilon so zero targets do not
        # yield inf/nan; this mirrors scikit-learn's
        # mean_absolute_percentage_error convention.
        denominator = np.maximum(np.abs(y_true), np.finfo(np.float64).eps)
        value = np.mean(np.abs(y_true - y_pred) / denominator) * 100

        return self.cache_results(metric_value=value)
@@ -0,0 +1,22 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class MeanBiasDeviation(UnitMetric):
    """Average signed difference between predictions and true values."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute ``mean(y_pred - y_true)`` and pass it to ``cache_results``.

        A positive value means the predictions are, on average, larger than
        the targets; a negative value means they are smaller.
        """
        dataset = self.inputs.dataset
        targets = dataset.y
        predictions = dataset.y_pred(model_id=self.inputs.model.input_id)

        signed_errors = predictions - targets
        bias = np.mean(signed_errors)

        return self.cache_results(metric_value=bias)
@@ -0,0 +1,25 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class QuantileLoss(UnitMetric):
    """Mean quantile (pinball) loss of the model's predictions.

    The quantile is read from ``self.params["quantile"]`` and defaults to
    ``0.5`` when no value is supplied.
    """

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute the quantile loss and pass it to ``cache_results``.

        Returns:
            Whatever ``self.cache_results`` returns for the computed value.

        Raises:
            ValueError: If the configured quantile is outside ``[0, 1]``.
        """
        y_true = self.inputs.dataset.y
        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        # Fall back to the previously hard-coded 0.5 when the caller does
        # not configure a quantile, so existing usages keep their result.
        params = self.params or {}
        quantile = params.get("quantile", 0.5)
        if not 0 <= quantile <= 1:
            raise ValueError(
                f"quantile must be between 0 and 1 (inclusive), got {quantile}"
            )

        error = y_true - y_pred
        # Under-predictions are weighted by `quantile`, over-predictions by
        # `1 - quantile`; the maximum selects the applicable branch.
        value = np.mean(np.maximum(quantile * error, (quantile - 1) * error))

        return self.cache_results(metric_value=value)
@@ -0,0 +1,27 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import sklearn.metrics as metrics
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class AdjustedRSquaredScore(UnitMetric):
    """R-squared adjusted for the number of feature columns in the dataset."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute adjusted R-squared and pass it to ``cache_results``.

        Uses ``1 - (1 - R2) * (n - 1) / (n - p - 1)``, where ``n`` is the
        number of rows and ``p`` is the number of feature columns. When
        ``n - p - 1`` is not positive the statistic is undefined and NaN is
        reported instead.

        Returns:
            Whatever ``self.cache_results`` returns for the computed value.
        """
        y_true = self.inputs.dataset.y
        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)

        row_count = len(y_true)
        feature_count = len(self.inputs.dataset.get_features_columns())

        # Residual degrees of freedom: one is lost per feature plus one more
        # (the "- 1"), which the previous `row_count - feature_count` omitted.
        residual_dof = row_count - feature_count - 1

        if residual_dof <= 0:
            # No residual degrees of freedom: avoid dividing by zero or by a
            # negative number, which would produce a meaningless score.
            value = float("nan")
        else:
            r_squared = metrics.r2_score(y_true, y_pred)
            value = 1 - (1 - r_squared) * (row_count - 1) / residual_dof

        return self.cache_results(metric_value=value)
@@ -0,0 +1,22 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from sklearn.metrics import mean_absolute_error
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class MeanAbsoluteError(UnitMetric):
    """Mean absolute error of the model's predictions on the dataset."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Score predictions against targets with ``mean_absolute_error``.

        Every entry of ``self.params`` is forwarded to the scikit-learn
        function as a keyword argument.
        """
        dataset = self.inputs.dataset
        targets = dataset.y
        predictions = dataset.y_pred(model_id=self.inputs.model.input_id)

        mae = mean_absolute_error(targets, predictions, **self.params)

        return self.cache_results(metric_value=mae)
@@ -0,0 +1,22 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from sklearn.metrics import mean_squared_error
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class MeanSquaredError(UnitMetric):
    """Mean squared error of the model's predictions on the dataset."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Score predictions against targets with ``mean_squared_error``.

        Every entry of ``self.params`` is forwarded to the scikit-learn
        function as a keyword argument.
        """
        dataset = self.inputs.dataset
        targets = dataset.y
        predictions = dataset.y_pred(model_id=self.inputs.model.input_id)

        mse = mean_squared_error(targets, predictions, **self.params)

        return self.cache_results(metric_value=mse)
@@ -0,0 +1,22 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import sklearn.metrics as metrics
8
+
9
+ from validmind.vm_models import UnitMetric
10
+
11
+
12
@dataclass
class RSquaredScore(UnitMetric):
    """Coefficient of determination (R-squared) of the model's predictions."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Compute ``r2_score`` for the dataset and pass it to ``cache_results``."""
        dataset = self.inputs.dataset
        targets = dataset.y
        predictions = dataset.y_pred(model_id=self.inputs.model.input_id)

        r_squared = metrics.r2_score(targets, predictions)

        return self.cache_results(metric_value=r_squared)
@@ -0,0 +1,23 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+ from sklearn.metrics import mean_squared_error
9
+
10
+ from validmind.vm_models import UnitMetric
11
+
12
+
13
@dataclass
class RootMeanSquaredError(UnitMetric):
    """Root mean squared error of the model's predictions on the dataset."""

    required_inputs = ["dataset", "model"]

    def run(self):
        """Take the square root of scikit-learn's ``mean_squared_error``.

        Every entry of ``self.params`` is forwarded to
        ``mean_squared_error`` as a keyword argument.
        """
        dataset = self.inputs.dataset
        targets = dataset.y
        predictions = dataset.y_pred(model_id=self.inputs.model.input_id)

        mse = mean_squared_error(targets, predictions, **self.params)
        rmse = np.sqrt(mse)

        return self.cache_results(metric_value=rmse)
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
11
11
 
12
12
  @dataclass
13
13
  class Accuracy(UnitMetric):
14
+ required_inputs = ["dataset", "model"]
15
+
14
16
  def run(self):
15
17
  y_true = self.inputs.dataset.y
16
18
  y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
11
11
 
12
12
  @dataclass
13
13
  class F1(UnitMetric):
14
+ required_inputs = ["dataset", "model"]
15
+
14
16
  def run(self):
15
17
  y_true = self.inputs.dataset.y
16
18
  y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
11
11
 
12
12
  @dataclass
13
13
  class Precision(UnitMetric):
14
+ required_inputs = ["dataset", "model"]
15
+
14
16
  def run(self):
15
17
  y_true = self.inputs.dataset.y
16
18
  y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
11
11
 
12
12
  @dataclass
13
13
  class ROC_AUC(UnitMetric):
14
+ required_inputs = ["dataset", "model"]
15
+
14
16
  def run(self):
15
17
  y_true = self.inputs.dataset.y
16
18
  y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
@@ -11,6 +11,8 @@ from validmind.vm_models import UnitMetric
11
11
 
12
12
  @dataclass
13
13
  class Recall(UnitMetric):
14
+ required_inputs = ["dataset", "model"]
15
+
14
16
  def run(self):
15
17
  y_true = self.inputs.dataset.y
16
18
  y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
validmind/utils.py CHANGED
@@ -85,6 +85,8 @@ def nan_to_none(obj):
85
85
 
86
86
  class NumpyEncoder(json.JSONEncoder):
87
87
  def default(self, obj):
88
+ if isinstance(obj, pd.Interval):
89
+ return f"[{obj.left}, {obj.right}]"
88
90
  if isinstance(obj, np.integer):
89
91
  return int(obj)
90
92
  if isinstance(obj, np.floating):
@@ -253,7 +255,21 @@ def clean_docstring(docstring: str) -> str:
253
255
  # Join paragraphs with double newlines for markdown
254
256
  description = "\n\n".join(paragraphs)
255
257
 
256
- return description
258
+ lines = description.split("\n")
259
+ in_bullet_list = False
260
+ for i, line in enumerate([line for line in lines]):
261
+ if line.strip().startswith("-") and not in_bullet_list:
262
+ if lines[i - 1] != "":
263
+ lines[i] = "\n" + line
264
+
265
+ in_bullet_list = True
266
+ continue
267
+ elif line.strip().startswith("-") and in_bullet_list:
268
+ continue
269
+ elif line.strip() == "" and in_bullet_list:
270
+ in_bullet_list = False
271
+
272
+ return "\n".join(lines)
257
273
 
258
274
 
259
275
  def format_number(number):