validmind 2.3.5__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (149)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +8 -1
  3. validmind/ai/utils.py +2 -1
  4. validmind/client.py +1 -0
  5. validmind/template.py +2 -0
  6. validmind/tests/__init__.py +14 -468
  7. validmind/tests/_store.py +102 -0
  8. validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
  9. validmind/tests/data_validation/ADF.py +8 -10
  10. validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
  11. validmind/tests/data_validation/AutoAR.py +2 -4
  12. validmind/tests/data_validation/AutoMA.py +2 -4
  13. validmind/tests/data_validation/AutoSeasonality.py +8 -10
  14. validmind/tests/data_validation/AutoStationarity.py +8 -10
  15. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
  16. validmind/tests/data_validation/BivariateHistograms.py +8 -10
  17. validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
  18. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
  19. validmind/tests/data_validation/ClassImbalance.py +2 -4
  20. validmind/tests/data_validation/DFGLSArch.py +2 -4
  21. validmind/tests/data_validation/DatasetDescription.py +7 -9
  22. validmind/tests/data_validation/DatasetSplit.py +8 -9
  23. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  24. validmind/tests/data_validation/Duplicates.py +2 -4
  25. validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
  26. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
  27. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
  28. validmind/tests/data_validation/HighCardinality.py +2 -4
  29. validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
  30. validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
  31. validmind/tests/data_validation/IQROutliersTable.py +2 -4
  32. validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
  33. validmind/tests/data_validation/KPSS.py +8 -10
  34. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
  35. validmind/tests/data_validation/MissingValues.py +2 -4
  36. validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
  37. validmind/tests/data_validation/MissingValuesRisk.py +2 -4
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
  39. validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
  40. validmind/tests/data_validation/RollingStatsPlot.py +2 -4
  41. validmind/tests/data_validation/ScatterPlot.py +2 -4
  42. validmind/tests/data_validation/SeasonalDecompose.py +2 -4
  43. validmind/tests/data_validation/Skewness.py +2 -4
  44. validmind/tests/data_validation/SpreadPlot.py +2 -4
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
  47. validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
  49. validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
  50. validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +2 -4
  54. validmind/tests/data_validation/TooManyZeroValues.py +2 -4
  55. validmind/tests/data_validation/UniqueRows.py +2 -4
  56. validmind/tests/data_validation/WOEBinPlots.py +2 -4
  57. validmind/tests/data_validation/WOEBinTable.py +2 -4
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
  59. validmind/tests/data_validation/nlp/CommonWords.py +2 -4
  60. validmind/tests/data_validation/nlp/Hashtags.py +2 -4
  61. validmind/tests/data_validation/nlp/Mentions.py +2 -4
  62. validmind/tests/data_validation/nlp/Punctuations.py +2 -4
  63. validmind/tests/data_validation/nlp/StopWords.py +2 -4
  64. validmind/tests/data_validation/nlp/TextDescription.py +2 -4
  65. validmind/tests/decorator.py +10 -8
  66. validmind/tests/load.py +264 -0
  67. validmind/tests/metadata.py +59 -0
  68. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
  69. validmind/tests/model_validation/FeaturesAUC.py +6 -8
  70. validmind/tests/model_validation/ModelMetadata.py +8 -9
  71. validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
  72. validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
  73. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
  74. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
  75. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
  76. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
  77. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
  78. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
  79. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
  80. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
  81. validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
  82. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
  83. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
  84. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
  85. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
  86. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
  87. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
  88. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
  89. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
  90. validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
  91. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
  92. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
  93. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
  94. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -10
  95. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
  96. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
  97. validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
  98. validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
  99. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
  100. validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
  101. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
  102. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
  103. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
  104. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
  105. validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
  106. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
  107. validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
  108. validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
  109. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
  110. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
  111. validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
  112. validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
  113. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
  114. validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
  115. validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
  116. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
  117. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
  118. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
  119. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
  120. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
  121. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
  122. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
  123. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
  124. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
  125. validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
  126. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
  127. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
  128. validmind/tests/prompt_validation/Bias.py +2 -4
  129. validmind/tests/prompt_validation/Clarity.py +2 -4
  130. validmind/tests/prompt_validation/Conciseness.py +2 -4
  131. validmind/tests/prompt_validation/Delimitation.py +2 -4
  132. validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
  133. validmind/tests/prompt_validation/Robustness.py +2 -4
  134. validmind/tests/prompt_validation/Specificity.py +2 -4
  135. validmind/tests/run.py +394 -0
  136. validmind/tests/test_providers.py +12 -0
  137. validmind/tests/utils.py +16 -0
  138. validmind/unit_metrics/__init__.py +12 -4
  139. validmind/unit_metrics/composite.py +3 -0
  140. validmind/vm_models/test/metric.py +8 -5
  141. validmind/vm_models/test/result_wrapper.py +2 -1
  142. validmind/vm_models/test/test.py +14 -11
  143. validmind/vm_models/test/threshold_test.py +1 -0
  144. validmind/vm_models/test_suite/runner.py +1 -0
  145. {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/METADATA +1 -1
  146. {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/RECORD +149 -144
  147. {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/LICENSE +0 -0
  148. {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/WHEEL +0 -0
  149. {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/entry_points.txt +0 -0
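Most of this release is a refactor of the test-loading machinery: validmind/tests/__init__.py shrinks by over 450 lines while the new _store.py, load.py, metadata.py, run.py, and test_providers.py modules absorb the logic, and every built-in test migrates from a nested metadata dict to flat tasks/tags class attributes (see the hunks below). A minimal sketch of the discovery helpers, assuming the slimmed-down __init__.py keeps re-exporting them from the new modules:

# Minimal sketch; the re-exports are an assumption based on the
# +14-line __init__.py acting as a facade over load.py and metadata.py.
from validmind.tests import list_tests, list_tags, list_tasks

print(len(list_tests()))     # discovered built-in tests
print(sorted(list_tags()))   # unique tags across all test classes
print(sorted(list_tasks()))  # unique task types across all test classes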
@@ -0,0 +1,59 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+
+from validmind.utils import format_dataframe
+
+from .load import list_tests
+
+
+def list_tags():
+    """
+    List unique tags from all test classes.
+    """
+
+    unique_tags = set()
+
+    for test in list_tests(__as_class=True):
+        unique_tags.update(test.tags)
+
+    return list(unique_tags)
+
+
+def list_tasks_and_tags():
+    """
+    List all task types and their associated tags, with one row per task type and
+    all tags for a task type in one row.
+
+    Returns:
+        pandas.DataFrame: A DataFrame with 'Task Type' and concatenated 'Tags'.
+    """
+    task_tags_dict = {}
+
+    for test in list_tests(__as_class=True):
+        for task in test.tasks:
+            task_tags_dict.setdefault(task, set()).update(test.tags)
+
+    return format_dataframe(
+        pd.DataFrame(
+            [
+                {"Task": task, "Tags": ", ".join(tags)}
+                for task, tags in task_tags_dict.items()
+            ]
+        )
+    )
+
+
+def list_tasks():
+    """
+    List unique tasks from all test classes.
+    """
+
+    unique_tasks = set()
+
+    for test in list_tests(__as_class=True):
+        unique_tasks.update(test.tasks)
+
+    return list(unique_tasks)
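The aggregation in list_tasks_and_tags merges tags across tests that share a task type via dict.setdefault with a set. A self-contained toy run of the same pattern, using hypothetical stand-in test classes instead of validmind's registry:

# Toy reproduction of the aggregation loop above with fake test classes.
class FakeTest:
    def __init__(self, tasks, tags):
        self.tasks, self.tags = tasks, tags

fake_tests = [
    FakeTest(["clustering"], ["sklearn", "model_performance"]),
    FakeTest(["clustering", "classification"], ["visualization"]),
]

task_tags = {}
for test in fake_tests:
    for task in test.tasks:
        # setdefault creates one empty set per task, then merges tags into it
        task_tags.setdefault(task, set()).update(test.tags)

print(task_tags)
# {'clustering': {'sklearn', 'model_performance', 'visualization'},
#  'classification': {'visualization'}}   (set order may vary)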
@@ -51,13 +51,11 @@ class ClusterSizeDistribution(Metric):
 
     name = "cluster_size_distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
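This hunk establishes the pattern repeated across the remaining test files in this diff: the nested metadata dict is flattened into two class attributes, tasks (formerly metadata["task_types"]) and tags. A hedged sketch of a user-defined metric under the new convention; the Metric base class and its import path are assumptions inferred from the vm_models entries in the file list:

# Sketch only; assumes validmind.vm_models re-exports the Metric base class
# defined in validmind/vm_models/test/metric.py.
from validmind.vm_models import Metric


class MyCustomMetric(Metric):
    name = "my_custom_metric"
    required_inputs = ["model", "dataset"]

    # 2.3.5 style (removed): metadata = {"task_types": [...], "tags": [...]}
    tasks = ["classification"]    # replaces metadata["task_types"]
    tags = ["model_performance"]  # replaces metadata["tags"]

    def run(self):
        ...  # compute and cache results as before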
@@ -45,14 +45,12 @@ class FeaturesAUC(Metric):
         "fontsize": 12,
         "figure_height": 500,
     }
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "feature_importance",
-            "AUC",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "feature_importance",
+        "AUC",
+        "visualization",
+    ]
 
     def run(self):
         dataset = self.inputs.dataset
@@ -53,15 +53,14 @@ class ModelMetadata(Metric):
 
     name = "model_metadata"
     required_inputs = ["model"]
-    metadata = {
-        "task_types": [
-            "classification",
-            "regression",
-            "text_classification",
-            "text_summarization",
-        ],
-        "tags": ["model_metadata"],
-    }
+    tasks = [
+        "classification",
+        "regression",
+        "text_classification",
+        "text_summarization",
+    ]
+
+    tags = ["model_metadata"]
 
     column_labels = {
         "architecture": "Modeling Technique",
@@ -52,12 +52,8 @@ class RegressionResidualsPlot(Metric):
 
     name = "regression_residuals_plot"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = ["model_performance"]
     default_params = {"bin_size": 0.1}
 
     def run(self):
@@ -51,10 +51,8 @@ class ClusterDistribution(Metric):
     default_params = {
         "num_clusters": 5,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # run kmeans clustering on embeddings
@@ -50,10 +50,8 @@ class CosineSimilarityDistribution(Metric):
 
     name = "Text Embeddings Cosine Similarity Distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Compute cosine similarity
@@ -53,10 +53,8 @@ class DescriptiveAnalytics(Metric):
 
     name = "Descriptive Analytics for Text Embeddings Models"
    required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Assuming y_pred returns a 2D array of embeddings [samples, features]
@@ -53,10 +53,8 @@ class EmbeddingsVisualization2D(Metric):
         "cluster_column": None,
         "perplexity": 30,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         cluster_column = self.params.get("cluster_column")
@@ -29,10 +29,8 @@ class StabilityAnalysis(ThresholdTest):
     default_params = {
         "mean_similarity_threshold": 0.7,
    }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     @abstractmethod
     def perturb_data(self, data: str) -> str:
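StabilityAnalysis stays abstract after the migration; concrete embedding-stability tests supply perturb_data. A minimal hypothetical subclass, assuming only what the hunk shows (the base class path from the file list and the abstract perturb_data contract):

# Hypothetical subclass for illustration; only the perturb_data contract
# shown in the hunk above is assumed.
from validmind.tests.model_validation.embeddings.StabilityAnalysis import (
    StabilityAnalysis,
)


class StabilityAnalysisSwapCase(StabilityAnalysis):
    """Perturb the input text by inverting character case."""

    def perturb_data(self, data: str) -> str:
        return data.swapcase()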
@@ -48,13 +48,11 @@ class AdjustedMutualInformation(ClusterPerformance):
 
     name = "adjusted_mutual_information"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Mutual Information": metrics.adjusted_mutual_info_score}
@@ -47,13 +47,11 @@ class AdjustedRandIndex(ClusterPerformance):
 
     name = "adjusted_rand_index"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Rand Index": metrics.adjusted_rand_score}
@@ -58,15 +58,13 @@ class ClassifierPerformance(Metric):
 
     name = "classifier_performance"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -57,13 +57,11 @@ class ClusterCosineSimilarity(Metric):
 
     name = "cluster_cosine_similarity"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
@@ -51,13 +51,11 @@ class ClusterPerformance(Metric):
 
     name = "cluster_performance_metrics"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def cluser_performance_metrics(
         self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
@@ -61,13 +61,8 @@ class ClusterPerformanceMetrics(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance"]
     default_metrics = {
         "Homogeneity Score": metrics.homogeneity_score,
         "Completeness Score": metrics.completeness_score,
@@ -44,13 +44,11 @@ class CompletenessScore(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Completeness Score": metrics.completeness_score}
@@ -55,16 +55,14 @@ class ConfusionMatrix(Metric):
 
     name = "confusion_matrix"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     def run(self):
         y_true = self.inputs.dataset.y
@@ -113,6 +111,17 @@ class ConfusionMatrix(Metric):
             height=600,
         )
 
+        # Add an annotation at the bottom of the heatmap
+        fig.add_annotation(
+            x=0.5,
+            y=-0.1,
+            xref="paper",
+            yref="paper",
+            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
+            showarrow=False,
+            font=dict(size=14),
+        )
+
         return self.cache_results(
             metric_value={
                 "confusion_matrix": cm,
@@ -55,13 +55,11 @@ class FowlkesMallowsScore(ClusterPerformance):
 
     name = "fowlkes_mallows_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
@@ -46,13 +46,11 @@ class HomogeneityScore(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Homogeneity Score": metrics.homogeneity_score}
@@ -52,13 +52,8 @@ class HyperParametersTuning(Metric):
 
     name = "hyper_parameters_tuning"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "clustering"]
+    tags = ["sklearn", "model_performance"]
     default_params = {"param_grid": None, "scoring": None}
 
     def run(self):
@@ -60,13 +60,10 @@ class KMeansClustersOptimization(Metric):
 
     name = "clusters_optimize_elbow_method"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": ["sklearn", "model_performance", "kmeans"],
-    }
-    default_params = {
-        "n_clusters": None,
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance", "kmeans"]
+
+    default_params = {"n_clusters": None}
 
     def run(self):
         n_clusters = self.params["n_clusters"]
@@ -59,15 +59,13 @@ class MinimumAccuracy(ThresholdTest):
     name = "accuracy_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.7}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -62,15 +62,13 @@ class MinimumF1Score(ThresholdTest):
     name = "f1_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -59,15 +59,13 @@ class MinimumROCAUCScore(ThresholdTest):
     name = "roc_auc_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -53,16 +53,14 @@ class ModelsPerformanceComparison(ClassifierPerformance):
 
     name = "models_performance_comparison"
     required_inputs = ["dataset", "models"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "model_comparison",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "model_comparison",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -67,15 +67,13 @@ class OverfitDiagnosis(ThresholdTest):
     name = "overfit_regions"
     required_inputs = ["model", "datasets"]
     default_params = {"features_columns": None, "cut_off_percentage": 4}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+    ]
 
     default_metrics = {
         "accuracy": metrics.accuracy_score,
@@ -56,16 +56,14 @@ class PermutationFeatureImportance(Metric):
         "fontsize": None,
         "figure_height": 1000,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]
 
     def run(self):
         x = self.inputs.dataset.x_df()
@@ -73,15 +73,13 @@ class PopulationStabilityIndex(Metric):
 
     name = "psi"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
     default_params = {
         "num_bins": 10,
         "mode": "fixed",