validmind 2.4.0__py3-none-any.whl → 2.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/api_client.py +17 -15
  3. validmind/datasets/nlp/cnn_dailymail.py +10 -1
  4. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +33 -209
  5. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +156 -1051
  6. validmind/models/huggingface.py +0 -1
  7. validmind/template.py +2 -0
  8. validmind/tests/data_validation/TabularDescriptionTables.py +96 -148
  9. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  10. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -1
  11. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
  12. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  13. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +3 -2
  14. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +9 -2
  15. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -1
  16. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -1
  17. validmind/tests/run.py +1 -1
  18. validmind/vm_models/dataset/dataset.py +18 -6
  19. validmind/vm_models/test_suite/summary.py +2 -2
  20. {validmind-2.4.0.dist-info → validmind-2.4.4.dist-info}/METADATA +3 -2
  21. {validmind-2.4.0.dist-info → validmind-2.4.4.dist-info}/RECORD +24 -24
  22. {validmind-2.4.0.dist-info → validmind-2.4.4.dist-info}/LICENSE +0 -0
  23. {validmind-2.4.0.dist-info → validmind-2.4.4.dist-info}/WHEEL +0 -0
  24. {validmind-2.4.0.dist-info → validmind-2.4.4.dist-info}/entry_points.txt +0 -0
@@ -56,7 +56,6 @@ class HFModel(VMModel):
56
56
  return [result["label"] for result in results]
57
57
  elif tasks[-1] == "feature_extraction":
58
58
  # Extract [CLS] token embedding for each input and return as list of lists
59
- print(f"len(results): {len(results)}")
60
59
  return [embedding[0][0] for embedding in results]
61
60
  else:
62
61
  return results
validmind/template.py CHANGED
@@ -22,6 +22,8 @@ CONTENT_TYPE_MAP = {
22
22
  "dynamic": "Dynamic Content",
23
23
  "text": "Text",
24
24
  "risk_assessment": "Risk Assessment",
25
+ "assessment_summary": "Assessment Summary",
26
+ "guideline": "Guideline Assessment",
25
27
  }
26
28
 
27
29
 
@@ -2,15 +2,14 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from dataclasses import dataclass
6
-
7
5
  import pandas as pd
8
6
 
9
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
7
+ from validmind import tags, tasks
10
8
 
11
9
 
12
- @dataclass
13
- class TabularDescriptionTables(Metric):
10
+ @tags("tabular_data")
11
+ @tasks("classification", "regression")
12
+ def TabularDescriptionTables(dataset):
14
13
  """
15
14
  Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.
16
15
 
@@ -54,155 +53,104 @@ class TabularDescriptionTables(Metric):
54
53
  chosen algorithm.
55
54
  """
56
55
 
57
- name = "tabular_description_tables"
58
- required_inputs = ["dataset"]
59
-
60
- tasks = ["classification", "regression"]
61
- tags = ["tabular_data"]
62
-
63
- def get_summary_statistics_numerical(self, numerical_fields):
64
- summary_stats = self.inputs.dataset.df[numerical_fields].describe().T
65
- summary_stats["Missing Values (%)"] = (
66
- self.inputs.dataset.df[numerical_fields].isnull().mean() * 100
67
- )
68
- summary_stats["Data Type"] = self.inputs.dataset.df[
69
- numerical_fields
70
- ].dtypes.astype(str)
71
- summary_stats = summary_stats[
72
- ["count", "mean", "min", "max", "Missing Values (%)", "Data Type"]
73
- ]
74
- summary_stats.columns = [
75
- "Num of Obs",
76
- "Mean",
77
- "Min",
78
- "Max",
79
- "Missing Values (%)",
80
- "Data Type",
81
- ]
82
- summary_stats["Num of Obs"] = summary_stats["Num of Obs"].astype(int)
83
- summary_stats = summary_stats.sort_values(
84
- by="Missing Values (%)", ascending=False
85
- )
86
- summary_stats.reset_index(inplace=True)
87
- summary_stats.rename(columns={"index": "Numerical Variable"}, inplace=True)
88
- return summary_stats
89
-
90
- def get_summary_statistics_categorical(self, categorical_fields):
91
- summary_stats = pd.DataFrame()
92
- if categorical_fields: # check if the list is not empty
93
- for column in self.inputs.dataset.df[categorical_fields].columns:
94
- summary_stats.loc[column, "Num of Obs"] = int(
95
- self.inputs.dataset.df[column].count()
96
- )
97
- summary_stats.loc[
98
- column, "Num of Unique Values"
99
- ] = self.inputs.dataset.df[column].nunique()
100
- summary_stats.loc[column, "Unique Values"] = str(
101
- self.inputs.dataset.df[column].unique()
102
- )
103
- summary_stats.loc[column, "Missing Values (%)"] = (
104
- self.inputs.dataset.df[column].isnull().mean() * 100
105
- )
106
- summary_stats.loc[column, "Data Type"] = str(
107
- self.inputs.dataset.df[column].dtype
108
- )
109
-
110
- summary_stats = summary_stats.sort_values(
111
- by="Missing Values (%)", ascending=False
112
- )
113
- summary_stats.reset_index(inplace=True)
114
- summary_stats.rename(
115
- columns={"index": "Categorical Variable"}, inplace=True
116
- )
117
- return summary_stats
118
-
119
- def get_summary_statistics_datetime(self, datetime_fields):
120
- summary_stats = pd.DataFrame()
121
- for column in self.inputs.dataset.df[datetime_fields].columns:
122
- summary_stats.loc[column, "Num of Obs"] = int(
123
- self.inputs.dataset.df[column].count()
124
- )
125
- summary_stats.loc[column, "Num of Unique Values"] = self.inputs.dataset.df[
56
+ numerical_fields = get_numerical_columns(dataset)
57
+ categorical_fields = get_categorical_columns(dataset)
58
+ datetime_fields = get_datetime_columns(dataset)
59
+
60
+ summary_stats_numerical = get_summary_statistics_numerical(
61
+ dataset, numerical_fields
62
+ )
63
+ summary_stats_categorical = get_summary_statistics_categorical(
64
+ dataset, categorical_fields
65
+ )
66
+ summary_stats_datetime = get_summary_statistics_datetime(dataset, datetime_fields)
67
+
68
+ return (summary_stats_numerical, summary_stats_categorical, summary_stats_datetime)
69
+
70
+
71
+ def get_summary_statistics_numerical(dataset, numerical_fields):
72
+ summary_stats = dataset.df[numerical_fields].describe().T
73
+ summary_stats["Missing Values (%)"] = (
74
+ dataset.df[numerical_fields].isnull().mean() * 100
75
+ )
76
+ summary_stats["Data Type"] = dataset.df[numerical_fields].dtypes.astype(str)
77
+ summary_stats = summary_stats[
78
+ ["count", "mean", "min", "max", "Missing Values (%)", "Data Type"]
79
+ ]
80
+ summary_stats.columns = [
81
+ "Num of Obs",
82
+ "Mean",
83
+ "Min",
84
+ "Max",
85
+ "Missing Values (%)",
86
+ "Data Type",
87
+ ]
88
+ summary_stats["Num of Obs"] = summary_stats["Num of Obs"].astype(int)
89
+ summary_stats = summary_stats.sort_values(by="Missing Values (%)", ascending=False)
90
+ summary_stats.reset_index(inplace=True)
91
+ summary_stats.rename(columns={"index": "Numerical Variable"}, inplace=True)
92
+ return summary_stats
93
+
94
+
95
+ def get_summary_statistics_categorical(dataset, categorical_fields):
96
+ summary_stats = pd.DataFrame()
97
+ if categorical_fields: # check if the list is not empty
98
+ for column in dataset.df[categorical_fields].columns:
99
+ summary_stats.loc[column, "Num of Obs"] = int(dataset.df[column].count())
100
+ summary_stats.loc[column, "Num of Unique Values"] = dataset.df[
126
101
  column
127
102
  ].nunique()
128
- summary_stats.loc[column, "Earliest Date"] = self.inputs.dataset.df[
129
- column
130
- ].min()
131
- summary_stats.loc[column, "Latest Date"] = self.inputs.dataset.df[
132
- column
133
- ].max()
134
- summary_stats.loc[column, "Missing Values (%)"] = (
135
- self.inputs.dataset.df[column].isnull().mean() * 100
103
+ summary_stats.loc[column, "Unique Values"] = str(
104
+ dataset.df[column].unique()
136
105
  )
137
- summary_stats.loc[column, "Data Type"] = str(
138
- self.inputs.dataset.df[column].dtype
106
+ summary_stats.loc[column, "Missing Values (%)"] = (
107
+ dataset.df[column].isnull().mean() * 100
139
108
  )
109
+ summary_stats.loc[column, "Data Type"] = str(dataset.df[column].dtype)
140
110
 
141
- if not summary_stats.empty:
142
- summary_stats = summary_stats.sort_values(
143
- by="Missing Values (%)", ascending=False
144
- )
111
+ summary_stats = summary_stats.sort_values(
112
+ by="Missing Values (%)", ascending=False
113
+ )
145
114
  summary_stats.reset_index(inplace=True)
146
- summary_stats.rename(columns={"index": "Datetime Variable"}, inplace=True)
147
- return summary_stats
148
-
149
- def summary(self, metric_value):
150
- summary_stats_numerical = metric_value["numerical"]
151
- summary_stats_categorical = metric_value["categorical"]
152
- summary_stats_datetime = metric_value["datetime"]
153
-
154
- return ResultSummary(
155
- results=[
156
- ResultTable(
157
- data=summary_stats_numerical,
158
- metadata=ResultTableMetadata(title="Numerical Variables"),
159
- ),
160
- ResultTable(
161
- data=summary_stats_categorical,
162
- metadata=ResultTableMetadata(title="Categorical Variables"),
163
- ),
164
- ResultTable(
165
- data=summary_stats_datetime,
166
- metadata=ResultTableMetadata(title="Datetime Variables"),
167
- ),
168
- ]
115
+ summary_stats.rename(columns={"index": "Categorical Variable"}, inplace=True)
116
+ return summary_stats
117
+
118
+
119
+ def get_summary_statistics_datetime(dataset, datetime_fields):
120
+ summary_stats = pd.DataFrame()
121
+ for column in dataset.df[datetime_fields].columns:
122
+ summary_stats.loc[column, "Num of Obs"] = int(dataset.df[column].count())
123
+ summary_stats.loc[column, "Num of Unique Values"] = dataset.df[column].nunique()
124
+ summary_stats.loc[column, "Earliest Date"] = dataset.df[column].min()
125
+ summary_stats.loc[column, "Latest Date"] = dataset.df[column].max()
126
+ summary_stats.loc[column, "Missing Values (%)"] = (
127
+ dataset.df[column].isnull().mean() * 100
169
128
  )
129
+ summary_stats.loc[column, "Data Type"] = str(dataset.df[column].dtype)
170
130
 
171
- def get_categorical_columns(self):
172
- categorical_columns = self.inputs.dataset.df.select_dtypes(
173
- include=["object", "category"]
174
- ).columns.tolist()
175
- return categorical_columns
176
-
177
- def get_numerical_columns(self):
178
- numerical_columns = self.inputs.dataset.df.select_dtypes(
179
- include=["int", "float", "uint8"]
180
- ).columns.tolist()
181
- return numerical_columns
182
-
183
- def get_datetime_columns(self):
184
- datetime_columns = self.inputs.dataset.df.select_dtypes(
185
- include=["datetime"]
186
- ).columns.tolist()
187
- return datetime_columns
188
-
189
- def run(self):
190
- numerical_fields = self.get_numerical_columns()
191
- categorical_fields = self.get_categorical_columns()
192
- datetime_fields = self.get_datetime_columns()
193
-
194
- summary_stats_numerical = self.get_summary_statistics_numerical(
195
- numerical_fields
196
- )
197
- summary_stats_categorical = self.get_summary_statistics_categorical(
198
- categorical_fields
199
- )
200
- summary_stats_datetime = self.get_summary_statistics_datetime(datetime_fields)
201
-
202
- return self.cache_results(
203
- {
204
- "numerical": summary_stats_numerical.to_dict(orient="records"),
205
- "categorical": summary_stats_categorical.to_dict(orient="records"),
206
- "datetime": summary_stats_datetime.to_dict(orient="records"),
207
- }
131
+ if not summary_stats.empty:
132
+ summary_stats = summary_stats.sort_values(
133
+ by="Missing Values (%)", ascending=False
208
134
  )
135
+ summary_stats.reset_index(inplace=True)
136
+ summary_stats.rename(columns={"index": "Datetime Variable"}, inplace=True)
137
+ return summary_stats
138
+
139
+
140
+ def get_categorical_columns(dataset):
141
+ categorical_columns = dataset.df.select_dtypes(
142
+ include=["object", "category"]
143
+ ).columns.tolist()
144
+ return categorical_columns
145
+
146
+
147
+ def get_numerical_columns(dataset):
148
+ numerical_columns = dataset.df.select_dtypes(
149
+ include=["int", "float", "uint8"]
150
+ ).columns.tolist()
151
+ return numerical_columns
152
+
153
+
154
+ def get_datetime_columns(dataset):
155
+ datetime_columns = dataset.df.select_dtypes(include=["datetime"]).columns.tolist()
156
+ return datetime_columns
@@ -52,7 +52,7 @@ class ClusterDistribution(Metric):
52
52
  "num_clusters": 5,
53
53
  }
54
54
  tasks = ["feature_extraction"]
55
- tags = ["llm", "text_data", "text_embeddings", "visualization"]
55
+ tags = ["llm", "text_data", "embeddings", "visualization"]
56
56
 
57
57
  def run(self):
58
58
  # run kmeans clustering on embeddings
@@ -51,7 +51,7 @@ class CosineSimilarityDistribution(Metric):
51
51
  name = "Text Embeddings Cosine Similarity Distribution"
52
52
  required_inputs = ["model", "dataset"]
53
53
  tasks = ["feature_extraction"]
54
- tags = ["llm", "text_data", "text_embeddings", "visualization"]
54
+ tags = ["llm", "text_data", "embeddings", "visualization"]
55
55
 
56
56
  def run(self):
57
57
  # Compute cosine similarity
@@ -54,7 +54,7 @@ class DescriptiveAnalytics(Metric):
54
54
  name = "Descriptive Analytics for Text Embeddings Models"
55
55
  required_inputs = ["model", "dataset"]
56
56
  tasks = ["feature_extraction"]
57
- tags = ["llm", "text_data", "text_embeddings", "visualization"]
57
+ tags = ["llm", "text_data", "embeddings", "visualization"]
58
58
 
59
59
  def run(self):
60
60
  # Assuming y_pred returns a 2D array of embeddings [samples, features]
@@ -54,7 +54,7 @@ class EmbeddingsVisualization2D(Metric):
54
54
  "perplexity": 30,
55
55
  }
56
56
  tasks = ["feature_extraction"]
57
- tags = ["llm", "text_data", "text_embeddings", "visualization"]
57
+ tags = ["llm", "text_data", "embeddings", "visualization"]
58
58
 
59
59
  def run(self):
60
60
  cluster_column = self.params.get("cluster_column")
@@ -30,7 +30,7 @@ class StabilityAnalysis(ThresholdTest):
30
30
  "mean_similarity_threshold": 0.7,
31
31
  }
32
32
  tasks = ["feature_extraction"]
33
- tags = ["llm", "text_data", "text_embeddings", "visualization"]
33
+ tags = ["llm", "text_data", "embeddings", "visualization"]
34
34
 
35
35
  @abstractmethod
36
36
  def perturb_data(self, data: str) -> str:
@@ -62,7 +62,8 @@ class StabilityAnalysis(ThresholdTest):
62
62
 
63
63
  def run(self):
64
64
  # Perturb the test dataset
65
- original = self.inputs.dataset.df
65
+ text_column = self.inputs.dataset.text_column
66
+ original = self.inputs.dataset.df[[text_column]]
66
67
  perturbed = original.copy()
67
68
  perturbed.update(
68
69
  perturbed.select_dtypes(include="object").applymap(self.perturb_data)
@@ -4,8 +4,12 @@
4
4
 
5
5
  from transformers import MarianMTModel, MarianTokenizer
6
6
 
7
+ from validmind.logging import get_logger
8
+
7
9
  from .StabilityAnalysis import StabilityAnalysis
8
10
 
11
+ logger = get_logger(__name__)
12
+
9
13
 
10
14
  class StabilityAnalysisTranslation(StabilityAnalysis):
11
15
  """
@@ -61,8 +65,11 @@ class StabilityAnalysisTranslation(StabilityAnalysis):
61
65
  }
62
66
 
63
67
  def perturb_data(self, data: str):
64
- if not isinstance(data, str):
65
- return data
68
+ if len(data) > 512:
69
+ logger.info(
70
+ "Data length exceeds 512 tokens. Truncating data to 512 tokens."
71
+ )
72
+ data = data[:512]
66
73
 
67
74
  source_lang = self.params["source_lang"]
68
75
  target_lang = self.params["target_lang"]
@@ -53,7 +53,6 @@ def TSNEComponentsPairwisePlots(
53
53
  - t-SNE visualizations can be misleading if interpreted without considering the stochastic nature of the algorithm;
54
54
  two runs with the same parameters might yield different visual outputs, necessitating multiple runs for a consistent interpretation.
55
55
  """
56
-
57
56
  # Get embeddings from the dataset using the model
58
57
  embeddings = np.stack(dataset.y_pred(model))
59
58
 
@@ -60,8 +60,9 @@ class HyperParametersTuning(Metric):
60
60
  param_grid = self.params["param_grid"]
61
61
  if param_grid is None:
62
62
  raise SkipTestError(
63
- "param_grid in dictnary format must be provide to run hyper parameter tuning"
63
+ "param_grid in dictonary format must be provided to run this test"
64
64
  )
65
+
65
66
  model = self.inputs.model.model
66
67
  estimators = GridSearchCV(
67
68
  model, param_grid=param_grid, scoring=self.params["scoring"]
validmind/tests/run.py CHANGED
@@ -118,7 +118,7 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,
118
118
 
119
119
  title_template = "{current_title}({input_description})"
120
120
 
121
- for i, figures in enumerate(list(zip(*figure_lists))):
121
+ for figures in list(zip(*figure_lists)):
122
122
  if is_plotly_figure(figures[0].figure):
123
123
  _update_plotly_titles(figures, input_groups, title_template)
124
124
  elif is_matplotlib_figure(figures[0].figure):
@@ -139,13 +139,25 @@ class VMDataset:
139
139
  )
140
140
 
141
141
  def _add_column(self, column_name, column_values):
142
- if len(column_values) != len(self.df):
143
- raise ValueError(
144
- "Length of values doesn't match number of rows in the DataFrame."
145
- )
142
+ column_values = np.array(column_values)
143
+
144
+ if column_values.ndim == 1:
145
+ if len(column_values) != len(self.df):
146
+ raise ValueError(
147
+ "Length of values doesn't match number of rows in the DataFrame."
148
+ )
149
+ self.columns.append(column_name)
150
+ self.df[column_name] = column_values
151
+ elif column_values.ndim == 2:
152
+ if column_values.shape[0] != len(self.df):
153
+ raise ValueError(
154
+ "Number of rows in values doesn't match number of rows in the DataFrame."
155
+ )
156
+ self.columns.append(column_name)
157
+ self.df[column_name] = column_values.tolist()
146
158
 
147
- self.columns.append(column_name)
148
- self.df[column_name] = column_values
159
+ else:
160
+ raise ValueError("Only 1D and 2D arrays are supported for column_values.")
149
161
 
150
162
  def _validate_assign_predictions(
151
163
  self,
@@ -93,10 +93,10 @@ class TestSuiteSummary:
93
93
 
94
94
  def _add_results_link(self):
95
95
  # avoid circular import
96
- from ...api_client import get_api_host, get_api_project
96
+ from ...api_client import get_api_host, get_api_model
97
97
 
98
98
  ui_host = get_api_host().replace("/api/v1/tracking", "").replace("api", "app")
99
- link = f"{ui_host}/projects/{get_api_project()}/project-overview"
99
+ link = f"{ui_host}/projects/{get_api_model()}/project-overview"
100
100
  results_link = f"""
101
101
  <h3>
102
102
  Check out the updated documentation in your
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.4.0
3
+ Version: 2.4.4
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -46,6 +46,7 @@ Requires-Dist: scikit-learn
46
46
  Requires-Dist: scipy
47
47
  Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
48
48
  Requires-Dist: seaborn
49
+ Requires-Dist: sentencepiece (>=0.2.0,<0.3.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
49
50
  Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
50
51
  Requires-Dist: shap (>=0.42.0,<0.43.0)
51
52
  Requires-Dist: statsmodels
@@ -53,7 +54,7 @@ Requires-Dist: tabulate (>=0.8.9,<0.9.0)
53
54
  Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
54
55
  Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
55
56
  Requires-Dist: tqdm
56
- Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "llm" or extra == "huggingface"
57
+ Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
57
58
  Requires-Dist: xgboost (>=1.5.2,<3)
58
59
  Requires-Dist: ydata-profiling
59
60
  Description-Content-Type: text/markdown
@@ -1,8 +1,8 @@
1
1
  validmind/__init__.py,sha256=UfmzPwUCdUWbWq3zPqqmq4jw0_kfl3hX4U72p_seE4I,3700
2
- validmind/__version__.py,sha256=yLaFvd-K80rs_ClRVYULStijkok4RfYSaanIt_E-aKM,22
2
+ validmind/__version__.py,sha256=zUyWRJ2idk3EkNSUQlyMfzQtAlpD5CCHF2ifejOcf-w,22
3
3
  validmind/ai/test_descriptions.py,sha256=Q1Ftus4x5eiVLKWJu7hqPLukBQZzhy-dARqq_6_JWtk,9464
4
4
  validmind/ai/utils.py,sha256=TEXII_S5CpkpczzSyHwTlqLcPMLnPBJWEBR6QFMKh1U,3421
5
- validmind/api_client.py,sha256=0IR8MpH_GxBykOs4Egz7oEKZLoOEwoli81X1oFL0DD8,16893
5
+ validmind/api_client.py,sha256=xr9VNqCmA_WFf8rVm-0M0pmzVyLAPFOnfEe4dAog1LA,17144
6
6
  validmind/client.py,sha256=UnsEwWK_s3nuktr6i2U3haLjjlWRGR6H431jsZpKEDA,18649
7
7
  validmind/client_config.py,sha256=58L6s6-9vFWC9vkSs_98CjV1YWmlksdhblJtPQxQsAk,1611
8
8
  validmind/datasets/__init__.py,sha256=oYfcvW7BAyUgpghBOnTeGbQF6tpFAWg38rRirdLr8m8,262
@@ -23,10 +23,10 @@ validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv,sha256=H
23
23
  validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv,sha256=jJuRVsDbtv3ky8mJVHzpK_4bSlnIZH-hDW6a8DGGvfY,24551
24
24
  validmind/datasets/llm/rag/rfp.py,sha256=mlx4I1ipMoPplZOqRPza6XEN5D4FE2xc6_SSuGOkgqk,1234
25
25
  validmind/datasets/nlp/__init__.py,sha256=lu2-SGOSECgrxhDtHdlh_FsbpsFtuZ4DbLSL1sww5nc,323
26
- validmind/datasets/nlp/cnn_dailymail.py,sha256=9gAHFWQhSJJ2JF_zarc2oLErZwPtRyfvXV4molhVzXM,3471
26
+ validmind/datasets/nlp/cnn_dailymail.py,sha256=_N_a19nk7uaGQdDAEpSO-XHshV_8U6BH6s76Pmmi3eI,3692
27
27
  validmind/datasets/nlp/datasets/Covid_19.csv,sha256=cwAJHgek3JWUhtZQIiC9-wRWycxbiod6nyZikf09iKE,11545066
28
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv,sha256=MSaj0RJ1lbgePDTfpmgganzit64FQ846J1qjm8PJtfI,426676
29
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv,sha256=bR8Z-m43lngaFW983ECJwSKea3GlfhZDYfPC0MDJMec,2246787
28
+ validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv,sha256=Z-twNp-uRCA736YCQ2FK_95uZUc8nBtKd2ZJh_j6n2U,608859
29
+ validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv,sha256=g-TKvZHmnysaE1jW8OR42x119Ilo1S-V6sD2nf1BHyw,3072969
30
30
  validmind/datasets/nlp/datasets/sentiments_with_predictions.csv,sha256=zwYI-XjUy72XqD0om-cMVYMrEUsL36u4rPAwsicoQtg,754686
31
31
  validmind/datasets/nlp/twitter_covid_19.py,sha256=PtBOh6pri6pVQKpR0ivQfXy4Wcom40G9-dyYgcv0jFI,878
32
32
  validmind/datasets/regression/__init__.py,sha256=wBcJyMyX4U6XjY6Y-aLozHdbjQlXbeFgB2YqFTk2kVU,1653
@@ -65,13 +65,13 @@ validmind/logging.py,sha256=J1Y1dYCH1dtkoYCHoXMOQH_B7EO4fJytWRDrDqZZz8U,5204
65
65
  validmind/models/__init__.py,sha256=lraTbNwoKckXNP3Dbyj-euI78UTkZ_w5wpUOb8l5nWs,729
66
66
  validmind/models/foundation.py,sha256=ZdVmwwRVbjgqMyfjguyf9Lka_KcgJnDD7ho8zv0gQok,1842
67
67
  validmind/models/function.py,sha256=xLNtgzRiCfF4jrIedHrX1lmCR-92fB3fVDzLS7el4SM,1785
68
- validmind/models/huggingface.py,sha256=oDB32iwP_FQ_ZtZgAC5iBPG4suPaSF-J6317TM8Ob2g,2304
68
+ validmind/models/huggingface.py,sha256=DMHekLpWi6c4N0svh-3G0NYYwzxPXOvqiU95M4QffUs,2253
69
69
  validmind/models/metadata.py,sha256=PMcdYuACkSPvuG8io5BhZeMwclQr_q79mXbvd1SC-7I,1665
70
70
  validmind/models/pipeline.py,sha256=nSskKWxaS4SGmx_B0IAvS5ogDZyh6tdx_aUkyxSXt88,2051
71
71
  validmind/models/pytorch.py,sha256=aAEUWtISwLh-PMvHkcLwBEbBStAByt4J-NpK-Ndv38E,1826
72
72
  validmind/models/r_model.py,sha256=eYdpCREgBpYv-PxJDuG91I77OOAx3-43FoaYT560ziE,7172
73
73
  validmind/models/sklearn.py,sha256=lOCJlP2wvd5IJHtBS1XG9FXrtIvO_f8xm2Qp1UdsiBw,2406
74
- validmind/template.py,sha256=d5I8TjX8Sh5CJPWo_zezMfyvnh7OXo07VV8iKXxlk9E,7184
74
+ validmind/template.py,sha256=Ja7PHeJsn7jNHop_DwU6tFrsi0bLy87lWZLAO130ZXE,7273
75
75
  validmind/test_suites/__init__.py,sha256=T7446YfTnxfBKNFwIXVcbMb4uIyRzHFAyUQLdDQCjVc,7039
76
76
  validmind/test_suites/classifier.py,sha256=0ZE3z5X_ZewTvmwQ3cVGJQh7dPgg0IlqcQshJJxCFWQ,4003
77
77
  validmind/test_suites/cluster.py,sha256=Wc2NViwivjiuiJMwrnGbOJYeZ3ApN8usWlOPYZYWAgE,2276
@@ -127,7 +127,7 @@ validmind/tests/data_validation/Skewness.py,sha256=MouMHv5wvpUTDueieqIceXG7JZji1
127
127
  validmind/tests/data_validation/SpreadPlot.py,sha256=yctCXm_hFVmhbpPywr9GGWf2W8jIbvmyxQIkr6w9sVo,4551
128
128
  validmind/tests/data_validation/TabularCategoricalBarPlots.py,sha256=UKGThQmKKxL2SL7siPlY7ODYgooNSrtUFdbLAalh9fU,4201
129
129
  validmind/tests/data_validation/TabularDateTimeHistograms.py,sha256=_1FU3kdAN7GI3208OGmlKrAJ8rIrTeRTa8bGNWSmBiI,3947
130
- validmind/tests/data_validation/TabularDescriptionTables.py,sha256=SEE_Ewxfja2HjJEZnN17f4xTHoKpDAJDU7vK9ZHLyRk,9241
130
+ validmind/tests/data_validation/TabularDescriptionTables.py,sha256=nLv_9XB0d8_5KUUHFPUxzTx_usHxby7qhS3HyrS4nc0,7132
131
131
  validmind/tests/data_validation/TabularNumericalHistograms.py,sha256=-WqG6XI2hb5VEVOeNrfpUzyWzw6mcxJvn7wHCI7o4q4,4130
132
132
  validmind/tests/data_validation/TargetRateBarPlots.py,sha256=omgPJbA1WZ-5YEiatSdEvpEQ-QCJULU8kDOHAUE610E,5697
133
133
  validmind/tests/data_validation/TimeSeriesDescription.py,sha256=YIfet30KZSAOdYAkTQadKLNuY_SAM3a5Fn2z2AqQz7I,3130
@@ -175,21 +175,21 @@ validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py,sha256=ec90bxP9
175
175
  validmind/tests/model_validation/TokenDisparity.py,sha256=EZlpFQH6qRWedjTQT5o4u-OIdgj1iKK-JB8GEQQlxoA,4394
176
176
  validmind/tests/model_validation/ToxicityScore.py,sha256=nFDHU1Z8mGpJrdKE6sWxo9nOqqzne1JsYIiNFyn_gYA,5299
177
177
  validmind/tests/model_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
- validmind/tests/model_validation/embeddings/ClusterDistribution.py,sha256=fs4Xrq18qQrxfmMWRlOGVSES48_Y81KDrNrwEArGs50,3512
178
+ validmind/tests/model_validation/embeddings/ClusterDistribution.py,sha256=1Ywx0OTRZ13F0deQKoXzzuxDr31lYWjlC7HZqJY5Ys8,3507
179
179
  validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py,sha256=xXM1pI-4XOLLSE-sZqQzdUryrAvql5maytFNjzMlrn4,4900
180
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py,sha256=zU4H9t2lQVvwhpHXJQKRbG1iGkMMZbw3_JiLFBSHy5E,3419
180
+ validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py,sha256=uqWgh0Qbc9OQc0RArv6NCHbSNBA9Lko4jybJ_NVdTLY,3414
181
181
  validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py,sha256=R16Jq2BQPG4xknAmDGEbRcX8RmDb879NWazSPmB0ARo,3495
182
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py,sha256=t_nYQWSO8QpS15PZZmOFje6Ypj2wP2b9U_mX6-M85zU,4207
183
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py,sha256=M2bnEKeiuD6cuTr1hDHEaXKUp-FIXP6Q9asioG8ZdCI,4302
182
+ validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py,sha256=urnF9XBMgJ7tmQzHkmYLKCSWrPjFM95PfhIRuO7-F7w,4202
183
+ validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py,sha256=vqFF_a4VcBL-iwtG_sgOiK04E00CzL-dmOLHGtFwSEw,4297
184
184
  validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py,sha256=UG5etgWn9Rnqts5X_1U6CTj-uDfz6VLqXdQVZ9Mu3y0,4502
185
185
  validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py,sha256=ruMY1LK2LwmzwdKMcgFLeaebhEi4_kbJNpEafVCTPfw,3375
186
186
  validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py,sha256=nUQU9e32RHa3Ytofi_dzId_7VSNH4bJG9I_r8VJNbFk,3893
187
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py,sha256=jP-6oG0VmoWLqiWTJfeGrBtphocwlEYEAs40Xtq5gwU,4539
187
+ validmind/tests/model_validation/embeddings/StabilityAnalysis.py,sha256=2pWHSV8weWX-KmdQ1SAKAFGQiDKff0qgwzH3oy4oVTE,4603
188
188
  validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py,sha256=exnaM-XHiHHflflXfJQLNGQByTBDeKwCtxBoNPKNev8,3970
189
189
  validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py,sha256=N1hYFnAQKLxSpjd6ZHwi57Zdx10ssEX2Ci73rrEXTGs,5796
190
190
  validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=npnOPAoXb5FoiwQEwp_gDcbGa5xk4rYnXChTJnuGX64,4405
191
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=9dL06PRlzokEpaEQAj_12FuQnlzfQ2-__6alYfSajmU,4636
192
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py,sha256=jfEJjLiBhrXGmafNPI0rk3sDugDmuQ6pbpek4rF8v1k,4512
191
+ validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=qLydyTQ6mzHOYQzqysjPPe_ltiTsRfPEhZDEDm5XxX8,4825
192
+ validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py,sha256=ereo_dGf19xqvOGz7zcGwhDRU_UqvjFRi5n4KmGDKl8,4511
193
193
  validmind/tests/model_validation/ragas/AnswerCorrectness.py,sha256=PgSxFcyWx2WAI6DsgYJQW3N5EKQgm-uCAZgm8r9Cly4,5002
194
194
  validmind/tests/model_validation/ragas/AnswerRelevance.py,sha256=iIgN6CTAYs8V9sfJVhR_p6B5TQLUQ413rXYqwVP3770,4861
195
195
  validmind/tests/model_validation/ragas/AnswerSimilarity.py,sha256=csCdKnjKXwIAOcc6PdAWl0IO2ttou2ihoXtU-G37hz4,4321
@@ -211,7 +211,7 @@ validmind/tests/model_validation/sklearn/ConfusionMatrix.py,sha256=wdEr8KopFOgbY
211
211
  validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py,sha256=zUXNy9SookZQQfLvwvI2W0yG6BlRBdWIK0IHgYZKNpg,3250
212
212
  validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py,sha256=OxDyofYDNjPqhNrYg4_1NbDvE4J7W6QR-Mzn17NlIkY,2997
213
213
  validmind/tests/model_validation/sklearn/HomogeneityScore.py,sha256=VWRVfrLUptIxCzsZJRNe1p3lvE8Xv4sfC9r-dqyUmIc,2701
214
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py,sha256=VbTgQ1NBfoHUAVvxfLkwu4cHy9_quSN4ywwD49cUH_U,4585
214
+ validmind/tests/model_validation/sklearn/HyperParametersTuning.py,sha256=5a4K15UlecxFnKYVsgPksf29LjvdfHWPGT2t5-2JO9A,4575
215
215
  validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py,sha256=lmSktn7usYZt-2v8Ykrig-9vEM910AfUdRkyOrsZd3U,5906
216
216
  validmind/tests/model_validation/sklearn/MinimumAccuracy.py,sha256=5KSAd29dbKs3nNJ734t1j806Td3H5LRIw950g3Xp2nc,5277
217
217
  validmind/tests/model_validation/sklearn/MinimumF1Score.py,sha256=TaLHk98CwQigyt17L1uBBLC25D5J_IKb6a_IFJFO7AE,4618
@@ -266,7 +266,7 @@ validmind/tests/prompt_validation/Robustness.py,sha256=fBdkYnO9yoBazz4wD-l62tT8D
266
266
  validmind/tests/prompt_validation/Specificity.py,sha256=h3gKRTTi2rfnGWmGC1YnSt2s_VbZU4KX0iY7LciZ3PU,6068
267
267
  validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
268
268
  validmind/tests/prompt_validation/ai_powered_test.py,sha256=7TTeIR5GotQosm7oVT8Y3KnwPB3XkVT1Fzhckpr-SgE,1963
269
- validmind/tests/run.py,sha256=H9LRkJ4mZFqKUje31Lz0J0VL-DVYAyztdvpOa3NaAoA,14319
269
+ validmind/tests/run.py,sha256=OI0SxHvJGH0L2o7LyiK-723Cui1UFAu30YxG52ooU7M,14305
270
270
  validmind/tests/test_providers.py,sha256=47xe5eb5ufvj1jmhdRsbSvDQTXSDpFDFNeXg3xtXwhw,5320
271
271
  validmind/tests/utils.py,sha256=kNrxfUYbj4DwmkZtpp_1rG4GMUGxYEhvqnYR_A7qAKM,471
272
272
  validmind/unit_metrics/__init__.py,sha256=AlFnWA9pmzVf8xysPxYpQ3kBTQ81-YVxRTJpgC0Q41w,7344
@@ -290,7 +290,7 @@ validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py,sha256=_5IQIU9
290
290
  validmind/utils.py,sha256=MQDsW7YuwEJ50tA01n3xb8D_Ihmji_Mn22AlMnJJQT8,15819
291
291
  validmind/vm_models/__init__.py,sha256=lmWCD2u4tW6_AH39UnJ24sCcMUcsHbUttz7SaZfrh3s,1168
292
292
  validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
293
- validmind/vm_models/dataset/dataset.py,sha256=YP6l5sq7SJNExWK3RvkxeCBTLj4z2GkBsmv4KxfBS1I,22753
293
+ validmind/vm_models/dataset/dataset.py,sha256=X0vKp1NuL0k5WLILAAlmnPs_WHR8Ji6ovlfWLjJs3qk,23305
294
294
  validmind/vm_models/dataset/utils.py,sha256=ygT6hUw0KklKCboo7tqLxh_hf-dEiaccVyCpR9DCPF8,5177
295
295
  validmind/vm_models/figure.py,sha256=iSrvPcCG5sQrMkX1Fh6c5utRzaroh3bc6IlnGDOK_Eg,6651
296
296
  validmind/vm_models/model.py,sha256=b-UL73EWOpj-X5aQbHQ3HLkONHCH9hYwUlKxVwPC6gI,6088
@@ -304,11 +304,11 @@ validmind/vm_models/test/threshold_test.py,sha256=xSadM5t9Z-XZjkxu7LKmeljy2bdwTw
304
304
  validmind/vm_models/test/threshold_test_result.py,sha256=EXP-g_e3NsnpkvNgYew030qVUoY6ZTHyuuFUXaq-BuM,1954
305
305
  validmind/vm_models/test_context.py,sha256=AN7-atBgOcD04MLVitCFJYooxF6_iNmvI2H4nkv32iw,9035
306
306
  validmind/vm_models/test_suite/runner.py,sha256=aewxadRfoOPH48jes2Gtb3Ju_FWFfVM_9ARIAJHD4wA,6982
307
- validmind/vm_models/test_suite/summary.py,sha256=co-xJJMUYGb7cOiVmw0i8vpZlfiMqrWjaCOmHKMAbcE,4686
307
+ validmind/vm_models/test_suite/summary.py,sha256=GQRNe2ZvvqjQN0yKmaN7ohAUjRFQIN4YYUYxfOuWN6M,4682
308
308
  validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
309
309
  validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
310
- validmind-2.4.0.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
311
- validmind-2.4.0.dist-info/METADATA,sha256=aN9HlbzIlfBx98XfsOSYr2Y0ob1UjLSJuBG2E9g3xVM,4133
312
- validmind-2.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
313
- validmind-2.4.0.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
314
- validmind-2.4.0.dist-info/RECORD,,
310
+ validmind-2.4.4.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
311
+ validmind-2.4.4.dist-info/METADATA,sha256=keXWOaqBlRbo2FO8FDtwgd37iscJMcloL1A3qmY6NGk,4240
312
+ validmind-2.4.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
313
+ validmind-2.4.4.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
314
+ validmind-2.4.4.dist-info/RECORD,,