validmind 2.4.1__py3-none-any.whl → 2.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. validmind/__version__.py +1 -1
  2. validmind/api_client.py +17 -15
  3. validmind/datasets/nlp/cnn_dailymail.py +10 -1
  4. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +33 -209
  5. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +156 -1051
  6. validmind/models/huggingface.py +0 -1
  7. validmind/tests/data_validation/IQROutliersBarPlot.py +16 -9
  8. validmind/tests/data_validation/IQROutliersTable.py +13 -6
  9. validmind/tests/data_validation/TabularDescriptionTables.py +96 -148
  10. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  11. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -1
  12. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
  13. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  14. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +3 -2
  15. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +9 -2
  16. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -1
  17. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -1
  18. validmind/tests/run.py +1 -1
  19. validmind/vm_models/dataset/dataset.py +18 -6
  20. validmind/vm_models/test_suite/summary.py +2 -2
  21. {validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/METADATA +4 -3
  22. {validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/RECORD +25 -25
  23. {validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/LICENSE +0 -0
  24. {validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/WHEEL +0 -0
  25. {validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/entry_points.txt +0 -0
validmind/models/huggingface.py CHANGED
@@ -56,7 +56,6 @@ class HFModel(VMModel):
             return [result["label"] for result in results]
         elif tasks[-1] == "feature_extraction":
             # Extract [CLS] token embedding for each input and return as list of lists
-            print(f"len(results): {len(results)}")
             return [embedding[0][0] for embedding in results]
         else:
             return results
validmind/tests/data_validation/IQROutliersBarPlot.py CHANGED
@@ -4,7 +4,6 @@

 from dataclasses import dataclass

-import numpy as np
 import plotly.graph_objects as go

 from validmind.vm_models import Figure, Metric
@@ -62,22 +61,27 @@ class IQROutliersBarPlot(Metric):

     name = "iqr_outliers_bar_plot"
     required_inputs = ["dataset"]
-    default_params = {"threshold": 1.5, "num_features": None, "fig_width": 800}
+    default_params = {"threshold": 1.5, "fig_width": 800}
     tasks = ["classification", "regression"]
     tags = ["tabular_data", "visualization", "numerical_data"]

     def run(self):
         df = self.inputs.dataset.df
-        num_features = self.params["num_features"]
+
+        # Select numerical features
+        features = self.inputs.dataset.feature_columns_numeric
+
+        # Select non-binary features
+        features = [
+            feature
+            for feature in features
+            if len(self.inputs.dataset.df[feature].unique()) > 2
+        ]
+
         threshold = self.params["threshold"]
         fig_width = self.params["fig_width"]

-        # If num_features is None, use all numeric columns.
-        # Otherwise, only use the columns provided in num_features.
-        if num_features is None:
-            df = df.select_dtypes(include=[np.number])
-        else:
-            df = df[num_features]
+        df = df[features]

         return self.detect_and_visualize_outliers(df, threshold, fig_width)

@@ -98,6 +102,9 @@ class IQROutliersBarPlot(Metric):
             # Compute outliers
             outliers = self.compute_outliers(df[col], threshold)

+            if outliers.empty:
+                continue  # Skip plotting if there are no outliers
+
             Q1_count = outliers[
                 (outliers >= 0) & (outliers < outliers.quantile(0.25))
             ].count()
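
Note: the two hunks above remove the `num_features` knob entirely; the bar plot now derives its columns from the dataset, keeping only non-binary numeric features and skipping any column with no outliers. A minimal standalone sketch of the equivalent selection logic (hypothetical column names; `numeric_cols` stands in for `dataset.feature_columns_numeric`):

    import pandas as pd

    df = pd.DataFrame({"age": [22, 35, 58, 41], "is_active": [0, 1, 1, 0]})
    numeric_cols = ["age", "is_active"]

    # Keep only non-binary numeric features, mirroring the new run() above
    features = [col for col in numeric_cols if len(df[col].unique()) > 2]
    print(features)  # ['age'] -- the binary 'is_active' column is dropped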
validmind/tests/data_validation/IQROutliersTable.py CHANGED
@@ -4,7 +4,6 @@

 from dataclasses import dataclass

-import numpy as np
 import pandas as pd

 from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
@@ -53,12 +52,22 @@ class IQROutliersTable(Metric):

     name = "iqr_outliers_table"
     required_inputs = ["dataset"]
-    default_params = {"features": None, "threshold": 1.5}
+    default_params = {"threshold": 1.5}
     tasks = ["classification", "regression"]
     tags = ["tabular_data", "numerical_data"]

     def run(self):
-        features = self.params["features"]
+
+        # Select numerical features
+        features = self.inputs.dataset.feature_columns_numeric
+
+        # Select non-binary features
+        features = [
+            feature
+            for feature in features
+            if len(self.inputs.dataset.df[feature].unique()) > 2
+        ]
+
         threshold = self.params["threshold"]

         df = self.inputs.dataset.df
@@ -80,9 +89,7 @@ class IQROutliersTable(Metric):
         upper_bound = Q3 + threshold * IQR
         return series[(series < lower_bound) | (series > upper_bound)]

-    def detect_and_analyze_outliers(self, df, features=None, threshold=1.5):
-        if features is None:
-            features = df.select_dtypes(include=[np.number]).columns.tolist()
+    def detect_and_analyze_outliers(self, df, features, threshold=1.5):

         outliers_summary = []
         for feature in features:
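
Note: `IQROutliersTable` gets the same treatment, so its only remaining parameter is `threshold`; the columns to analyze are inferred from the dataset. A hedged usage sketch, assuming an initialized `vm_dataset` and ValidMind's documented `run_test` entry point:

    import validmind as vm

    # "features" is no longer a param; columns come from the dataset itself
    result = vm.tests.run_test(
        "validmind.data_validation.IQROutliersTable",
        inputs={"dataset": vm_dataset},
        params={"threshold": 1.5},
    )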
validmind/tests/data_validation/TabularDescriptionTables.py CHANGED
@@ -2,15 +2,14 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from dataclasses import dataclass
-
 import pandas as pd

-from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+from validmind import tags, tasks


-@dataclass
-class TabularDescriptionTables(Metric):
+@tags("tabular_data")
+@tasks("classification", "regression")
+def TabularDescriptionTables(dataset):
     """
     Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.

@@ -54,155 +53,104 @@ class TabularDescriptionTables(Metric):
     chosen algorithm.
     """

-    name = "tabular_description_tables"
-    required_inputs = ["dataset"]
-
-    tasks = ["classification", "regression"]
-    tags = ["tabular_data"]
-
-    def get_summary_statistics_numerical(self, numerical_fields):
-        summary_stats = self.inputs.dataset.df[numerical_fields].describe().T
-        summary_stats["Missing Values (%)"] = (
-            self.inputs.dataset.df[numerical_fields].isnull().mean() * 100
-        )
-        summary_stats["Data Type"] = self.inputs.dataset.df[
-            numerical_fields
-        ].dtypes.astype(str)
-        summary_stats = summary_stats[
-            ["count", "mean", "min", "max", "Missing Values (%)", "Data Type"]
-        ]
-        summary_stats.columns = [
-            "Num of Obs",
-            "Mean",
-            "Min",
-            "Max",
-            "Missing Values (%)",
-            "Data Type",
-        ]
-        summary_stats["Num of Obs"] = summary_stats["Num of Obs"].astype(int)
-        summary_stats = summary_stats.sort_values(
-            by="Missing Values (%)", ascending=False
-        )
-        summary_stats.reset_index(inplace=True)
-        summary_stats.rename(columns={"index": "Numerical Variable"}, inplace=True)
-        return summary_stats
-
-    def get_summary_statistics_categorical(self, categorical_fields):
-        summary_stats = pd.DataFrame()
-        if categorical_fields:  # check if the list is not empty
-            for column in self.inputs.dataset.df[categorical_fields].columns:
-                summary_stats.loc[column, "Num of Obs"] = int(
-                    self.inputs.dataset.df[column].count()
-                )
-                summary_stats.loc[
-                    column, "Num of Unique Values"
-                ] = self.inputs.dataset.df[column].nunique()
-                summary_stats.loc[column, "Unique Values"] = str(
-                    self.inputs.dataset.df[column].unique()
-                )
-                summary_stats.loc[column, "Missing Values (%)"] = (
-                    self.inputs.dataset.df[column].isnull().mean() * 100
-                )
-                summary_stats.loc[column, "Data Type"] = str(
-                    self.inputs.dataset.df[column].dtype
-                )
-
-            summary_stats = summary_stats.sort_values(
-                by="Missing Values (%)", ascending=False
-            )
-            summary_stats.reset_index(inplace=True)
-            summary_stats.rename(
-                columns={"index": "Categorical Variable"}, inplace=True
-            )
-        return summary_stats
-
-    def get_summary_statistics_datetime(self, datetime_fields):
-        summary_stats = pd.DataFrame()
-        for column in self.inputs.dataset.df[datetime_fields].columns:
-            summary_stats.loc[column, "Num of Obs"] = int(
-                self.inputs.dataset.df[column].count()
-            )
-            summary_stats.loc[column, "Num of Unique Values"] = self.inputs.dataset.df[
+    numerical_fields = get_numerical_columns(dataset)
+    categorical_fields = get_categorical_columns(dataset)
+    datetime_fields = get_datetime_columns(dataset)
+
+    summary_stats_numerical = get_summary_statistics_numerical(
+        dataset, numerical_fields
+    )
+    summary_stats_categorical = get_summary_statistics_categorical(
+        dataset, categorical_fields
+    )
+    summary_stats_datetime = get_summary_statistics_datetime(dataset, datetime_fields)
+
+    return (summary_stats_numerical, summary_stats_categorical, summary_stats_datetime)
+
+
+def get_summary_statistics_numerical(dataset, numerical_fields):
+    summary_stats = dataset.df[numerical_fields].describe().T
+    summary_stats["Missing Values (%)"] = (
+        dataset.df[numerical_fields].isnull().mean() * 100
+    )
+    summary_stats["Data Type"] = dataset.df[numerical_fields].dtypes.astype(str)
+    summary_stats = summary_stats[
+        ["count", "mean", "min", "max", "Missing Values (%)", "Data Type"]
+    ]
+    summary_stats.columns = [
+        "Num of Obs",
+        "Mean",
+        "Min",
+        "Max",
+        "Missing Values (%)",
+        "Data Type",
+    ]
+    summary_stats["Num of Obs"] = summary_stats["Num of Obs"].astype(int)
+    summary_stats = summary_stats.sort_values(by="Missing Values (%)", ascending=False)
+    summary_stats.reset_index(inplace=True)
+    summary_stats.rename(columns={"index": "Numerical Variable"}, inplace=True)
+    return summary_stats
+
+
+def get_summary_statistics_categorical(dataset, categorical_fields):
+    summary_stats = pd.DataFrame()
+    if categorical_fields:  # check if the list is not empty
+        for column in dataset.df[categorical_fields].columns:
+            summary_stats.loc[column, "Num of Obs"] = int(dataset.df[column].count())
+            summary_stats.loc[column, "Num of Unique Values"] = dataset.df[
                 column
             ].nunique()
-            summary_stats.loc[column, "Earliest Date"] = self.inputs.dataset.df[
-                column
-            ].min()
-            summary_stats.loc[column, "Latest Date"] = self.inputs.dataset.df[
-                column
-            ].max()
-            summary_stats.loc[column, "Missing Values (%)"] = (
-                self.inputs.dataset.df[column].isnull().mean() * 100
+            summary_stats.loc[column, "Unique Values"] = str(
+                dataset.df[column].unique()
             )
-            summary_stats.loc[column, "Data Type"] = str(
-                self.inputs.dataset.df[column].dtype
+            summary_stats.loc[column, "Missing Values (%)"] = (
+                dataset.df[column].isnull().mean() * 100
             )
+            summary_stats.loc[column, "Data Type"] = str(dataset.df[column].dtype)

-        if not summary_stats.empty:
-            summary_stats = summary_stats.sort_values(
-                by="Missing Values (%)", ascending=False
-            )
+        summary_stats = summary_stats.sort_values(
+            by="Missing Values (%)", ascending=False
+        )
         summary_stats.reset_index(inplace=True)
-        summary_stats.rename(columns={"index": "Datetime Variable"}, inplace=True)
-        return summary_stats
-
-    def summary(self, metric_value):
-        summary_stats_numerical = metric_value["numerical"]
-        summary_stats_categorical = metric_value["categorical"]
-        summary_stats_datetime = metric_value["datetime"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_stats_numerical,
-                    metadata=ResultTableMetadata(title="Numerical Variables"),
-                ),
-                ResultTable(
-                    data=summary_stats_categorical,
-                    metadata=ResultTableMetadata(title="Categorical Variables"),
-                ),
-                ResultTable(
-                    data=summary_stats_datetime,
-                    metadata=ResultTableMetadata(title="Datetime Variables"),
-                ),
-            ]
+        summary_stats.rename(columns={"index": "Categorical Variable"}, inplace=True)
+    return summary_stats
+
+
+def get_summary_statistics_datetime(dataset, datetime_fields):
+    summary_stats = pd.DataFrame()
+    for column in dataset.df[datetime_fields].columns:
+        summary_stats.loc[column, "Num of Obs"] = int(dataset.df[column].count())
+        summary_stats.loc[column, "Num of Unique Values"] = dataset.df[column].nunique()
+        summary_stats.loc[column, "Earliest Date"] = dataset.df[column].min()
+        summary_stats.loc[column, "Latest Date"] = dataset.df[column].max()
+        summary_stats.loc[column, "Missing Values (%)"] = (
+            dataset.df[column].isnull().mean() * 100
         )
+        summary_stats.loc[column, "Data Type"] = str(dataset.df[column].dtype)

-    def get_categorical_columns(self):
-        categorical_columns = self.inputs.dataset.df.select_dtypes(
-            include=["object", "category"]
-        ).columns.tolist()
-        return categorical_columns
-
-    def get_numerical_columns(self):
-        numerical_columns = self.inputs.dataset.df.select_dtypes(
-            include=["int", "float", "uint8"]
-        ).columns.tolist()
-        return numerical_columns
-
-    def get_datetime_columns(self):
-        datetime_columns = self.inputs.dataset.df.select_dtypes(
-            include=["datetime"]
-        ).columns.tolist()
-        return datetime_columns
-
-    def run(self):
-        numerical_fields = self.get_numerical_columns()
-        categorical_fields = self.get_categorical_columns()
-        datetime_fields = self.get_datetime_columns()
-
-        summary_stats_numerical = self.get_summary_statistics_numerical(
-            numerical_fields
-        )
-        summary_stats_categorical = self.get_summary_statistics_categorical(
-            categorical_fields
-        )
-        summary_stats_datetime = self.get_summary_statistics_datetime(datetime_fields)
-
-        return self.cache_results(
-            {
-                "numerical": summary_stats_numerical.to_dict(orient="records"),
-                "categorical": summary_stats_categorical.to_dict(orient="records"),
-                "datetime": summary_stats_datetime.to_dict(orient="records"),
-            }
+    if not summary_stats.empty:
+        summary_stats = summary_stats.sort_values(
+            by="Missing Values (%)", ascending=False
         )
+        summary_stats.reset_index(inplace=True)
+        summary_stats.rename(columns={"index": "Datetime Variable"}, inplace=True)
+    return summary_stats
+
+
+def get_categorical_columns(dataset):
+    categorical_columns = dataset.df.select_dtypes(
+        include=["object", "category"]
+    ).columns.tolist()
+    return categorical_columns
+
+
+def get_numerical_columns(dataset):
+    numerical_columns = dataset.df.select_dtypes(
+        include=["int", "float", "uint8"]
+    ).columns.tolist()
+    return numerical_columns
+
+
+def get_datetime_columns(dataset):
+    datetime_columns = dataset.df.select_dtypes(include=["datetime"]).columns.tolist()
+    return datetime_columns
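
Note: this hunk migrates the test from a `Metric` subclass (with `run`, `summary`, and `cache_results`) to ValidMind's functional test style: a decorated function that takes `dataset` directly and returns its result tables, with the former methods hoisted to module-level helpers. A minimal sketch of the pattern, with a hypothetical test name:

    import pandas as pd

    from validmind import tags, tasks


    @tags("tabular_data")
    @tasks("classification", "regression")
    def MyDescriptionTable(dataset):
        """Sketch of the functional test style used above."""
        stats = dataset.df.describe().T
        stats.reset_index(inplace=True)
        stats.rename(columns={"index": "Variable"}, inplace=True)
        # Returned DataFrames are rendered as result tables by the framework
        return stats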
validmind/tests/model_validation/embeddings/ClusterDistribution.py CHANGED
@@ -52,7 +52,7 @@ class ClusterDistribution(Metric):
         "num_clusters": 5,
     }
     tasks = ["feature_extraction"]
-    tags = ["llm", "text_data", "text_embeddings", "visualization"]
+    tags = ["llm", "text_data", "embeddings", "visualization"]

     def run(self):
         # run kmeans clustering on embeddings
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py CHANGED
@@ -51,7 +51,7 @@ class CosineSimilarityDistribution(Metric):
     name = "Text Embeddings Cosine Similarity Distribution"
     required_inputs = ["model", "dataset"]
     tasks = ["feature_extraction"]
-    tags = ["llm", "text_data", "text_embeddings", "visualization"]
+    tags = ["llm", "text_data", "embeddings", "visualization"]

     def run(self):
         # Compute cosine similarity
validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py CHANGED
@@ -54,7 +54,7 @@ class DescriptiveAnalytics(Metric):
     name = "Descriptive Analytics for Text Embeddings Models"
     required_inputs = ["model", "dataset"]
     tasks = ["feature_extraction"]
-    tags = ["llm", "text_data", "text_embeddings", "visualization"]
+    tags = ["llm", "text_data", "embeddings", "visualization"]

     def run(self):
         # Assuming y_pred returns a 2D array of embeddings [samples, features]
validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py CHANGED
@@ -54,7 +54,7 @@ class EmbeddingsVisualization2D(Metric):
         "perplexity": 30,
     }
     tasks = ["feature_extraction"]
-    tags = ["llm", "text_data", "text_embeddings", "visualization"]
+    tags = ["llm", "text_data", "embeddings", "visualization"]

     def run(self):
         cluster_column = self.params.get("cluster_column")
validmind/tests/model_validation/embeddings/StabilityAnalysis.py CHANGED
@@ -30,7 +30,7 @@ class StabilityAnalysis(ThresholdTest):
         "mean_similarity_threshold": 0.7,
     }
     tasks = ["feature_extraction"]
-    tags = ["llm", "text_data", "text_embeddings", "visualization"]
+    tags = ["llm", "text_data", "embeddings", "visualization"]

     @abstractmethod
     def perturb_data(self, data: str) -> str:
@@ -62,7 +62,8 @@ class StabilityAnalysis(ThresholdTest):

     def run(self):
         # Perturb the test dataset
-        original = self.inputs.dataset.df
+        text_column = self.inputs.dataset.text_column
+        original = self.inputs.dataset.df[[text_column]]
         perturbed = original.copy()
         perturbed.update(
             perturbed.select_dtypes(include="object").applymap(self.perturb_data)
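
Note: perturbation is now scoped to the dataset's configured text column rather than every object-typed column in the frame. A standalone sketch of the effect, with `str.upper` standing in for `perturb_data`:

    import pandas as pd

    df = pd.DataFrame({"text": ["hello world"], "label": ["positive"]})

    # After this change only the text column is copied and perturbed;
    # other object columns such as "label" are left out entirely
    original = df[["text"]]
    perturbed = original.copy()
    perturbed.update(perturbed.select_dtypes(include="object").applymap(str.upper))
    print(perturbed)  # text -> "HELLO WORLD"; "label" is never touched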
validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py CHANGED
@@ -4,8 +4,12 @@

 from transformers import MarianMTModel, MarianTokenizer

+from validmind.logging import get_logger
+
 from .StabilityAnalysis import StabilityAnalysis

+logger = get_logger(__name__)
+

 class StabilityAnalysisTranslation(StabilityAnalysis):
     """
@@ -61,8 +65,11 @@ class StabilityAnalysisTranslation(StabilityAnalysis):
     }

     def perturb_data(self, data: str):
-        if not isinstance(data, str):
-            return data
+        if len(data) > 512:
+            logger.info(
+                "Data length exceeds 512 tokens. Truncating data to 512 tokens."
+            )
+            data = data[:512]

         source_lang = self.params["source_lang"]
         target_lang = self.params["target_lang"]
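
Note: the new guard replaces the old `isinstance` check for non-string input, and it truncates by plain string slicing, so the 512 limit counts characters rather than tokenizer tokens, despite the wording of the log message. A minimal sketch of the truncation behavior:

    data = "x" * 600
    if len(data) > 512:
        data = data[:512]  # character-based cut, not a tokenizer-aware one
    assert len(data) == 512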
validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py CHANGED
@@ -53,7 +53,6 @@ def TSNEComponentsPairwisePlots(
     - t-SNE visualizations can be misleading if interpreted without considering the stochastic nature of the algorithm;
     two runs with the same parameters might yield different visual outputs, necessitating multiple runs for a consistent interpretation.
     """
-
     # Get embeddings from the dataset using the model
     embeddings = np.stack(dataset.y_pred(model))

validmind/tests/model_validation/sklearn/HyperParametersTuning.py CHANGED
@@ -60,8 +60,9 @@ class HyperParametersTuning(Metric):
         param_grid = self.params["param_grid"]
         if param_grid is None:
             raise SkipTestError(
-                "param_grid in dictnary format must be provide to run hyper parameter tuning"
+                "param_grid in dictonary format must be provided to run this test"
             )
+
         model = self.inputs.model.model
         estimators = GridSearchCV(
             model, param_grid=param_grid, scoring=self.params["scoring"]
validmind/tests/run.py CHANGED
@@ -118,7 +118,7 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,

     title_template = "{current_title}({input_description})"

-    for i, figures in enumerate(list(zip(*figure_lists))):
+    for figures in list(zip(*figure_lists)):
         if is_plotly_figure(figures[0].figure):
             _update_plotly_titles(figures, input_groups, title_template)
         elif is_matplotlib_figure(figures[0].figure):
validmind/vm_models/dataset/dataset.py CHANGED
@@ -139,13 +139,25 @@ class VMDataset:
         )

     def _add_column(self, column_name, column_values):
-        if len(column_values) != len(self.df):
-            raise ValueError(
-                "Length of values doesn't match number of rows in the DataFrame."
-            )
+        column_values = np.array(column_values)
+
+        if column_values.ndim == 1:
+            if len(column_values) != len(self.df):
+                raise ValueError(
+                    "Length of values doesn't match number of rows in the DataFrame."
+                )
+            self.columns.append(column_name)
+            self.df[column_name] = column_values
+        elif column_values.ndim == 2:
+            if column_values.shape[0] != len(self.df):
+                raise ValueError(
+                    "Number of rows in values doesn't match number of rows in the DataFrame."
+                )
+            self.columns.append(column_name)
+            self.df[column_name] = column_values.tolist()

-        self.columns.append(column_name)
-        self.df[column_name] = column_values
+        else:
+            raise ValueError("Only 1D and 2D arrays are supported for column_values.")

     def _validate_assign_predictions(
         self,
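
Note: `_add_column` now coerces values with `np.array` and accepts 2D input, storing each row as a list so that per-row vectors (e.g. embeddings) fit in a single DataFrame column. A standalone sketch of the same mechanics in plain pandas/NumPy (not calling the private method):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"id": [1, 2, 3]})
    embeddings = np.random.rand(3, 4)  # one 4-dim vector per row

    # 2D values: .tolist() turns each row into a Python list, mirroring
    # the new ndim == 2 branch above
    df["embedding"] = embeddings.tolist()
    print(df["embedding"].iloc[0])  # a 4-element list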
validmind/vm_models/test_suite/summary.py CHANGED
@@ -93,10 +93,10 @@ class TestSuiteSummary:

     def _add_results_link(self):
         # avoid circular import
-        from ...api_client import get_api_host, get_api_project
+        from ...api_client import get_api_host, get_api_model

         ui_host = get_api_host().replace("/api/v1/tracking", "").replace("api", "app")
-        link = f"{ui_host}/projects/{get_api_project()}/project-overview"
+        link = f"{ui_host}/projects/{get_api_model()}/project-overview"
         results_link = f"""
             <h3>
                 Check out the updated documentation in your
{validmind-2.4.1.dist-info → validmind-2.4.5.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 2.4.1
+Version: 2.4.5
 Summary: ValidMind Developer Framework
 License: Commercial License
 Author: Andres Rodriguez
@@ -32,7 +32,7 @@ Requires-Dist: mistune (>=3.0.2,<4.0.0)
 Requires-Dist: nltk (>=3.8.1,<4.0.0)
 Requires-Dist: numba (<0.59.0)
 Requires-Dist: numpy
-Requires-Dist: openai (>=1) ; extra == "all" or extra == "llm"
+Requires-Dist: openai (>=1) ; extra == "all"
 Requires-Dist: pandas (>=1.1,<2)
 Requires-Dist: plotly
 Requires-Dist: plotly-express
@@ -46,6 +46,7 @@ Requires-Dist: scikit-learn
 Requires-Dist: scipy
 Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
 Requires-Dist: seaborn
+Requires-Dist: sentencepiece (>=0.2.0,<0.3.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
 Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: shap (>=0.42.0,<0.43.0)
 Requires-Dist: statsmodels
@@ -53,7 +54,7 @@ Requires-Dist: tabulate (>=0.8.9,<0.9.0)
 Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
 Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
 Requires-Dist: tqdm
-Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "llm" or extra == "huggingface"
+Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
 Requires-Dist: xgboost (>=1.5.2,<3)
 Requires-Dist: ydata-profiling
 Description-Content-Type: text/markdown
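
Note: per the updated requirements, `pip install "validmind[llm]"` no longer pulls in `openai` (it now ships only with the `all` extra), so environments that relied on the old behavior must install it explicitly; `sentencepiece` is newly required by the `all`, `huggingface`, and `llm` extras, while the `transformers` marker change is only a reordering of the same extras.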