workbench 0.8.213__py3-none-any.whl → 0.8.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +257 -80
- workbench/algorithms/dataframe/projection_2d.py +38 -21
- workbench/algorithms/dataframe/proximity.py +75 -150
- workbench/algorithms/graph/light/proximity_graph.py +5 -5
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +2 -2
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +3 -0
- workbench/api/endpoint.py +10 -5
- workbench/api/feature_set.py +76 -6
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +43 -4
- workbench/core/artifacts/endpoint_core.py +65 -117
- workbench/core/artifacts/feature_set_core.py +3 -3
- workbench/core/artifacts/model_core.py +6 -4
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +30 -10
- workbench/model_script_utils/model_script_utils.py +15 -11
- workbench/model_script_utils/pytorch_utils.py +11 -1
- workbench/model_scripts/chemprop/chemprop.template +147 -71
- workbench/model_scripts/chemprop/generated_model_script.py +151 -75
- workbench/model_scripts/chemprop/model_script_utils.py +15 -11
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +45 -27
- workbench/model_scripts/pytorch_model/model_script_utils.py +15 -11
- workbench/model_scripts/pytorch_model/pytorch.template +42 -24
- workbench/model_scripts/pytorch_model/pytorch_utils.py +11 -1
- workbench/model_scripts/script_generation.py +4 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +167 -156
- workbench/model_scripts/xgb_model/model_script_utils.py +15 -11
- workbench/model_scripts/xgb_model/xgb_model.template +163 -152
- workbench/repl/workbench_shell.py +0 -5
- workbench/scripts/endpoint_test.py +2 -2
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/utils/chem_utils/fingerprints.py +87 -46
- workbench/utils/chemprop_utils.py +23 -5
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +94 -10
- workbench/utils/model_utils.py +91 -9
- workbench/utils/pytorch_utils.py +1 -1
- workbench/utils/shap_utils.py +1 -55
- workbench/web_interface/components/plugins/scatter_plot.py +4 -8
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/METADATA +2 -1
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/RECORD +54 -50
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/entry_points.txt +1 -0
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/WHEEL +0 -0
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/top_level.txt +0 -0
workbench/utils/metrics_utils.py
CHANGED
|
@@ -18,10 +18,32 @@ from sklearn.metrics import (
|
|
|
18
18
|
log = logging.getLogger("workbench")
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
def validate_proba_columns(predictions_df: pd.DataFrame, class_labels: List[str], guessing: bool = False) -> bool:
|
|
22
|
+
"""Validate that probability columns match class labels.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
predictions_df: DataFrame with prediction results
|
|
26
|
+
class_labels: List of class labels
|
|
27
|
+
guessing: Whether class labels were guessed from data
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
True if validation passes
|
|
31
|
+
|
|
32
|
+
Raises:
|
|
33
|
+
ValueError: If probability columns don't match class labels
|
|
34
|
+
"""
|
|
35
|
+
proba_columns = [col.replace("_proba", "") for col in predictions_df.columns if col.endswith("_proba")]
|
|
36
|
+
|
|
37
|
+
if sorted(class_labels) != sorted(proba_columns):
|
|
38
|
+
label_type = "GUESSED class_labels" if guessing else "class_labels"
|
|
39
|
+
raise ValueError(f"_proba columns {proba_columns} != {label_type} {class_labels}!")
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
|
|
21
43
|
def compute_classification_metrics(
|
|
22
44
|
predictions_df: pd.DataFrame,
|
|
23
45
|
target_col: str,
|
|
24
|
-
class_labels: List[str],
|
|
46
|
+
class_labels: Optional[List[str]] = None,
|
|
25
47
|
prediction_col: str = "prediction",
|
|
26
48
|
) -> pd.DataFrame:
|
|
27
49
|
"""Compute classification metrics from a predictions DataFrame.
|
|
@@ -29,26 +51,62 @@ def compute_classification_metrics(
|
|
|
29
51
|
Args:
|
|
30
52
|
predictions_df: DataFrame with target and prediction columns
|
|
31
53
|
target_col: Name of the target column
|
|
32
|
-
class_labels: List of class labels in order
|
|
54
|
+
class_labels: List of class labels in order (if None, inferred from target column)
|
|
33
55
|
prediction_col: Name of the prediction column (default: "prediction")
|
|
34
56
|
|
|
35
57
|
Returns:
|
|
36
58
|
DataFrame with per-class metrics (precision, recall, f1, roc_auc, support)
|
|
37
|
-
plus a weighted 'all' row
|
|
59
|
+
plus a weighted 'all' row. Returns empty DataFrame if validation fails.
|
|
38
60
|
"""
|
|
39
|
-
|
|
40
|
-
|
|
61
|
+
# Validate inputs
|
|
62
|
+
if predictions_df.empty:
|
|
63
|
+
log.warning("Empty DataFrame provided. Returning empty metrics.")
|
|
64
|
+
return pd.DataFrame()
|
|
65
|
+
|
|
66
|
+
if prediction_col not in predictions_df.columns:
|
|
67
|
+
log.warning(f"Prediction column '{prediction_col}' not found in DataFrame. Returning empty metrics.")
|
|
68
|
+
return pd.DataFrame()
|
|
69
|
+
|
|
70
|
+
if target_col not in predictions_df.columns:
|
|
71
|
+
log.warning(f"Target column '{target_col}' not found in DataFrame. Returning empty metrics.")
|
|
72
|
+
return pd.DataFrame()
|
|
73
|
+
|
|
74
|
+
# Handle NaN predictions
|
|
75
|
+
df = predictions_df.copy()
|
|
76
|
+
nan_pred = df[prediction_col].isnull().sum()
|
|
77
|
+
if nan_pred > 0:
|
|
78
|
+
log.warning(f"Dropping {nan_pred} rows with NaN predictions.")
|
|
79
|
+
df = df[~df[prediction_col].isnull()]
|
|
80
|
+
|
|
81
|
+
if df.empty:
|
|
82
|
+
log.warning("No valid rows after dropping NaNs. Returning empty metrics.")
|
|
83
|
+
return pd.DataFrame()
|
|
84
|
+
|
|
85
|
+
# Handle class labels
|
|
86
|
+
guessing = False
|
|
87
|
+
if class_labels is None:
|
|
88
|
+
log.warning("Class labels not provided. Inferring from target column.")
|
|
89
|
+
class_labels = df[target_col].unique().tolist()
|
|
90
|
+
guessing = True
|
|
91
|
+
|
|
92
|
+
# Validate probability columns if present
|
|
93
|
+
proba_cols = [col for col in df.columns if col.endswith("_proba")]
|
|
94
|
+
if proba_cols:
|
|
95
|
+
validate_proba_columns(df, class_labels, guessing=guessing)
|
|
96
|
+
|
|
97
|
+
y_true = df[target_col]
|
|
98
|
+
y_pred = df[prediction_col]
|
|
41
99
|
|
|
42
100
|
# Precision, recall, f1, support per class
|
|
43
101
|
prec, rec, f1, support = precision_recall_fscore_support(y_true, y_pred, labels=class_labels, zero_division=0)
|
|
44
102
|
|
|
45
103
|
# ROC AUC per class (requires probability columns and sorted labels)
|
|
46
|
-
|
|
47
|
-
if all(col in
|
|
104
|
+
proba_col_names = [f"{label}_proba" for label in class_labels]
|
|
105
|
+
if all(col in df.columns for col in proba_col_names):
|
|
48
106
|
# roc_auc_score requires labels to be sorted, so we sort and reorder results back
|
|
49
107
|
sorted_labels = sorted(class_labels)
|
|
50
108
|
sorted_proba_cols = [f"{label}_proba" for label in sorted_labels]
|
|
51
|
-
y_score_sorted =
|
|
109
|
+
y_score_sorted = df[sorted_proba_cols].values
|
|
52
110
|
roc_auc_sorted = roc_auc_score(y_true, y_score_sorted, labels=sorted_labels, multi_class="ovr", average=None)
|
|
53
111
|
# Map back to original class_labels order
|
|
54
112
|
label_to_auc = dict(zip(sorted_labels, roc_auc_sorted))
|
|
@@ -97,9 +155,35 @@ def compute_regression_metrics(
|
|
|
97
155
|
|
|
98
156
|
Returns:
|
|
99
157
|
DataFrame with regression metrics (rmse, mae, medae, r2, spearmanr, support)
|
|
158
|
+
Returns empty DataFrame if validation fails or no valid data.
|
|
100
159
|
"""
|
|
101
|
-
|
|
102
|
-
|
|
160
|
+
# Validate inputs
|
|
161
|
+
if predictions_df.empty:
|
|
162
|
+
log.warning("Empty DataFrame provided. Returning empty metrics.")
|
|
163
|
+
return pd.DataFrame()
|
|
164
|
+
|
|
165
|
+
if prediction_col not in predictions_df.columns:
|
|
166
|
+
log.warning(f"Prediction column '{prediction_col}' not found in DataFrame. Returning empty metrics.")
|
|
167
|
+
return pd.DataFrame()
|
|
168
|
+
|
|
169
|
+
if target_col not in predictions_df.columns:
|
|
170
|
+
log.warning(f"Target column '{target_col}' not found in DataFrame. Returning empty metrics.")
|
|
171
|
+
return pd.DataFrame()
|
|
172
|
+
|
|
173
|
+
# Handle NaN values
|
|
174
|
+
df = predictions_df[[target_col, prediction_col]].copy()
|
|
175
|
+
nan_target = df[target_col].isnull().sum()
|
|
176
|
+
nan_pred = df[prediction_col].isnull().sum()
|
|
177
|
+
if nan_target > 0 or nan_pred > 0:
|
|
178
|
+
log.warning(f"NaNs found: {target_col}={nan_target}, {prediction_col}={nan_pred}. Dropping NaN rows.")
|
|
179
|
+
df = df.dropna()
|
|
180
|
+
|
|
181
|
+
if df.empty:
|
|
182
|
+
log.warning("No valid rows after dropping NaNs. Returning empty metrics.")
|
|
183
|
+
return pd.DataFrame()
|
|
184
|
+
|
|
185
|
+
y_true = df[target_col].values
|
|
186
|
+
y_pred = df[prediction_col].values
|
|
103
187
|
|
|
104
188
|
return pd.DataFrame(
|
|
105
189
|
[
|
workbench/utils/model_utils.py
CHANGED
|
@@ -93,16 +93,17 @@ def get_custom_script_path(package: str, script_name: str) -> Path:
|
|
|
93
93
|
return script_path
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
def proximity_model_local(model: "Model"):
|
|
97
|
-
"""Create a
|
|
96
|
+
def proximity_model_local(model: "Model", include_all_columns: bool = False):
|
|
97
|
+
"""Create a FeatureSpaceProximity Model for this Model
|
|
98
98
|
|
|
99
99
|
Args:
|
|
100
100
|
model (Model): The Model/FeatureSet used to create the proximity model
|
|
101
|
+
include_all_columns (bool): Include all DataFrame columns in neighbor results (default: False)
|
|
101
102
|
|
|
102
103
|
Returns:
|
|
103
|
-
|
|
104
|
+
FeatureSpaceProximity: The proximity model
|
|
104
105
|
"""
|
|
105
|
-
from workbench.algorithms.dataframe.
|
|
106
|
+
from workbench.algorithms.dataframe.feature_space_proximity import FeatureSpaceProximity # noqa: F401
|
|
106
107
|
from workbench.api import Model, FeatureSet # noqa: F401 (avoid circular import)
|
|
107
108
|
|
|
108
109
|
# Get Feature and Target Columns from the existing given Model
|
|
@@ -121,8 +122,59 @@ def proximity_model_local(model: "Model"):
|
|
|
121
122
|
model_ids = set(model_df[id_column])
|
|
122
123
|
full_df["in_model"] = full_df[id_column].isin(model_ids)
|
|
123
124
|
|
|
124
|
-
# Create and return the
|
|
125
|
-
return
|
|
125
|
+
# Create and return the FeatureSpaceProximity Model
|
|
126
|
+
return FeatureSpaceProximity(
|
|
127
|
+
full_df, id_column=id_column, features=features, target=target, include_all_columns=include_all_columns
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def fingerprint_prox_model_local(
|
|
132
|
+
model: "Model",
|
|
133
|
+
include_all_columns: bool = False,
|
|
134
|
+
radius: int = 2,
|
|
135
|
+
n_bits: int = 1024,
|
|
136
|
+
counts: bool = False,
|
|
137
|
+
):
|
|
138
|
+
"""Create a FingerprintProximity Model for this Model
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
model (Model): The Model used to create the fingerprint proximity model
|
|
142
|
+
include_all_columns (bool): Include all DataFrame columns in neighbor results (default: False)
|
|
143
|
+
radius (int): Morgan fingerprint radius (default: 2)
|
|
144
|
+
n_bits (int): Number of bits for the fingerprint (default: 1024)
|
|
145
|
+
counts (bool): Use count fingerprints instead of binary (default: False)
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
FingerprintProximity: The fingerprint proximity model
|
|
149
|
+
"""
|
|
150
|
+
from workbench.algorithms.dataframe.fingerprint_proximity import FingerprintProximity # noqa: F401
|
|
151
|
+
from workbench.api import Model, FeatureSet # noqa: F401 (avoid circular import)
|
|
152
|
+
|
|
153
|
+
# Get Target Column from the existing given Model
|
|
154
|
+
target = model.target()
|
|
155
|
+
|
|
156
|
+
# Backtrack our FeatureSet to get the ID column
|
|
157
|
+
fs = FeatureSet(model.get_input())
|
|
158
|
+
id_column = fs.id_column
|
|
159
|
+
|
|
160
|
+
# Create the Proximity Model from both the full FeatureSet and the Model training data
|
|
161
|
+
full_df = fs.pull_dataframe()
|
|
162
|
+
model_df = model.training_view().pull_dataframe()
|
|
163
|
+
|
|
164
|
+
# Mark rows that are in the model
|
|
165
|
+
model_ids = set(model_df[id_column])
|
|
166
|
+
full_df["in_model"] = full_df[id_column].isin(model_ids)
|
|
167
|
+
|
|
168
|
+
# Create and return the FingerprintProximity Model
|
|
169
|
+
return FingerprintProximity(
|
|
170
|
+
full_df,
|
|
171
|
+
id_column=id_column,
|
|
172
|
+
target=target,
|
|
173
|
+
include_all_columns=include_all_columns,
|
|
174
|
+
radius=radius,
|
|
175
|
+
n_bits=n_bits,
|
|
176
|
+
counts=counts,
|
|
177
|
+
)
|
|
126
178
|
|
|
127
179
|
|
|
128
180
|
def noise_model_local(model: "Model"):
|
|
@@ -157,13 +209,43 @@ def noise_model_local(model: "Model"):
|
|
|
157
209
|
return NoiseModel(full_df, id_column, features, target)
|
|
158
210
|
|
|
159
211
|
|
|
160
|
-
def
|
|
212
|
+
def cleanlab_model_local(model: "Model"):
|
|
213
|
+
"""Create a CleanlabModels instance for detecting data quality issues in a Model's training data.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
model (Model): The Model used to create the cleanlab models
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
CleanlabModels: Factory providing access to CleanLearning and Datalab models.
|
|
220
|
+
- clean_learning(): CleanLearning model with enhanced get_label_issues()
|
|
221
|
+
- datalab(): Datalab instance with report(), get_issues()
|
|
222
|
+
"""
|
|
223
|
+
from workbench.algorithms.models.cleanlab_model import create_cleanlab_model # noqa: F401 (avoid circular import)
|
|
224
|
+
from workbench.api import Model, FeatureSet # noqa: F401 (avoid circular import)
|
|
225
|
+
|
|
226
|
+
# Get Feature and Target Columns from the existing given Model
|
|
227
|
+
features = model.features()
|
|
228
|
+
target = model.target()
|
|
229
|
+
model_type = model.model_type
|
|
230
|
+
|
|
231
|
+
# Backtrack our FeatureSet to get the ID column
|
|
232
|
+
fs = FeatureSet(model.get_input())
|
|
233
|
+
id_column = fs.id_column
|
|
234
|
+
|
|
235
|
+
# Get the full FeatureSet data
|
|
236
|
+
full_df = fs.pull_dataframe()
|
|
237
|
+
|
|
238
|
+
# Create and return the CleanLearning model
|
|
239
|
+
return create_cleanlab_model(full_df, id_column, features, target, model_type=model_type)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def published_proximity_model(model: "Model", prox_model_name: str, include_all_columns: bool = False) -> "Model":
|
|
161
243
|
"""Create a published proximity model based on the given model
|
|
162
244
|
|
|
163
245
|
Args:
|
|
164
246
|
model (Model): The model to create the proximity model from
|
|
165
247
|
prox_model_name (str): The name of the proximity model to create
|
|
166
|
-
|
|
248
|
+
include_all_columns (bool): Include all DataFrame columns in results (default: False)
|
|
167
249
|
Returns:
|
|
168
250
|
Model: The proximity model
|
|
169
251
|
"""
|
|
@@ -186,7 +268,7 @@ def published_proximity_model(model: "Model", prox_model_name: str, track_column
|
|
|
186
268
|
description=f"Proximity Model for {model.name}",
|
|
187
269
|
tags=["proximity", model.name],
|
|
188
270
|
custom_script=script_path,
|
|
189
|
-
custom_args={"
|
|
271
|
+
custom_args={"include_all_columns": include_all_columns},
|
|
190
272
|
)
|
|
191
273
|
return prox_model
|
|
192
274
|
|
workbench/utils/pytorch_utils.py
CHANGED
|
@@ -75,7 +75,7 @@ if __name__ == "__main__":
|
|
|
75
75
|
from workbench.api import Model
|
|
76
76
|
|
|
77
77
|
# Test pulling CV results
|
|
78
|
-
model_name = "aqsol-pytorch
|
|
78
|
+
model_name = "aqsol-reg-pytorch"
|
|
79
79
|
print(f"Loading Workbench model: {model_name}")
|
|
80
80
|
model = Model(model_name)
|
|
81
81
|
print(f"Model Framework: {model.model_framework}")
|
workbench/utils/shap_utils.py
CHANGED
|
@@ -9,6 +9,7 @@ from typing import Optional, List, Tuple, Dict, Union
|
|
|
9
9
|
from workbench.utils.xgboost_model_utils import xgboost_model_from_s3
|
|
10
10
|
from workbench.utils.model_utils import load_category_mappings_from_s3
|
|
11
11
|
from workbench.utils.pandas_utils import convert_categorical_types
|
|
12
|
+
from workbench.model_script_utils.model_script_utils import decompress_features
|
|
12
13
|
|
|
13
14
|
# Set up the log
|
|
14
15
|
log = logging.getLogger("workbench")
|
|
@@ -111,61 +112,6 @@ def shap_values_data(
|
|
|
111
112
|
return result_df, feature_df
|
|
112
113
|
|
|
113
114
|
|
|
114
|
-
def decompress_features(
|
|
115
|
-
df: pd.DataFrame, features: List[str], compressed_features: List[str]
|
|
116
|
-
) -> Tuple[pd.DataFrame, List[str]]:
|
|
117
|
-
"""Prepare features for the XGBoost model
|
|
118
|
-
|
|
119
|
-
Args:
|
|
120
|
-
df (pd.DataFrame): The features DataFrame
|
|
121
|
-
features (List[str]): Full list of feature names
|
|
122
|
-
compressed_features (List[str]): List of feature names to decompress (bitstrings)
|
|
123
|
-
|
|
124
|
-
Returns:
|
|
125
|
-
pd.DataFrame: DataFrame with the decompressed features
|
|
126
|
-
List[str]: Updated list of feature names after decompression
|
|
127
|
-
|
|
128
|
-
Raises:
|
|
129
|
-
ValueError: If any missing values are found in the specified features
|
|
130
|
-
"""
|
|
131
|
-
|
|
132
|
-
# Check for any missing values in the required features
|
|
133
|
-
missing_counts = df[features].isna().sum()
|
|
134
|
-
if missing_counts.any():
|
|
135
|
-
missing_features = missing_counts[missing_counts > 0]
|
|
136
|
-
print(
|
|
137
|
-
f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
|
|
138
|
-
"WARNING: You might want to remove/replace all NaN values before processing."
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# Decompress the specified compressed features
|
|
142
|
-
decompressed_features = features
|
|
143
|
-
for feature in compressed_features:
|
|
144
|
-
if (feature not in df.columns) or (feature not in features):
|
|
145
|
-
print(f"Feature '{feature}' not in the features list, skipping decompression.")
|
|
146
|
-
continue
|
|
147
|
-
|
|
148
|
-
# Remove the feature from the list of features to avoid duplication
|
|
149
|
-
decompressed_features.remove(feature)
|
|
150
|
-
|
|
151
|
-
# Handle all compressed features as bitstrings
|
|
152
|
-
bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
|
|
153
|
-
prefix = feature[:3]
|
|
154
|
-
|
|
155
|
-
# Create all new columns at once - avoids fragmentation
|
|
156
|
-
new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
|
|
157
|
-
new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
|
|
158
|
-
|
|
159
|
-
# Add to features list
|
|
160
|
-
decompressed_features.extend(new_col_names)
|
|
161
|
-
|
|
162
|
-
# Drop original column and concatenate new ones
|
|
163
|
-
df = df.drop(columns=[feature])
|
|
164
|
-
df = pd.concat([df, new_df], axis=1)
|
|
165
|
-
|
|
166
|
-
return df, decompressed_features
|
|
167
|
-
|
|
168
|
-
|
|
169
115
|
def _calculate_shap_values(workbench_model, sample_df: pd.DataFrame = None):
|
|
170
116
|
"""
|
|
171
117
|
Internal function to calculate SHAP values for Workbench Models.
|
|
@@ -420,21 +420,17 @@ if __name__ == "__main__":
|
|
|
420
420
|
df = pd.DataFrame(data)
|
|
421
421
|
|
|
422
422
|
# Get a UQ regressor model
|
|
423
|
-
|
|
424
|
-
# end = Endpoint("aqsol-uq")
|
|
425
|
-
# df = end.auto_inference()
|
|
426
|
-
# DFStore().upsert("/workbench/models/aqsol-uq/auto_inference", df)
|
|
423
|
+
from workbench.api import Model
|
|
427
424
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
df = DFStore().get("/workbench/models/aqsol-uq-100/full_cross_fold_inference")
|
|
425
|
+
model = Model("logd-reg-xgb")
|
|
426
|
+
df = model.get_inference_predictions("full_cross_fold")
|
|
431
427
|
|
|
432
428
|
# Run the Unit Test on the Plugin
|
|
433
429
|
PluginUnitTest(
|
|
434
430
|
ScatterPlot,
|
|
435
431
|
input_data=df,
|
|
436
432
|
theme="midnight_blue",
|
|
437
|
-
x="
|
|
433
|
+
x="logd",
|
|
438
434
|
y="prediction",
|
|
439
435
|
color="prediction_std",
|
|
440
436
|
suppress_hover_display=True,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: workbench
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.219
|
|
4
4
|
Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
|
|
5
5
|
Author-email: SuperCowPowers LLC <support@supercowpowers.com>
|
|
6
6
|
License: MIT License
|
|
@@ -47,6 +47,7 @@ Requires-Dist: cryptography>=44.0.2
|
|
|
47
47
|
Requires-Dist: ipython>=8.37.0
|
|
48
48
|
Requires-Dist: pyreadline3; sys_platform == "win32"
|
|
49
49
|
Requires-Dist: scikit-learn>=1.5.2
|
|
50
|
+
Requires-Dist: umap-learn>=0.5.8
|
|
50
51
|
Requires-Dist: xgboost>=3.0.3
|
|
51
52
|
Requires-Dist: joblib>=1.3.2
|
|
52
53
|
Requires-Dist: requests>=2.26.0
|