workbench 0.8.192__py3-none-any.whl → 0.8.193__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/core/artifacts/model_core.py +29 -42
- workbench/core/transforms/features_to_model/features_to_model.py +2 -2
- workbench/model_scripts/xgb_model/generated_model_script.py +468 -0
- workbench/utils/model_utils.py +15 -3
- workbench/utils/xgboost_model_utils.py +24 -13
- workbench/web_interface/components/model_plot.py +7 -1
- workbench/web_interface/components/plugins/model_details.py +7 -2
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/METADATA +23 -2
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/RECORD +13 -12
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/licenses/LICENSE +1 -1
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/WHEEL +0 -0
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/model_core.py CHANGED

@@ -41,52 +41,39 @@ class ModelType(Enum):
 class ModelImages:
     """Class for retrieving workbench inference images"""

-    [... 10 lines not captured in this diff view ...]
-        ),
-        ("us-east-1", "pytorch_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
-        ),
-        # US West 2 images
-        ("us-west-2", "training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-general-ml-training:0.1"
-        ),
-        ("us-west-2", "inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-general-ml-inference:0.1"
-        ),
-        ("us-west-2", "pytorch_training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
-        ),
-        ("us-west-2", "pytorch_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
-        ),
-        # ARM64 images
-        # Meta Endpoint inference images
-        ("us-east-1", "meta-endpoint", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-meta-endpoint:0.1"
-        ),
-        ("us-west-2", "meta-endpoint", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-meta-endpoint:0.1"
-        ),
+    # Account ID
+    ACCOUNT_ID = "507740646243"
+
+    # Image name mappings
+    IMAGE_NAMES = {
+        "training": "py312-general-ml-training",
+        "inference": "py312-general-ml-inference",
+        "pytorch_training": "py312-pytorch-training",
+        "pytorch_inference": "py312-pytorch-inference",
+        "meta-endpoint": "py312-meta-endpoint",
     }

     @classmethod
     def get_image_uri(cls, region, image_type, version="0.1", architecture="x86_64"):
-        [... 6 lines not captured in this diff view ...]
-        )
+        """
+        Dynamically construct ECR image URI.
+
+        Args:
+            region: AWS region (e.g., 'us-east-1', 'us-west-2')
+            image_type: Type of image (e.g., 'training', 'inference', 'pytorch_training')
+            version: Image version (e.g., '0.1', '0.2')
+            architecture: CPU architecture (default: 'x86_64', currently unused but kept for compatibility)
+
+        Returns:
+            ECR image URI string
+        """
+        if image_type not in cls.IMAGE_NAMES:
+            raise ValueError(f"Unknown image_type: {image_type}. Valid types: {list(cls.IMAGE_NAMES.keys())}")
+
+        image_name = cls.IMAGE_NAMES[image_type]
+        uri = f"{cls.ACCOUNT_ID}.dkr.ecr.{region}.amazonaws.com/aws-ml-images/{image_name}:{version}"
+
+        return uri


 class ModelCore(Artifact):
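For reference, a quick sketch (not from the package) of the new dynamic construction in use:

    # URI is assembled from ACCOUNT_ID + region + IMAGE_NAMES[image_type] + version
    uri = ModelImages.get_image_uri("us-west-2", "inference", version="0.2")
    # -> "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-general-ml-inference:0.2"

    # Unknown image types now fail fast
    ModelImages.get_image_uri("us-east-1", "bogus")  # raises ValueError listing the valid types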
workbench/core/transforms/features_to_model/features_to_model.py CHANGED

@@ -233,7 +233,7 @@ class FeaturesToModel(Transform):
         source_dir = str(Path(script_path).parent)

         # Create a Sagemaker Model with our script
-        image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image, "0.
+        image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image, "0.2")
         self.estimator = Estimator(
             entry_point=entry_point,
             source_dir=source_dir,

@@ -306,7 +306,7 @@ class FeaturesToModel(Transform):

         # Register our model
         image = ModelImages.get_image_uri(
-            self.sm_session.boto_region_name, self.inference_image, "0.
+            self.sm_session.boto_region_name, self.inference_image, "0.2", self.inference_arch
         )
         self.log.important(f"Registering model {self.output_name} with Inference Image {image}...")
         model = self.estimator.create_model(role=self.workbench_role_arn)
workbench/model_scripts/xgb_model/generated_model_script.py ADDED

@@ -0,0 +1,468 @@
+# Imports for XGB Model
+import xgboost as xgb
+import awswrangler as wr
+import numpy as np
+
+# Model Performance Scores
+from sklearn.metrics import (
+    mean_absolute_error,
+    r2_score,
+    root_mean_squared_error,
+    precision_recall_fscore_support,
+    confusion_matrix,
+)
+
+# Classification Encoder
+from sklearn.preprocessing import LabelEncoder
+
+# Scikit Learn Imports
+from sklearn.model_selection import train_test_split
+
+from io import StringIO
+import json
+import argparse
+import joblib
+import os
+import pandas as pd
+from typing import List, Tuple
+
+# Template Parameters
+TEMPLATE_PARAMS = {
+    "model_type": "regressor",
+    "target": "solubility",
+    "features": ['molwt', 'mollogp', 'molmr', 'heavyatomcount', 'numhacceptors', 'numhdonors', 'numheteroatoms', 'numrotatablebonds', 'numvalenceelectrons', 'numaromaticrings', 'numsaturatedrings', 'numaliphaticrings', 'ringcount', 'tpsa', 'labuteasa', 'balabanj', 'bertzct'],
+    "compressed_features": [],
+    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/aqsol-regression/training",
+    "train_all_data": False,
+    "hyperparameters": {},
+}
+
+
+# Function to check if dataframe is empty
+def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
+    """
+    Check if the provided dataframe is empty and raise an exception if it is.
+
+    Args:
+        df (pd.DataFrame): DataFrame to check
+        df_name (str): Name of the DataFrame
+    """
+    if df.empty:
+        msg = f"*** The training data {df_name} has 0 rows! ***STOPPING***"
+        print(msg)
+        raise ValueError(msg)
+
+
+def expand_proba_column(df: pd.DataFrame, class_labels: List[str]) -> pd.DataFrame:
+    """
+    Expands a column in a DataFrame containing a list of probabilities into separate columns.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing a "pred_proba" column
+        class_labels (List[str]): List of class labels
+
+    Returns:
+        pd.DataFrame: DataFrame with the "pred_proba" expanded into separate columns
+    """
+
+    # Sanity check
+    proba_column = "pred_proba"
+    if proba_column not in df.columns:
+        raise ValueError('DataFrame does not contain a "pred_proba" column')
+
+    # Construct new column names with '_proba' suffix
+    proba_splits = [f"{label}_proba" for label in class_labels]
+
+    # Expand the proba_column into separate columns for each probability
+    proba_df = pd.DataFrame(df[proba_column].tolist(), columns=proba_splits)
+
+    # Drop any proba columns and reset the index in prep for the concat
+    df = df.drop(columns=[proba_column] + proba_splits, errors="ignore")
+    df = df.reset_index(drop=True)
+
+    # Concatenate the new columns with the original DataFrame
+    df = pd.concat([df, proba_df], axis=1)
+    print(df)
+    return df
+
+
+def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> pd.DataFrame:
+    """
+    Matches and renames DataFrame columns to match model feature names (case-insensitive).
+    Prioritizes exact matches, then case-insensitive matches.
+
+    Raises ValueError if any model features cannot be matched.
+    """
+    df_columns_lower = {col.lower(): col for col in df.columns}
+    rename_dict = {}
+    missing = []
+    for feature in model_features:
+        if feature in df.columns:
+            continue  # Exact match
+        elif feature.lower() in df_columns_lower:
+            rename_dict[df_columns_lower[feature.lower()]] = feature
+        else:
+            missing.append(feature)
+
+    if missing:
+        raise ValueError(f"Features not found: {missing}")
+
+    # Rename the DataFrame columns to match the model features
+    return df.rename(columns=rename_dict)
+
+
+def convert_categorical_types(df: pd.DataFrame, features: list, category_mappings={}) -> tuple:
+    """
+    Converts appropriate columns to categorical type with consistent mappings.
+
+    Args:
+        df (pd.DataFrame): The DataFrame to process.
+        features (list): List of feature names to consider for conversion.
+        category_mappings (dict, optional): Existing category mappings. If empty dict, we're in
+            training mode. If populated, we're in inference mode.
+
+    Returns:
+        tuple: (processed DataFrame, category mappings dictionary)
+    """
+    # Training mode
+    if category_mappings == {}:
+        for col in df.select_dtypes(include=["object", "string"]):
+            if col in features and df[col].nunique() < 20:
+                print(f"Training mode: Converting {col} to category")
+                df[col] = df[col].astype("category")
+                category_mappings[col] = df[col].cat.categories.tolist()  # Store category mappings
+
+    # Inference mode
+    else:
+        for col, categories in category_mappings.items():
+            if col in df.columns:
+                print(f"Inference mode: Applying categorical mapping for {col}")
+                df[col] = pd.Categorical(df[col], categories=categories)  # Apply consistent categorical mapping
+
+    return df, category_mappings
+
+
+def decompress_features(
+    df: pd.DataFrame, features: List[str], compressed_features: List[str]
+) -> Tuple[pd.DataFrame, List[str]]:
+    """Prepare features for the model by decompressing bitstring features
+
+    Args:
+        df (pd.DataFrame): The features DataFrame
+        features (List[str]): Full list of feature names
+        compressed_features (List[str]): List of feature names to decompress (bitstrings)
+
+    Returns:
+        pd.DataFrame: DataFrame with the decompressed features
+        List[str]: Updated list of feature names after decompression
+
+    Raises:
+        ValueError: If any missing values are found in the specified features
+    """
+
+    # Check for any missing values in the required features
+    missing_counts = df[features].isna().sum()
+    if missing_counts.any():
+        missing_features = missing_counts[missing_counts > 0]
+        print(
+            f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
+            "WARNING: You might want to remove/replace all NaN values before processing."
+        )
+
+    # Decompress the specified compressed features
+    decompressed_features = features.copy()
+    for feature in compressed_features:
+        if (feature not in df.columns) or (feature not in features):
+            print(f"Feature '{feature}' not in the features list, skipping decompression.")
+            continue
+
+        # Remove the feature from the list of features to avoid duplication
+        decompressed_features.remove(feature)
+
+        # Handle all compressed features as bitstrings
+        bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
+        prefix = feature[:3]
+
+        # Create all new columns at once - avoids fragmentation
+        new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
+        new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
+
+        # Add to features list
+        decompressed_features.extend(new_col_names)
+
+        # Drop original column and concatenate new ones
+        df = df.drop(columns=[feature])
+        df = pd.concat([df, new_df], axis=1)
+
+    return df, decompressed_features
+
+
+if __name__ == "__main__":
+    """The main function is for training the XGBoost model"""
+
+    # Harness Template Parameters
+    target = TEMPLATE_PARAMS["target"]
+    features = TEMPLATE_PARAMS["features"]
+    orig_features = features.copy()
+    compressed_features = TEMPLATE_PARAMS["compressed_features"]
+    model_type = TEMPLATE_PARAMS["model_type"]
+    model_metrics_s3_path = TEMPLATE_PARAMS["model_metrics_s3_path"]
+    train_all_data = TEMPLATE_PARAMS["train_all_data"]
+    hyperparameters = TEMPLATE_PARAMS["hyperparameters"]
+    validation_split = 0.2
+
+    # Script arguments for input/output directories
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
+    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
+    parser.add_argument(
+        "--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data")
+    )
+    args = parser.parse_args()
+
+    # Read the training data into DataFrames
+    training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
+    print(f"Training Files: {training_files}")
+
+    # Combine files and read them all into a single pandas dataframe
+    all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
+
+    # Check if the dataframe is empty
+    check_dataframe(all_df, "training_df")
+
+    # Features/Target output
+    print(f"Target: {target}")
+    print(f"Features: {str(features)}")
+
+    # Convert any features that might be categorical to 'category' type
+    all_df, category_mappings = convert_categorical_types(all_df, features)
+
+    # If we have compressed features, decompress them
+    if compressed_features:
+        print(f"Decompressing features {compressed_features}...")
+        all_df, features = decompress_features(all_df, features, compressed_features)
+
+    # Do we want to train on all the data?
+    if train_all_data:
+        print("Training on ALL of the data")
+        df_train = all_df.copy()
+        df_val = all_df.copy()
+
+    # Does the dataframe have a training column?
+    elif "training" in all_df.columns:
+        print("Found training column, splitting data based on training column")
+        df_train = all_df[all_df["training"]]
+        df_val = all_df[~all_df["training"]]
+    else:
+        # Just do a random training Split
+        print("WARNING: No training column found, splitting data with random state=42")
+        df_train, df_val = train_test_split(all_df, test_size=validation_split, random_state=42)
+    print(f"FIT/TRAIN: {df_train.shape}")
+    print(f"VALIDATION: {df_val.shape}")
+
+    # Use any hyperparameters to set up both the trainer and model configurations
+    print(f"Hyperparameters: {hyperparameters}")
+
+    # Now spin up our XGB Model
+    if model_type == "classifier":
+        xgb_model = xgb.XGBClassifier(enable_categorical=True, **hyperparameters)
+
+        # Encode the target column
+        label_encoder = LabelEncoder()
+        df_train[target] = label_encoder.fit_transform(df_train[target])
+        df_val[target] = label_encoder.transform(df_val[target])
+
+    else:
+        xgb_model = xgb.XGBRegressor(enable_categorical=True, **hyperparameters)
+        label_encoder = None  # We don't need this for regression
+
+    # Grab our Features, Target and Train the Model
+    y_train = df_train[target]
+    X_train = df_train[features]
+    xgb_model.fit(X_train, y_train)
+
+    # Make Predictions on the Validation Set
+    print(f"Making Predictions on Validation Set...")
+    y_validate = df_val[target]
+    X_validate = df_val[features]
+    preds = xgb_model.predict(X_validate)
+    if model_type == "classifier":
+        # Also get the probabilities for each class
+        print("Processing Probabilities...")
+        probs = xgb_model.predict_proba(X_validate)
+        df_val["pred_proba"] = [p.tolist() for p in probs]
+
+        # Expand the pred_proba column into separate columns for each class
+        print(df_val.columns)
+        df_val = expand_proba_column(df_val, label_encoder.classes_)
+        print(df_val.columns)
+
+        # Decode the target and prediction labels
+        y_validate = label_encoder.inverse_transform(y_validate)
+        preds = label_encoder.inverse_transform(preds)
+
+    # Save predictions to S3 (just the target, prediction, and '_proba' columns)
+    df_val["prediction"] = preds
+    output_columns = [target, "prediction"]
+    output_columns += [col for col in df_val.columns if col.endswith("_proba")]
+    wr.s3.to_csv(
+        df_val[output_columns],
+        path=f"{model_metrics_s3_path}/validation_predictions.csv",
+        index=False,
+    )
+
+    # Report Performance Metrics
+    if model_type == "classifier":
+        # Get the label names and their integer mapping
+        label_names = label_encoder.classes_
+
+        # Calculate various model performance metrics
+        scores = precision_recall_fscore_support(y_validate, preds, average=None, labels=label_names)
+
+        # Put the scores into a dataframe
+        score_df = pd.DataFrame(
+            {
+                target: label_names,
+                "precision": scores[0],
+                "recall": scores[1],
+                "fscore": scores[2],
+                "support": scores[3],
+            }
+        )
+
+        # We need to get creative with the Classification Metrics
+        metrics = ["precision", "recall", "fscore", "support"]
+        for t in label_names:
+            for m in metrics:
+                value = score_df.loc[score_df[target] == t, m].iloc[0]
+                print(f"Metrics:{t}:{m} {value}")
+
+        # Compute and output the confusion matrix
+        conf_mtx = confusion_matrix(y_validate, preds, labels=label_names)
+        for i, row_name in enumerate(label_names):
+            for j, col_name in enumerate(label_names):
+                value = conf_mtx[i, j]
+                print(f"ConfusionMatrix:{row_name}:{col_name} {value}")
+
+    else:
+        # Calculate various model performance metrics (regression)
+        rmse = root_mean_squared_error(y_validate, preds)
+        mae = mean_absolute_error(y_validate, preds)
+        r2 = r2_score(y_validate, preds)
+        print(f"RMSE: {rmse:.3f}")
+        print(f"MAE: {mae:.3f}")
+        print(f"R2: {r2:.3f}")
+        print(f"NumRows: {len(df_val)}")
+
+    # Now save the model to the standard place/name
+    joblib.dump(xgb_model, os.path.join(args.model_dir, "xgb_model.joblib"))
+
+    # Save the label encoder if we have one
+    if label_encoder:
+        joblib.dump(label_encoder, os.path.join(args.model_dir, "label_encoder.joblib"))
+
+    # Save the features (this will validate input during predictions)
+    with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
+        json.dump(orig_features, fp)  # We save the original features, not the decompressed ones
+
+    # Save the category mappings
+    with open(os.path.join(args.model_dir, "category_mappings.json"), "w") as fp:
+        json.dump(category_mappings, fp)
+
+
+def model_fn(model_dir):
+    """Deserialize and return fitted XGBoost model"""
+    model_path = os.path.join(model_dir, "xgb_model.joblib")
+    model = joblib.load(model_path)
+    return model
+
+
+def input_fn(input_data, content_type):
+    """Parse input data and return a DataFrame."""
+    if not input_data:
+        raise ValueError("Empty input data is not supported!")
+
+    # Decode bytes to string if necessary
+    if isinstance(input_data, bytes):
+        input_data = input_data.decode("utf-8")
+
+    if "text/csv" in content_type:
+        return pd.read_csv(StringIO(input_data))
+    elif "application/json" in content_type:
+        return pd.DataFrame(json.loads(input_data))  # Assumes JSON array of records
+    else:
+        raise ValueError(f"{content_type} not supported!")
+
+
+def output_fn(output_df, accept_type):
+    """Supports both CSV and JSON output formats."""
+    if "text/csv" in accept_type:
+        csv_output = output_df.fillna("N/A").to_csv(index=False)  # CSV with N/A for missing values
+        return csv_output, "text/csv"
+    elif "application/json" in accept_type:
+        return output_df.to_json(orient="records"), "application/json"  # JSON array of records (NaNs -> null)
+    else:
+        raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
+
+
+def predict_fn(df, model) -> pd.DataFrame:
+    """Make Predictions with our XGB Model
+
+    Args:
+        df (pd.DataFrame): The input DataFrame
+        model: The model use for predictions
+
+    Returns:
+        pd.DataFrame: The DataFrame with the predictions added
+    """
+    compressed_features = TEMPLATE_PARAMS["compressed_features"]
+
+    # Grab our feature columns (from training)
+    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
+    with open(os.path.join(model_dir, "feature_columns.json")) as fp:
+        features = json.load(fp)
+    print(f"Model Features: {features}")
+
+    # Load the category mappings (from training)
+    with open(os.path.join(model_dir, "category_mappings.json")) as fp:
+        category_mappings = json.load(fp)
+
+    # Load our Label Encoder if we have one
+    label_encoder = None
+    if os.path.exists(os.path.join(model_dir, "label_encoder.joblib")):
+        label_encoder = joblib.load(os.path.join(model_dir, "label_encoder.joblib"))
+
+    # We're going match features in a case-insensitive manner, accounting for all the permutations
+    # - Model has a feature list that's any case ("Id", "taCos", "cOunT", "likes_tacos")
+    # - Incoming data has columns that are mixed case ("ID", "Tacos", "Count", "Likes_Tacos")
+    matched_df = match_features_case_insensitive(df, features)
+
+    # Detect categorical types in the incoming DataFrame
+    matched_df, _ = convert_categorical_types(matched_df, features, category_mappings)
+
+    # If we have compressed features, decompress them
+    if compressed_features:
+        print("Decompressing features for prediction...")
+        matched_df, features = decompress_features(matched_df, features, compressed_features)
+
+    # Predict the features against our XGB Model
+    X = matched_df[features]
+    predictions = model.predict(X)
+
+    # If we have a label encoder, decode the predictions
+    if label_encoder:
+        predictions = label_encoder.inverse_transform(predictions)
+
+    # Set the predictions on the DataFrame
+    df["prediction"] = predictions
+
+    # Does our model have a 'predict_proba' method? If so we will call it and add the results to the DataFrame
+    if getattr(model, "predict_proba", None):
+        probs = model.predict_proba(matched_df[features])
+        df["pred_proba"] = [p.tolist() for p in probs]
+
+        # Expand the pred_proba column into separate columns for each class
+        df = expand_proba_column(df, label_encoder.classes_)
+
+    # All done, return the DataFrame with new columns for the predictions
+    return df
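To make the script's helpers concrete, here is a minimal sketch (not part of the package) run against toy data; it assumes convert_categorical_types and decompress_features from the generated script are in scope:

    import pandas as pd

    df = pd.DataFrame(
        {
            "fingerprint": ["1010", "0110"],  # bitstring feature to decompress
            "color": ["red", "blue"],         # low-cardinality string -> category
            "molwt": [180.2, 46.1],
        }
    )
    features = ["fingerprint", "color", "molwt"]

    # Training mode (empty mappings): low-cardinality object columns become category
    df, mappings = convert_categorical_types(df, features)

    # "1010" expands to columns fin_0..fin_3 (prefix = feature[:3]) as uint8 bits
    df, features = decompress_features(df, features, ["fingerprint"])
    print(features)  # ['color', 'molwt', 'fin_0', 'fin_1', 'fin_2', 'fin_3']

At inference time, predict_fn replays the same two steps, passing the saved category_mappings so the categories line up with training.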
workbench/utils/model_utils.py CHANGED

@@ -188,6 +188,20 @@ def uq_model(model: "Model", uq_model_name: str, train_all_data: bool = False) -
     return uq_model


+def safe_extract_tarfile(tar_path: str, extract_path: str) -> None:
+    """
+    Extract a tarball safely, using data filter if available.
+
+    The filter parameter was backported to Python 3.8+, 3.9+, 3.10.13+, 3.11+
+    as a security patch, but may not be present in older patch versions.
+    """
+    with tarfile.open(tar_path, "r:gz") as tar:
+        if hasattr(tarfile, "data_filter"):
+            tar.extractall(path=extract_path, filter="data")
+        else:
+            tar.extractall(path=extract_path)
+
+
 def load_category_mappings_from_s3(model_artifact_uri: str) -> Optional[dict]:
     """
     Download and extract category mappings from a model artifact in S3.

@@ -206,9 +220,7 @@ def load_category_mappings_from_s3(model_artifact_uri: str) -> Optional[dict]:
         wr.s3.download(path=model_artifact_uri, local_file=local_tar_path)

         # Extract tarball
-        [... not captured in this diff view ...]
-        # Note: For 3.12+, can use filter="data" argument
-        tar.extractall(path=tmpdir)
+        safe_extract_tarfile(local_tar_path, tmpdir)

         # Look for category mappings in base directory only
         mappings_path = os.path.join(tmpdir, "category_mappings.json")
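A minimal usage sketch of the new helper (the tarball path is hypothetical):

    import tempfile
    from workbench.utils.model_utils import safe_extract_tarfile

    with tempfile.TemporaryDirectory() as tmpdir:
        # Uses tarfile's "data" filter when the runtime provides it (rejects path
        # traversal and other unsafe members); otherwise falls back to plain extractall
        safe_extract_tarfile("model.tar.gz", tmpdir)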
workbench/utils/xgboost_model_utils.py CHANGED

@@ -3,7 +3,6 @@
 import logging
 import os
 import tempfile
-import tarfile
 import joblib
 import pickle
 import glob

@@ -26,7 +25,7 @@ from scipy.stats import spearmanr
 from sklearn.preprocessing import LabelEncoder

 # Workbench Imports
-from workbench.utils.model_utils import load_category_mappings_from_s3
+from workbench.utils.model_utils import load_category_mappings_from_s3, safe_extract_tarfile
 from workbench.utils.pandas_utils import convert_categorical_types

 # Set up the log

@@ -50,9 +49,7 @@ def xgboost_model_from_s3(model_artifact_uri: str):
         wr.s3.download(path=model_artifact_uri, local_file=local_tar_path)

         # Extract tarball
-        [... not captured in this diff view ...]
-        # Note: For 3.12+, can use filter="data" argument
-        tar.extractall(path=tmpdir)
+        safe_extract_tarfile(local_tar_path, tmpdir)

         # Define model file patterns to search for (in order of preference)
         patterns = [

@@ -285,8 +282,18 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st

     # Check if we got a full sklearn model or need to create one
     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
-        [... 2 lines not captured in this diff view ...]
+        is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
+
+        # Get the model's hyperparameters and ensure enable_categorical=True
+        params = loaded_model.get_params()
+        params["enable_categorical"] = True
+
+        # Create new model with same params but enable_categorical=True
+        if is_classifier:
+            xgb_model = xgb.XGBClassifier(**params)
+        else:
+            xgb_model = xgb.XGBRegressor(**params)
+
     elif isinstance(loaded_model, xgb.Booster):
         # Legacy: got a booster, need to wrap it
         log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")

@@ -308,10 +315,12 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
     target_col = workbench_model.target()
     feature_cols = workbench_model.features()

-    # Convert string
+    # Convert string[python] to object, then to category for XGBoost compatibility
+    # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
     for col in feature_cols:
-        if df[col]
-
+        if pd.api.types.is_string_dtype(df[col]):
+            # Double conversion: string[python] -> object -> category
+            df[col] = df[col].astype("object").astype("category")

     X = df[feature_cols]
     y = df[target_col]

@@ -440,10 +449,12 @@ def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
     target_col = workbench_model.target()
     feature_cols = workbench_model.features()

-    # Convert string
+    # Convert string[python] to object, then to category for XGBoost compatibility
+    # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
     for col in feature_cols:
-        if df[col]
-
+        if pd.api.types.is_string_dtype(df[col]):
+            # Double conversion: string[python] -> object -> category
+            df[col] = df[col].astype("object").astype("category")

     # Determine which samples to run LOO on
     if len(df) > 1000:
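The "double conversion" comment is easiest to see in isolation; a small repro sketch of the dtype difference it targets (pandas 2.x):

    import pandas as pd

    s = pd.Series(["a", "b", "a"], dtype="string")  # pandas string dtype
    direct = s.astype("category")
    safe = s.astype("object").astype("category")
    print(direct.cat.categories.dtype)  # string dtype carried into the categories
    print(safe.cat.categories.dtype)    # object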
workbench/web_interface/components/model_plot.py CHANGED

@@ -39,7 +39,13 @@ class ModelPlot(ComponentInterface):
             # Calculate the distance from the diagonal for each point
             target = model.target()
             df["error"] = abs(df["prediction"] - df[target])
-            return ScatterPlot().update_properties(
+            return ScatterPlot().update_properties(
+                df,
+                color="error",
+                regression_line=True,
+                x=target,
+                y="prediction",
+            )[0]
         else:
             return self.display_text(f"Model Type: {model.model_type}\n\n Awesome Plot Coming Soon!")
workbench/web_interface/components/plugins/model_details.py CHANGED

@@ -249,8 +249,13 @@ class ModelDetails(PluginInterface):
         if not inference_runs:
             return [], None

-        #
-        [... not captured in this diff view ...]
+        # Default inference run (full_cross_fold if it exists, then auto_inference, then first)
+        if "full_cross_fold" in inference_runs:
+            default_inference_run = "full_cross_fold"
+        elif "auto_inference" in inference_runs:
+            default_inference_run = "auto_inference"
+        else:
+            default_inference_run = inference_runs[0]

         # Return the options for the dropdown and the selected value
         return inference_runs, default_inference_run
{workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/METADATA CHANGED

@@ -1,9 +1,30 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.
+Version: 0.8.193
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
-License
+License: MIT License
+
+        Copyright (c) 2021-2026 SuperCowPowers LLC
+
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+
 Project-URL: Homepage, https://github.com/SuperCowPowers/workbench
 Keywords: SageMaker,Machine Learning,AWS,Python,Utilities
 Classifier: Development Status :: 4 - Beta
{workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/RECORD CHANGED

@@ -56,7 +56,7 @@ workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99v
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
 workbench/core/artifacts/endpoint_core.py,sha256=VH-q-R4pfKyjCOXl2Gq1pxMNf4Ir0YdMw9YIHqd7CVU,51974
 workbench/core/artifacts/feature_set_core.py,sha256=6qOJoJ9_qwtyz4neFY6vMn73Ujjeut7E0dy_e8nYfSE,31462
-workbench/core/artifacts/model_core.py,sha256=
+workbench/core/artifacts/model_core.py,sha256=hY-2w_N4HtCO-vnfVQP22_PYYsnoAQfZzSS3vrP1wYY,51281
 workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
 workbench/core/cloud_platform/cloud_meta.py,sha256=-g4-LTC3D0PXb3VfaXdLR1ERijKuHdffeMK_zhD-koQ,8809
 workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97

@@ -102,7 +102,7 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
 workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
 workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
 workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/features_to_model/features_to_model.py,sha256=
+workbench/core/transforms/features_to_model/features_to_model.py,sha256=MHJQbKpzBQzW-ZXVfmYQ_1yvAHVPHsw81udBWotxiac,20115
 workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=TIYXvuK0s383PwJ4iS6fCRhuif6oIxsoWb4CpMGJjY4,6358
 workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894

@@ -153,6 +153,7 @@ workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb
 workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=QQvqx-eX9ZTbYmyupq6R6vIQwosmsmY_MRBPaHyfjdk,12586
 workbench/model_scripts/uq_models/mapie.template,sha256=2HIwB_658IsZiLIV1RViIZBIGgXxDsJPZinDUu8SchU,18961
 workbench/model_scripts/uq_models/requirements.txt,sha256=fw7T7t_YJAXK3T6Ysbesxh_Agx_tv0oYx72cEBTqRDY,98
+workbench/model_scripts/xgb_model/generated_model_script.py,sha256=0S2WYCcgguGJ3vqiZe9y5CLuhrpHwIOoKVJBIphxQSQ,18129
 workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/xgb_model/xgb_model.template,sha256=0uXknIEqgUaIFUfu2gfkxa3WHUr8HBBqBepGUTDvrhQ,17917
 workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -213,7 +214,7 @@ workbench/utils/lambda_utils.py,sha256=7GhGRPyXn9o-toWb9HBGSnI8-DhK9YRkwhCSk_mNK
 workbench/utils/license_manager.py,sha256=lNE9zZIglmX3zqqCKBdN1xqTgHCEZgJDxavF6pdG7fc,6825
 workbench/utils/log_utils.py,sha256=7n1NJXO_jUX82e6LWAQug6oPo3wiPDBYsqk9gsYab_A,3167
 workbench/utils/markdown_utils.py,sha256=4lEqzgG4EVmLcvvKKNUwNxVCySLQKJTJmWDiaDroI1w,8306
-workbench/utils/model_utils.py,sha256=
+workbench/utils/model_utils.py,sha256=_Gjr318BkMT7hv9M3g7eBYsFluYVPzMjWjMrpNpqx3A,13921
 workbench/utils/monitor_utils.py,sha256=kVaJ7BgUXs3VPMFYfLC03wkIV4Dq-pEhoXS0wkJFxCc,7858
 workbench/utils/pandas_utils.py,sha256=uTUx-d1KYfjbS9PMQp2_9FogCV7xVZR6XLzU5YAGmfs,39371
 workbench/utils/performance_utils.py,sha256=WDNvz-bOdC99cDuXl0urAV4DJ7alk_V3yzKPwvqgST4,1329

@@ -235,7 +236,7 @@ workbench/utils/workbench_cache.py,sha256=IQchxB81iR4eVggHBxUJdXxUCRkqWz1jKe5gxN
 workbench/utils/workbench_event_bridge.py,sha256=z1GmXOB-Qs7VOgC6Hjnp2DI9nSEWepaSXejACxTIR7o,4150
 workbench/utils/workbench_logging.py,sha256=WCuMWhQwibrvcGAyj96h2wowh6dH7zNlDJ7sWUzdCeI,10263
 workbench/utils/workbench_sqs.py,sha256=RwM80z7YWwdtMaCKh7KWF8v38f7eBRU7kyC7ZhTRuI0,2072
-workbench/utils/xgboost_model_utils.py,sha256=
+workbench/utils/xgboost_model_utils.py,sha256=lm6XYnPImi3RyHyiJgl2o4HLJ63EghEdSbmwVRMctXg,22786
 workbench/utils/chem_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workbench/utils/chem_utils/fingerprints.py,sha256=Qvs8jaUwguWUq3Q3j695MY0t0Wk3BvroW-oWBwalMUo,5255
 workbench/utils/chem_utils/misc.py,sha256=Nevf8_opu-uIPrv_1_0ubuFVVo2_fGUkMoLAHB3XAeo,7372

@@ -251,7 +252,7 @@ workbench/web_interface/components/component_interface.py,sha256=QCPWqiZLkVsAEzQ
 workbench/web_interface/components/correlation_matrix.py,sha256=Lv4vRta5-TdxBsu0G8Ea7hyyR3XyPes-k5AfL6qZWEc,6376
 workbench/web_interface/components/data_details_markdown.py,sha256=axDs6eXniglBmvFwIKjpJ5oyT-3D4FO9IcfA_cl-EJ8,9706
 workbench/web_interface/components/endpoint_metric_plots.py,sha256=H0cXuj9UQrrh_2JvRHtq7O8pMXFXKs7o9XpzySENylw,3441
-workbench/web_interface/components/model_plot.py,sha256=
+workbench/web_interface/components/model_plot.py,sha256=Rojx_ZED4P9gvgeEsUm6xnwMNPoeOyn0evw45BWTITc,2536
 workbench/web_interface/components/plugin_interface.py,sha256=jGRq4igUTVXUT4sDqqsKKI2yjilV0ORNBQq6CjEWE84,9563
 workbench/web_interface/components/plugin_unit_test.py,sha256=UBZtGztLk2oJMDXfExfxkhHdmXr6ayv4NS0RpwGc8ro,7704
 workbench/web_interface/components/regression_plot.py,sha256=k18Bd0fcH7ig6kL5GqC_dINci3_YLle_fSEM32zXtzY,3342

@@ -266,7 +267,7 @@ workbench/web_interface/components/plugins/endpoint_details.py,sha256=0A7g_Lx5-3
 workbench/web_interface/components/plugins/generated_compounds.py,sha256=A6JGlkl7buZUugPK21YgufVFDRoGlHJowaqf8PAmz_s,8056
 workbench/web_interface/components/plugins/graph_plot.py,sha256=JFzuSH_CkEmlaLAgFpzmiEpS3sXov0ycnCfP0VLsK2g,14502
 workbench/web_interface/components/plugins/license_details.py,sha256=UyMSBGxEgdp3m9szDkDUAl_Ua8C5a4RNMdYpYCx354M,5497
-workbench/web_interface/components/plugins/model_details.py,sha256=
+workbench/web_interface/components/plugins/model_details.py,sha256=S5J7LmN39F-oWbPQjndv0T3XKEKLDm6pz3JY4274O2M,10468
 workbench/web_interface/components/plugins/molecule_panel.py,sha256=xGCEI5af8F5lNId5eKUpetdQs_ahnIPdW6U7wKvbz2o,3515
 workbench/web_interface/components/plugins/molecule_viewer.py,sha256=xavixcu4RNzh6Nj_-3-XlK09DgpNx5jGmo3wEPNftiE,4529
 workbench/web_interface/components/plugins/pipeline_details.py,sha256=caiFIakHk-1dGGNW7wlio2X7iAm2_tCNbSjDzoRWGEk,5534

@@ -280,9 +281,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.193.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+workbench-0.8.193.dist-info/METADATA,sha256=vW41RDdu0YekBKST6qabwNkGWXpcDgXaqPhk0YvolRU,10495
+workbench-0.8.193.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+workbench-0.8.193.dist-info/entry_points.txt,sha256=o7ohD4D2oygnHp7i9-C0LfcHDuPW5Tv0JXGAg97DpGk,413
+workbench-0.8.193.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.193.dist-info/RECORD,,
{workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/WHEEL: file without changes
{workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/entry_points.txt: file without changes
{workbench-0.8.192.dist-info → workbench-0.8.193.dist-info}/top_level.txt: file without changes