workbench-0.8.162-py3-none-any.whl → workbench-0.8.202-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the registry.
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
- workbench/algorithms/dataframe/proximity.py +261 -235
- workbench/algorithms/graph/light/proximity_graph.py +10 -8
- workbench/api/__init__.py +2 -1
- workbench/api/compound.py +1 -1
- workbench/api/endpoint.py +11 -0
- workbench/api/feature_set.py +11 -8
- workbench/api/meta.py +5 -2
- workbench/api/model.py +16 -15
- workbench/api/monitor.py +1 -16
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +11 -3
- workbench/core/artifacts/data_capture_core.py +355 -0
- workbench/core/artifacts/endpoint_core.py +256 -118
- workbench/core/artifacts/feature_set_core.py +265 -16
- workbench/core/artifacts/model_core.py +107 -60
- workbench/core/artifacts/monitor_core.py +33 -248
- workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
- workbench/core/cloud_platform/aws/aws_meta.py +12 -5
- workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/core/transforms/features_to_model/features_to_model.py +42 -32
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_scripts/chemprop/chemprop.template +852 -0
- workbench/model_scripts/chemprop/generated_model_script.py +852 -0
- workbench/model_scripts/chemprop/requirements.txt +11 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
- workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
- workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
- workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
- workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/pytorch_model/generated_model_script.py +373 -190
- workbench/model_scripts/pytorch_model/pytorch.template +370 -187
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +17 -9
- workbench/model_scripts/uq_models/generated_model_script.py +605 -0
- workbench/model_scripts/uq_models/mapie.template +605 -0
- workbench/model_scripts/uq_models/requirements.txt +1 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
- workbench/model_scripts/xgb_model/xgb_model.template +44 -46
- workbench/repl/workbench_shell.py +28 -14
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/ml_pipeline_batch.py +137 -0
- workbench/scripts/ml_pipeline_sqs.py +186 -0
- workbench/scripts/monitor_cloud_watch.py +20 -100
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +134 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +483 -0
- workbench/utils/chem_utils/mol_standardize.py +450 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +209 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/chemprop_utils.py +760 -0
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/cloudwatch_utils.py +137 -0
- workbench/utils/config_manager.py +3 -7
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/model_utils.py +95 -34
- workbench/utils/monitor_utils.py +44 -62
- workbench/utils/pandas_utils.py +3 -3
- workbench/utils/pytorch_utils.py +526 -0
- workbench/utils/shap_utils.py +10 -2
- workbench/utils/workbench_logging.py +0 -3
- workbench/utils/workbench_sqs.py +1 -1
- workbench/utils/xgboost_model_utils.py +371 -156
- workbench/web_interface/components/model_plot.py +7 -1
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- workbench/web_interface/components/plugins/model_details.py +9 -7
- workbench/web_interface/components/plugins/scatter_plot.py +3 -3
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/RECORD +101 -85
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
- workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/utils/chem_utils.py +0 -1556
- workbench/utils/execution_environment.py +0 -211
- workbench/utils/fast_inference.py +0 -167
- workbench/utils/resource_utils.py +0 -39
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0
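
One structural change worth calling out in the file list: the monolithic workbench/utils/chem_utils.py (1,556 lines, deleted) has been split into the new workbench/utils/chem_utils/ package. Imports therefore move from the single module to its submodules. A minimal sketch of the new import style, with submodule names taken from the file list above; the vis import is confirmed by the workbench_shell.py diff further down, and the inline comments are assumptions about each submodule's role:

    # Before (0.8.162): one module, now deleted
    #   from workbench.utils import chem_utils
    # After (0.8.202): a package of focused submodules (names per the file list)
    from workbench.utils.chem_utils import vis              # molecule display helpers (used by the REPL)
    from workbench.utils.chem_utils import fingerprints     # fingerprint generation (assumed role)
    from workbench.utils.chem_utils import mol_standardize  # structure standardization (assumed role)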
workbench/model_scripts/xgb_model/generated_model_script.py

@@ -28,14 +28,16 @@ from typing import List, Tuple
 
 # Template Parameters
 TEMPLATE_PARAMS = {
-    "model_type": "
-    "
-    "features": ['
-    "compressed_features": [
-    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/
-    "train_all_data":
+    "model_type": "regressor",
+    "target": "class_number_of_rings",
+    "features": ['length', 'diameter', 'height', 'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight', 'sex'],
+    "compressed_features": [],
+    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/abalone-regression/training",
+    "train_all_data": False,
+    "hyperparameters": {},
 }
 
+
 # Function to check if dataframe is empty
 def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
     """

@@ -75,7 +77,7 @@ def expand_proba_column(df: pd.DataFrame, class_labels: List[str]) -> pd.DataFra
     proba_df = pd.DataFrame(df[proba_column].tolist(), columns=proba_splits)
 
     # Drop any proba columns and reset the index in prep for the concat
-    df = df.drop(columns=[proba_column]+proba_splits, errors="ignore")
+    df = df.drop(columns=[proba_column] + proba_splits, errors="ignore")
     df = df.reset_index(drop=True)
 
     # Concatenate the new columns with the original DataFrame

@@ -88,13 +90,12 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     """
     Matches and renames DataFrame columns to match model feature names (case-insensitive).
     Prioritizes exact matches, then case-insensitive matches.
-
+
     Raises ValueError if any model features cannot be matched.
     """
     df_columns_lower = {col.lower(): col for col in df.columns}
     rename_dict = {}
     missing = []
-
     for feature in model_features:
         if feature in df.columns:
             continue  # Exact match

@@ -102,10 +103,11 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
             rename_dict[df_columns_lower[feature.lower()]] = feature
         else:
             missing.append(feature)
-
+
     if missing:
         raise ValueError(f"Features not found: {missing}")
-
+
+    # Rename the DataFrame columns to match the model features
     return df.rename(columns=rename_dict)

@@ -140,8 +142,10 @@ def convert_categorical_types(df: pd.DataFrame, features: list, category_mapping
     return df, category_mappings
 
 
-def decompress_features(
-
+def decompress_features(
+    df: pd.DataFrame, features: List[str], compressed_features: List[str]
+) -> Tuple[pd.DataFrame, List[str]]:
+    """Prepare features for the model by decompressing bitstring features
 
     Args:
         df (pd.DataFrame): The features DataFrame

@@ -166,7 +170,7 @@ def decompress_features(df: pd.DataFrame, features: List[str], compressed_featur
     )
 
     # Decompress the specified compressed features
-    decompressed_features = features
+    decompressed_features = features.copy()
     for feature in compressed_features:
         if (feature not in df.columns) or (feature not in features):
             print(f"Feature '{feature}' not in the features list, skipping decompression.")
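
The behavioral fix in the hunk above is decompress_features taking a copy of the feature list instead of aliasing it. A standalone sketch (plain Python, not workbench code) of the aliasing bug that features.copy() avoids:

    features = ["length", "diameter", "fingerprint"]

    decompressed = features          # old behavior: alias of the same list object
    decompressed.append("fp_0")
    assert features[-1] == "fp_0"    # surprise: the caller's list was mutated too

    features = ["length", "diameter", "fingerprint"]
    decompressed = features.copy()   # new behavior: independent copy
    decompressed.append("fp_0")
    assert "fp_0" not in features    # caller's list is untouched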
@@ -197,13 +201,14 @@ if __name__ == "__main__":
     """The main function is for training the XGBoost model"""
 
     # Harness Template Parameters
-    target = TEMPLATE_PARAMS["
+    target = TEMPLATE_PARAMS["target"]
     features = TEMPLATE_PARAMS["features"]
     orig_features = features.copy()
     compressed_features = TEMPLATE_PARAMS["compressed_features"]
     model_type = TEMPLATE_PARAMS["model_type"]
     model_metrics_s3_path = TEMPLATE_PARAMS["model_metrics_s3_path"]
     train_all_data = TEMPLATE_PARAMS["train_all_data"]
+    hyperparameters = TEMPLATE_PARAMS["hyperparameters"]
     validation_split = 0.2
 
     # Script arguments for input/output directories

@@ -216,11 +221,7 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     # Read the training data into DataFrames
-    training_files = [
-        os.path.join(args.train, file)
-        for file in os.listdir(args.train)
-        if file.endswith(".csv")
-    ]
+    training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
     print(f"Training Files: {training_files}")
 
     # Combine files and read them all into a single pandas dataframe

@@ -255,15 +256,16 @@ if __name__ == "__main__":
     else:
         # Just do a random training Split
         print("WARNING: No training column found, splitting data with random state=42")
-        df_train, df_val = train_test_split(
-            all_df, test_size=validation_split, random_state=42
-        )
+        df_train, df_val = train_test_split(all_df, test_size=validation_split, random_state=42)
     print(f"FIT/TRAIN: {df_train.shape}")
     print(f"VALIDATION: {df_val.shape}")
 
+    # Use any hyperparameters to set up both the trainer and model configurations
+    print(f"Hyperparameters: {hyperparameters}")
+
     # Now spin up our XGB Model
     if model_type == "classifier":
-        xgb_model = xgb.XGBClassifier(enable_categorical=True)
+        xgb_model = xgb.XGBClassifier(enable_categorical=True, **hyperparameters)
 
         # Encode the target column
         label_encoder = LabelEncoder()
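
Both constructor calls now splat the new hyperparameters template parameter into XGBoost, so an empty dict preserves the old default behavior while a populated one overrides it. A short sketch, assuming xgboost is installed; the parameter values are illustrative, not workbench defaults:

    import xgboost as xgb

    # Empty dict (the generated default above): identical to the old call
    hyperparameters = {}
    model = xgb.XGBRegressor(enable_categorical=True, **hyperparameters)

    # Populated dict: keys are passed straight through as constructor kwargs
    hyperparameters = {"n_estimators": 300, "max_depth": 6, "learning_rate": 0.05}
    model = xgb.XGBRegressor(enable_categorical=True, **hyperparameters)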
@@ -271,12 +273,12 @@ if __name__ == "__main__":
         df_val[target] = label_encoder.transform(df_val[target])
 
     else:
-        xgb_model = xgb.XGBRegressor(enable_categorical=True)
+        xgb_model = xgb.XGBRegressor(enable_categorical=True, **hyperparameters)
         label_encoder = None  # We don't need this for regression
 
     # Grab our Features, Target and Train the Model
     y_train = df_train[target]
-    X_train= df_train[features]
+    X_train = df_train[features]
     xgb_model.fit(X_train, y_train)
 
     # Make Predictions on the Validation Set

@@ -315,9 +317,7 @@ if __name__ == "__main__":
         label_names = label_encoder.classes_
 
         # Calculate various model performance metrics
-        scores = precision_recall_fscore_support(
-            y_validate, preds, average=None, labels=label_names
-        )
+        scores = precision_recall_fscore_support(y_validate, preds, average=None, labels=label_names)
 
         # Put the scores into a dataframe
         score_df = pd.DataFrame(

@@ -325,13 +325,13 @@ if __name__ == "__main__":
                 target: label_names,
                 "precision": scores[0],
                 "recall": scores[1],
-                "
+                "f1": scores[2],
                 "support": scores[3],
             }
         )
 
         # We need to get creative with the Classification Metrics
-        metrics = ["precision", "recall", "
+        metrics = ["precision", "recall", "f1", "support"]
         for t in label_names:
             for m in metrics:
                 value = score_df.loc[score_df[target] == t, m].iloc[0]
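
The reflowed precision_recall_fscore_support call and the new "f1" key both lean on the sklearn behavior that average=None returns four per-class arrays (precision, recall, f1, support) aligned with the labels argument. A standalone check of that behavior with toy data:

    import pandas as pd
    from sklearn.metrics import precision_recall_fscore_support

    y_true = ["cat", "dog", "dog", "cat", "bird"]
    y_pred = ["cat", "dog", "cat", "cat", "bird"]
    labels = ["bird", "cat", "dog"]

    # average=None -> one entry per label, in `labels` order
    scores = precision_recall_fscore_support(y_true, y_pred, average=None, labels=labels)
    score_df = pd.DataFrame(
        {"class": labels, "precision": scores[0], "recall": scores[1], "f1": scores[2], "support": scores[3]}
    )
    print(score_df)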
@@ -355,7 +355,9 @@ if __name__ == "__main__":
     print(f"NumRows: {len(df_val)}")
 
     # Now save the model to the standard place/name
-
+    joblib.dump(xgb_model, os.path.join(args.model_dir, "xgb_model.joblib"))
+
+    # Save the label encoder if we have one
     if label_encoder:
         joblib.dump(label_encoder, os.path.join(args.model_dir, "label_encoder.joblib"))
 

@@ -370,19 +372,8 @@ if __name__ == "__main__":
 
 def model_fn(model_dir):
     """Deserialize and return fitted XGBoost model"""
-
-
-
-    with open(model_path, "r") as f:
-        model_json = json.load(f)
-
-    sklearn_data = model_json['learner']['attributes']['scikit_learn']
-    model_type = json.loads(sklearn_data)['_estimator_type']
-
-    model_class = xgb.XGBClassifier if model_type == "classifier" else xgb.XGBRegressor
-    model = model_class(enable_categorical=True)
-    model.load_model(model_path)
-
+    model_path = os.path.join(model_dir, "xgb_model.joblib")
+    model = joblib.load(model_path)
     return model
 
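
model_fn can drop all of the JSON introspection (recovering _estimator_type to choose between XGBClassifier and XGBRegressor) because joblib.dump/joblib.load round-trips the entire fitted scikit-learn wrapper, estimator type included. A minimal sketch of that round trip; the /tmp/model path is a stand-in for SageMaker's model directory:

    import os
    import joblib
    import numpy as np
    import xgboost as xgb

    model_dir = "/tmp/model"  # stand-in for the SageMaker model dir
    os.makedirs(model_dir, exist_ok=True)

    # Save: the pickle carries the full estimator, so no type sniffing on load
    model = xgb.XGBRegressor(n_estimators=10)
    model.fit(np.random.rand(20, 3), np.random.rand(20))
    joblib.dump(model, os.path.join(model_dir, "xgb_model.joblib"))

    # Load: exactly what the new model_fn does
    loaded = joblib.load(os.path.join(model_dir, "xgb_model.joblib"))
    print(loaded.predict(np.random.rand(2, 3)))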
@@ -390,7 +381,7 @@ def input_fn(input_data, content_type):
     """Parse input data and return a DataFrame."""
    if not input_data:
         raise ValueError("Empty input data is not supported!")
-
+
     # Decode bytes to string if necessary
     if isinstance(input_data, bytes):
         input_data = input_data.decode("utf-8")
workbench/model_scripts/xgb_model/xgb_model.template

@@ -6,11 +6,13 @@ import numpy as np
 # Model Performance Scores
 from sklearn.metrics import (
     mean_absolute_error,
+    median_absolute_error,
     r2_score,
     root_mean_squared_error,
     precision_recall_fscore_support,
     confusion_matrix,
 )
+from scipy.stats import spearmanr
 
 # Classification Encoder
 from sklearn.preprocessing import LabelEncoder

@@ -29,13 +31,15 @@ from typing import List, Tuple
 # Template Parameters
 TEMPLATE_PARAMS = {
     "model_type": "{{model_type}}",
-    "
+    "target": "{{target_column}}",
     "features": "{{feature_list}}",
     "compressed_features": "{{compressed_features}}",
     "model_metrics_s3_path": "{{model_metrics_s3_path}}",
-    "train_all_data": "{{train_all_data}}"
+    "train_all_data": "{{train_all_data}}",
+    "hyperparameters": "{{hyperparameters}}",
 }
 
+
 # Function to check if dataframe is empty
 def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
     """

@@ -75,7 +79,7 @@ def expand_proba_column(df: pd.DataFrame, class_labels: List[str]) -> pd.DataFra
     proba_df = pd.DataFrame(df[proba_column].tolist(), columns=proba_splits)
 
     # Drop any proba columns and reset the index in prep for the concat
-    df = df.drop(columns=[proba_column]+proba_splits, errors="ignore")
+    df = df.drop(columns=[proba_column] + proba_splits, errors="ignore")
     df = df.reset_index(drop=True)
 
     # Concatenate the new columns with the original DataFrame

@@ -88,13 +92,12 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     """
     Matches and renames DataFrame columns to match model feature names (case-insensitive).
     Prioritizes exact matches, then case-insensitive matches.
-
+
     Raises ValueError if any model features cannot be matched.
     """
     df_columns_lower = {col.lower(): col for col in df.columns}
     rename_dict = {}
     missing = []
-
     for feature in model_features:
         if feature in df.columns:
             continue  # Exact match

@@ -102,10 +105,11 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
             rename_dict[df_columns_lower[feature.lower()]] = feature
         else:
             missing.append(feature)
-
+
     if missing:
         raise ValueError(f"Features not found: {missing}")
-
+
+    # Rename the DataFrame columns to match the model features
     return df.rename(columns=rename_dict)

@@ -140,8 +144,10 @@ def convert_categorical_types(df: pd.DataFrame, features: list, category_mapping
     return df, category_mappings
 
 
-def decompress_features(
-
+def decompress_features(
+    df: pd.DataFrame, features: List[str], compressed_features: List[str]
+) -> Tuple[pd.DataFrame, List[str]]:
+    """Prepare features for the model by decompressing bitstring features
 
     Args:
         df (pd.DataFrame): The features DataFrame

@@ -166,7 +172,7 @@ def decompress_features(df: pd.DataFrame, features: List[str], compressed_featur
     )
 
     # Decompress the specified compressed features
-    decompressed_features = features
+    decompressed_features = features.copy()
     for feature in compressed_features:
         if (feature not in df.columns) or (feature not in features):
             print(f"Feature '{feature}' not in the features list, skipping decompression.")

@@ -197,13 +203,14 @@ if __name__ == "__main__":
     """The main function is for training the XGBoost model"""
 
     # Harness Template Parameters
-    target = TEMPLATE_PARAMS["
+    target = TEMPLATE_PARAMS["target"]
     features = TEMPLATE_PARAMS["features"]
     orig_features = features.copy()
     compressed_features = TEMPLATE_PARAMS["compressed_features"]
     model_type = TEMPLATE_PARAMS["model_type"]
     model_metrics_s3_path = TEMPLATE_PARAMS["model_metrics_s3_path"]
     train_all_data = TEMPLATE_PARAMS["train_all_data"]
+    hyperparameters = TEMPLATE_PARAMS["hyperparameters"]
     validation_split = 0.2
 
     # Script arguments for input/output directories

@@ -216,11 +223,7 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     # Read the training data into DataFrames
-    training_files = [
-        os.path.join(args.train, file)
-        for file in os.listdir(args.train)
-        if file.endswith(".csv")
-    ]
+    training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
     print(f"Training Files: {training_files}")
 
     # Combine files and read them all into a single pandas dataframe

@@ -255,15 +258,16 @@ if __name__ == "__main__":
     else:
         # Just do a random training Split
         print("WARNING: No training column found, splitting data with random state=42")
-        df_train, df_val = train_test_split(
-            all_df, test_size=validation_split, random_state=42
-        )
+        df_train, df_val = train_test_split(all_df, test_size=validation_split, random_state=42)
     print(f"FIT/TRAIN: {df_train.shape}")
     print(f"VALIDATION: {df_val.shape}")
 
+    # Use any hyperparameters to set up both the trainer and model configurations
+    print(f"Hyperparameters: {hyperparameters}")
+
     # Now spin up our XGB Model
     if model_type == "classifier":
-        xgb_model = xgb.XGBClassifier(enable_categorical=True)
+        xgb_model = xgb.XGBClassifier(enable_categorical=True, **hyperparameters)
 
         # Encode the target column
         label_encoder = LabelEncoder()

@@ -271,12 +275,12 @@ if __name__ == "__main__":
         df_val[target] = label_encoder.transform(df_val[target])
 
     else:
-        xgb_model = xgb.XGBRegressor(enable_categorical=True)
+        xgb_model = xgb.XGBRegressor(enable_categorical=True, **hyperparameters)
         label_encoder = None  # We don't need this for regression
 
     # Grab our Features, Target and Train the Model
     y_train = df_train[target]
-    X_train= df_train[features]
+    X_train = df_train[features]
     xgb_model.fit(X_train, y_train)
 
     # Make Predictions on the Validation Set

@@ -315,9 +319,7 @@ if __name__ == "__main__":
         label_names = label_encoder.classes_
 
         # Calculate various model performance metrics
-        scores = precision_recall_fscore_support(
-            y_validate, preds, average=None, labels=label_names
-        )
+        scores = precision_recall_fscore_support(y_validate, preds, average=None, labels=label_names)
 
         # Put the scores into a dataframe
         score_df = pd.DataFrame(

@@ -325,13 +327,13 @@ if __name__ == "__main__":
                 target: label_names,
                 "precision": scores[0],
                 "recall": scores[1],
-                "
+                "f1": scores[2],
                 "support": scores[3],
             }
         )
 
         # We need to get creative with the Classification Metrics
-        metrics = ["precision", "recall", "
+        metrics = ["precision", "recall", "f1", "support"]
         for t in label_names:
             for m in metrics:
                 value = score_df.loc[score_df[target] == t, m].iloc[0]

@@ -348,14 +350,21 @@ if __name__ == "__main__":
         # Calculate various model performance metrics (regression)
         rmse = root_mean_squared_error(y_validate, preds)
         mae = mean_absolute_error(y_validate, preds)
+        medae = median_absolute_error(y_validate, preds)
         r2 = r2_score(y_validate, preds)
-
-
-        print(f"
-        print(f"
+        spearman_corr = spearmanr(y_validate, preds).correlation
+        support = len(df_val)
+        print(f"rmse: {rmse:.3f}")
+        print(f"mae: {mae:.3f}")
+        print(f"medae: {medae:.3f}")
+        print(f"r2: {r2:.3f}")
+        print(f"spearmanr: {spearman_corr:.3f}")
+        print(f"support: {support}")
 
     # Now save the model to the standard place/name
-
+    joblib.dump(xgb_model, os.path.join(args.model_dir, "xgb_model.joblib"))
+
+    # Save the label encoder if we have one
     if label_encoder:
         joblib.dump(label_encoder, os.path.join(args.model_dir, "label_encoder.joblib"))
 
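
The regression branch now reports median absolute error and Spearman rank correlation alongside rmse/mae/r2. A quick standalone check of the scipy.stats.spearmanr call used above; .correlation is the rank-correlation attribute on the result object:

    import numpy as np
    from scipy.stats import spearmanr

    y_validate = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    preds = np.array([1.1, 1.9, 3.2, 3.8, 5.3])

    # Perfectly rank-ordered predictions give a rank correlation of 1.0
    spearman_corr = spearmanr(y_validate, preds).correlation
    print(f"spearmanr: {spearman_corr:.3f}")  # 1.000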
@@ -370,19 +379,8 @@ if __name__ == "__main__":
 
 def model_fn(model_dir):
     """Deserialize and return fitted XGBoost model"""
-
-
-
-    with open(model_path, "r") as f:
-        model_json = json.load(f)
-
-    sklearn_data = model_json['learner']['attributes']['scikit_learn']
-    model_type = json.loads(sklearn_data)['_estimator_type']
-
-    model_class = xgb.XGBClassifier if model_type == "classifier" else xgb.XGBRegressor
-    model = model_class(enable_categorical=True)
-    model.load_model(model_path)
-
+    model_path = os.path.join(model_dir, "xgb_model.joblib")
+    model = joblib.load(model_path)
     return model
 

@@ -390,7 +388,7 @@ def input_fn(input_data, content_type):
     """Parse input data and return a DataFrame."""
     if not input_data:
         raise ValueError("Empty input data is not supported!")
-
+
     # Decode bytes to string if necessary
     if isinstance(input_data, bytes):
         input_data = input_data.decode("utf-8")
workbench/repl/workbench_shell.py

@@ -1,14 +1,25 @@
+# flake8: noqa: E402
+import os
+import sys
+import logging
+import importlib
+import webbrowser
+import readline  # noqa: F401
+
+# Disable OpenMP parallelism to avoid segfaults with PyTorch in iPython
+# This is a known issue on macOS where libomp crashes during thread synchronization
+# Must be set before importing numpy/pandas/torch or any library that uses OpenMP
+os.environ.setdefault("OMP_NUM_THREADS", "1")
+os.environ.setdefault("MKL_NUM_THREADS", "1")
+
+import IPython
 from IPython import start_ipython
+from distutils.version import LooseVersion
 from IPython.terminal.prompts import Prompts
 from IPython.terminal.ipapp import load_default_config
 from pygments.token import Token
-import sys
-import logging
-import importlib
 import botocore
-import webbrowser
 import pandas as pd
-import readline  # noqa
 
 try:
     import matplotlib.pyplot as plt  # noqa
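
The point of the reshuffled imports above is ordering: the OpenMP thread caps have to be in the environment before numpy/pandas/torch load libomp, and setdefault (unlike plain assignment) leaves any value the user already exported intact. A compressed sketch of the pattern:

    # Must run before any OpenMP-backed library is imported
    import os

    os.environ.setdefault("OMP_NUM_THREADS", "1")  # no-op if the user already set it
    os.environ.setdefault("MKL_NUM_THREADS", "1")

    import numpy as np  # noqa: E402  (deliberately imported after the env setup)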
@@ -39,7 +50,7 @@ from workbench.cached.cached_meta import CachedMeta
 try:
     import rdkit  # noqa
     import mordred  # noqa
-    from workbench.utils import
+    from workbench.utils.chem_utils import vis
 
     HAVE_CHEM_UTILS = True
 except ImportError:

@@ -70,7 +81,7 @@ if not ConfigManager().config_okay():
 
 # Set the log level to important
 log = logging.getLogger("workbench")
-log.setLevel(
+log.setLevel(logging.INFO)
 log.addFilter(
     lambda record: not (
         record.getMessage().startswith("Async: Metadata") or record.getMessage().startswith("Updated Metadata")

@@ -176,12 +187,12 @@ class WorkbenchShell:
 
         # Add cheminformatics utils if available
         if HAVE_CHEM_UTILS:
-            self.commands["show"] =
+            self.commands["show"] = vis.show
 
     def start(self):
         """Start the Workbench IPython shell"""
         cprint("magenta", "\nWelcome to Workbench!")
-        if self.aws_status
+        if not self.aws_status:
             cprint("red", "AWS Account Connection Failed...Review/Fix the Workbench Config:")
             cprint("red", f"Path: {self.cm.site_config_path}")
             self.show_config()

@@ -202,7 +213,10 @@ class WorkbenchShell:
 
         # Start IPython with the config and commands in the namespace
         try:
-
+            if LooseVersion(IPython.__version__) >= LooseVersion("9.0.0"):
+                ipython_argv = ["--no-tip", "--theme", "linux"]
+            else:
+                ipython_argv = []
             start_ipython(ipython_argv, user_ns=locs, config=config)
         finally:
             spinner = self.spinner_start("Goodbye to AWS:")
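
The startup path gates the new --no-tip/--theme flags on IPython >= 9.0.0 via LooseVersion rather than raw string comparison, which mis-orders multi-digit version components. A quick illustration (note that distutils is deprecated and removed in Python 3.12, where packaging.version.Version is the usual substitute):

    from distutils.version import LooseVersion

    # Lexicographic string comparison sorts "9.10" before "9.9"...
    assert "9.10.0" < "9.9.0"
    # ...while version objects compare numerically, component by component
    assert LooseVersion("9.10.0") > LooseVersion("9.9.0")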
@@ -520,7 +534,7 @@ class WorkbenchShell:
     def get_meta(self):
         return self.meta
 
-    def plot_manager(self, data, plot_type: str = "
+    def plot_manager(self, data, plot_type: str = "scatter", **kwargs):
         """Plot Manager for Workbench"""
         from workbench.web_interface.components.plugins import ag_table, graph_plot, scatter_plot
 

@@ -555,14 +569,14 @@ class WorkbenchShell:
         from workbench.web_interface.components.plugin_unit_test import PluginUnitTest
 
         # Get kwargs
-        theme = kwargs.get("theme", "
+        theme = kwargs.get("theme", "midnight_blue")
 
         plugin_test = PluginUnitTest(plugin_class, theme=theme, input_data=data, **kwargs)
 
-        #
-        plugin_test.run()
+        # Open the browser and run the dash server
         url = f"http://127.0.0.1:{plugin_test.port}"
         webbrowser.open(url)
+        plugin_test.run()
 
 
 # Launch Shell Entry Point