workbench 0.8.170__py3-none-any.whl → 0.8.171__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of workbench might be problematic; see the registry's advisory page for more details.
- workbench/core/cloud_platform/aws/aws_meta.py +11 -4
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +141 -151
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +149 -39
- workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -2
- workbench/model_scripts/xgb_model/generated_model_script.py +11 -11
- workbench/model_scripts/xgb_model/xgb_model.template +7 -7
- workbench/scripts/{ml_pipeline_launcher.py → ml_pipeline_batch.py} +1 -1
- workbench/scripts/ml_pipeline_sqs.py +139 -0
- workbench/utils/model_utils.py +12 -0
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/METADATA +1 -1
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/RECORD +16 -15
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/entry_points.txt +2 -1
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/WHEEL +0 -0
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.170.dist-info → workbench-0.8.171.dist-info}/top_level.txt +0 -0
workbench/core/cloud_platform/aws/aws_meta.py

@@ -196,7 +196,9 @@ class AWSMeta:

         # Return the summary as a DataFrame
         df = pd.DataFrame(data_summary).convert_dtypes()
-        …
+        if not df.empty:
+            df.sort_values(by="Created", ascending=False, inplace=True)
+        return df

     def models(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Models in AWS.

@@ -256,7 +258,9 @@ class AWSMeta:

         # Return the summary as a DataFrame
         df = pd.DataFrame(model_summary).convert_dtypes()
-        …
+        if not df.empty:
+            df.sort_values(by="Created", ascending=False, inplace=True)
+        return df

     def endpoints(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Endpoints in AWS.

@@ -317,7 +321,9 @@ class AWSMeta:

         # Return the summary as a DataFrame
         df = pd.DataFrame(data_summary).convert_dtypes()
-        …
+        if not df.empty:
+            df.sort_values(by="Created", ascending=False, inplace=True)
+        return df

     def _endpoint_config_info(self, endpoint_config_name: str) -> dict:
         """Internal: Get the Endpoint Configuration information for the given endpoint config name.

@@ -657,7 +663,8 @@ class AWSMeta:
         df = pd.DataFrame(data_summary).convert_dtypes()

         # Sort by the Modified column
-        …
+        if not df.empty:
+            df = df.sort_values(by="Modified", ascending=False)
         return df

     def _aws_pipelines(self) -> pd.DataFrame:
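For context, a minimal sketch (toy data, not workbench code) of the failure mode the new `if not df.empty:` guards avoid: an empty summary produces a DataFrame with no "Created"/"Modified" column, so an unconditional sort would raise a KeyError.

```python
import pandas as pd

df = pd.DataFrame([])  # empty summary: no rows and no "Created" column
if not df.empty:
    df.sort_values(by="Created", ascending=False, inplace=True)  # skipped when empty
print(df)  # the empty DataFrame is returned untouched instead of raising
```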
workbench/model_scripts/custom_models/uq_models/generated_model_script.py

@@ -2,7 +2,6 @@
 from ngboost import NGBRegressor
 from xgboost import XGBRegressor  # Base Estimator
 from sklearn.model_selection import train_test_split
-import numpy as np

 # Model Performance Scores
 from sklearn.metrics import (

@@ -16,7 +15,9 @@ import json
 import argparse
 import joblib
 import os
+import numpy as np
 import pandas as pd
+from typing import List, Tuple

 # Local Imports
 from proximity import Proximity

@@ -25,11 +26,12 @@ from proximity import Proximity

 # Template Placeholders
 TEMPLATE_PARAMS = {
-    "id_column": "…
-    "…
-    "…
-    "…
-    "…
+    "id_column": "udm_mol_bat_id",
+    "target": "udm_asy_res_intrinsic_clearance_ul_per_min_per_mg_protein",
"features": ['bcut2d_logplow', 'numradicalelectrons', 'smr_vsa5', 'fr_lactam', 'fr_morpholine', 'fr_aldehyde', 'slogp_vsa1', 'fr_amidine', 'bpol', 'fr_ester', 'fr_azo', 'kappa3', 'peoe_vsa5', 'fr_ketone_topliss', 'vsa_estate9', 'estate_vsa9', 'bcut2d_mrhi', 'fr_ndealkylation1', 'numrotatablebonds', 'minestateindex', 'fr_quatn', 'peoe_vsa3', 'fr_epoxide', 'fr_aniline', 'minpartialcharge', 'fr_nitroso', 'fpdensitymorgan2', 'fr_oxime', 'fr_sulfone', 'smr_vsa1', 'kappa1', 'fr_pyridine', 'numaromaticrings', 'vsa_estate6', 'molmr', 'estate_vsa1', 'fr_dihydropyridine', 'vsa_estate10', 'fr_alkyl_halide', 'chi2n', 'fr_thiocyan', 'fpdensitymorgan1', 'fr_unbrch_alkane', 'slogp_vsa9', 'chi4n', 'fr_nitro_arom', 'fr_al_oh', 'fr_furan', 'fr_c_s', 'peoe_vsa8', 'peoe_vsa14', 'numheteroatoms', 'fr_ndealkylation2', 'maxabspartialcharge', 'vsa_estate2', 'peoe_vsa7', 'apol', 'numhacceptors', 'fr_tetrazole', 'vsa_estate1', 'peoe_vsa9', 'naromatom', 'bcut2d_chghi', 'fr_sh', 'fr_halogen', 'slogp_vsa4', 'fr_benzodiazepine', 'molwt', 'fr_isocyan', 'fr_prisulfonamd', 'maxabsestateindex', 'minabsestateindex', 'peoe_vsa11', 'slogp_vsa12', 'estate_vsa5', 'numaliphaticcarbocycles', 'bcut2d_mwlow', 'slogp_vsa7', 'fr_allylic_oxid', 'fr_methoxy', 'fr_nh0', 'fr_coo2', 'fr_phenol', 'nacid', 'nbase', 'chi3v', 'fr_ar_nh', 'fr_nitrile', 'fr_imidazole', 'fr_urea', 'bcut2d_mrlow', 'chi1', 'smr_vsa6', 'fr_aryl_methyl', 'narombond', 'fr_alkyl_carbamate', 'fr_piperzine', 'exactmolwt', 'qed', 'chi0n', 'fr_sulfonamd', 'fr_thiazole', 'numvalenceelectrons', 'fr_phos_acid', 'peoe_vsa12', 'fr_nh1', 'fr_hdrzine', 'fr_c_o_nocoo', 'fr_lactone', 'estate_vsa6', 'bcut2d_logphi', 'vsa_estate7', 'peoe_vsa13', 'numsaturatedcarbocycles', 'fr_nitro', 'fr_phenol_noorthohbond', 'rotratio', 'fr_barbitur', 'fr_isothiocyan', 'balabanj', 'fr_arn', 'fr_imine', 'maxpartialcharge', 'fr_sulfide', 'slogp_vsa11', 'fr_hoccn', 'fr_n_o', 'peoe_vsa1', 'slogp_vsa6', 'heavyatommolwt', 'fractioncsp3', 'estate_vsa8', 'peoe_vsa10', 'numaliphaticrings', 'fr_thiophene', 'maxestateindex', 'smr_vsa10', 'labuteasa', 'smr_vsa2', 'fpdensitymorgan3', 'smr_vsa9', 'slogp_vsa10', 'numaromaticheterocycles', 'fr_nh2', 'fr_diazo', 'chi3n', 'fr_ar_coo', 'slogp_vsa5', 'fr_bicyclic', 'fr_amide', 'estate_vsa10', 'fr_guanido', 'chi1n', 'numsaturatedrings', 'fr_piperdine', 'fr_term_acetylene', 'estate_vsa4', 'slogp_vsa3', 'fr_coo', 'fr_ether', 'estate_vsa7', 'bcut2d_chglo', 'fr_oxazole', 'peoe_vsa6', 'hallkieralpha', 'peoe_vsa2', 'chi2v', 'nocount', 'vsa_estate5', 'fr_nhpyrrole', 'fr_al_coo', 'bertzct', 'estate_vsa11', 'minabspartialcharge', 'slogp_vsa8', 'fr_imide', 'kappa2', 'numaliphaticheterocycles', 'numsaturatedheterocycles', 'fr_hdrzone', 'smr_vsa4', 'fr_ar_n', 'nrot', 'smr_vsa8', 'slogp_vsa2', 'chi4v', 'fr_phos_ester', 'fr_para_hydroxylation', 'smr_vsa3', 'nhohcount', 'estate_vsa2', 'mollogp', 'tpsa', 'fr_azide', 'peoe_vsa4', 'numhdonors', 'fr_al_oh_notert', 'fr_c_o', 'chi0', 'fr_nitro_arom_nonortho', 'vsa_estate3', 'fr_benzene', 'fr_ketone', 'vsa_estate8', 'smr_vsa7', 'fr_ar_oh', 'fr_priamide', 'ringcount', 'estate_vsa3', 'numaromaticcarbocycles', 'bcut2d_mwhi', 'chi1v', 'heavyatomcount', 'vsa_estate4', 'chi0v'],
+    "compressed_features": [],
+    "train_all_data": False,
+    "track_columns": ['udm_asy_res_intrinsic_clearance_ul_per_min_per_mg_protein']
 }


@@ -73,136 +75,97 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     return df.rename(columns=rename_dict)


-def …
-    df_pred: pd.DataFrame,
-    prox_df: pd.DataFrame,
-    calibration_strength: float = 0.7,
-    distance_decay: float = 3.0,
-) -> pd.DataFrame:
+def convert_categorical_types(df: pd.DataFrame, features: list, category_mappings={}) -> tuple:
     """
-    …
+    Converts appropriate columns to categorical type with consistent mappings.
+
+    Args:
+        df (pd.DataFrame): The DataFrame to process.
+        features (list): List of feature names to consider for conversion.
+        category_mappings (dict, optional): Existing category mappings. If empty dict, we're in
+                                            training mode. If populated, we're in inference mode.
+
+    Returns:
+        tuple: (processed DataFrame, category mappings dictionary)
     """
-    …
-    # Calculate distance weights (closer neighbors get more weight)
-    prox_df = prox_df.copy()
-    prox_df['weight'] = 1 / (1 + prox_df['distance'] ** distance_decay)
-
-    # Get weighted quantiles and statistics for each ID
-    neighbor_stats = []
-    for id_val, group in prox_df.groupby(id_column):
-        values = group[target_column].values
-        weights = group['weight'].values
-
-        # Normalize weights
-        weights = weights / weights.sum()
-
-        stats = {
-            id_column: id_val,
-            'local_q025': weighted_quantile(values, weights, 0.025),
-            'local_q25': weighted_quantile(values, weights, 0.25),
-            'local_q75': weighted_quantile(values, weights, 0.75),
-            'local_q975': weighted_quantile(values, weights, 0.975),
-            'local_median': weighted_quantile(values, weights, 0.5),
-            'local_std': np.sqrt(np.average((values - np.average(values, weights=weights)) ** 2, weights=weights)),
-            'avg_distance': group['distance'].mean(),
-            'min_distance': group['distance'].min(),
-            'max_distance': group['distance'].max(),
-        }
-        neighbor_stats.append(stats)
-
-    neighbor_df = pd.DataFrame(neighbor_stats)
-    out = df_pred.merge(neighbor_df, on=id_column, how='left')
-
-    # Model disagreement score (normalized by prediction std)
-    model_disagreement = (out["prediction"] - out["prediction_uq"]).abs()
-    disagreement_score = (model_disagreement / out["prediction_std"]).clip(0, 2)
-
-    # Local confidence based on:
-    # 1. How close the neighbors are (closer = more confident)
-    # 2. How much local variance there is (less variance = more confident)
-    max_reasonable_distance = out['max_distance'].quantile(0.8)  # 80th percentile as reference
-    distance_confidence = (1 - (out['avg_distance'] / max_reasonable_distance)).clip(0.1, 1.0)
-
-    variance_confidence = (out["prediction_std"] / out["local_std"]).clip(0.5, 2.0)
-    local_confidence = distance_confidence * variance_confidence.clip(0.5, 1.5)
-
-    # Calibration weight: higher when models disagree and we have good local data
-    calibration_weight = (
-        calibration_strength *
-        local_confidence *  # Weight by local data quality
-        disagreement_score.clip(0.3, 1.0)  # More calibration when models disagree
-    )
-
-    consensus_pred = 0.65 * out["prediction_uq"] + 0.35 * out["prediction"]
-
-    # Re-center local intervals around consensus prediction
-    local_center_offset = consensus_pred - out["local_median"]
-
-    sparse_region_mask = out['min_distance'] > out['min_distance'].quantile(0.9)
-    expansion_factor = 1 + 0.2 * sparse_region_mask  # 20% expansion in sparse regions
-    …
+    # Training mode
+    if category_mappings == {}:
+        for col in df.select_dtypes(include=["object", "string"]):
+            if col in features and df[col].nunique() < 20:
+                print(f"Training mode: Converting {col} to category")
+                df[col] = df[col].astype("category")
+                category_mappings[col] = df[col].cat.categories.tolist()  # Store category mappings
+
+    # Inference mode
+    else:
+        for col, categories in category_mappings.items():
+            if col in df.columns:
+                print(f"Inference mode: Applying categorical mapping for {col}")
+                df[col] = pd.Categorical(df[col], categories=categories)  # Apply consistent categorical mapping
+
+    return df, category_mappings
+
+
+def decompress_features(df: pd.DataFrame, features: List[str], compressed_features: List[str]) -> Tuple[pd.DataFrame, List[str]]:
+    """Prepare features for the XGBoost model
+
+    Args:
+        df (pd.DataFrame): The features DataFrame
+        features (List[str]): Full list of feature names
+        compressed_features (List[str]): List of feature names to decompress (bitstrings)
+
+    Returns:
+        pd.DataFrame: DataFrame with the decompressed features
+        List[str]: Updated list of feature names after decompression
+
+    Raises:
+        ValueError: If any missing values are found in the specified features
+    """
+    # Check for any missing values in the required features
+    missing_counts = df[features].isna().sum()
+    if missing_counts.any():
+        missing_features = missing_counts[missing_counts > 0]
+        print(
+            f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
+            "WARNING: You might want to remove/replace all NaN values before processing."
+        )
+
+    # Decompress the specified compressed features
+    decompressed_features = features
+    for feature in compressed_features:
+        if (feature not in df.columns) or (feature not in features):
+            print(f"Feature '{feature}' not in the features list, skipping decompression.")
+            continue
+
+        # Remove the feature from the list of features to avoid duplication
+        decompressed_features.remove(feature)
+
+        # Handle all compressed features as bitstrings
+        bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
+        prefix = feature[:3]
+
+        # Create all new columns at once - avoids fragmentation
+        new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
+        new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
+
+        # Add to features list
+        decompressed_features.extend(new_col_names)
+
+        # Drop original column and concatenate new ones
+        df = df.drop(columns=[feature])
+        df = pd.concat([df, new_df], axis=1)
+
+    return df, decompressed_features


-# TRAINING SECTION
-#
-# This section (__main__) is where SageMaker will execute the training job
-# and save the model artifacts to the model directory.
-#
 if __name__ == "__main__":
     # Template Parameters
     id_column = TEMPLATE_PARAMS["id_column"]
-    features = TEMPLATE_PARAMS["features"]
     target = TEMPLATE_PARAMS["target"]
+    features = TEMPLATE_PARAMS["features"]
+    orig_features = features.copy()
+    compressed_features = TEMPLATE_PARAMS["compressed_features"]
     train_all_data = TEMPLATE_PARAMS["train_all_data"]
     track_columns = TEMPLATE_PARAMS["track_columns"]  # Can be None
     validation_split = 0.2

@@ -216,34 +179,51 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()

-    # …
+    # Read the training data into DataFrames
     training_files = [
         os.path.join(args.train, file)
-        for file in os.listdir(args.train)
+        for file in os.listdir(args.train)
+        if file.endswith(".csv")
     ]
     print(f"Training Files: {training_files}")

     # Combine files and read them all into a single pandas dataframe
-    …
+    all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])

-    # Check if the …
-    check_dataframe(…
+    # Check if the dataframe is empty
+    check_dataframe(all_df, "training_df")

-    # …
+    # Features/Target output
+    print(f"Target: {target}")
+    print(f"Features: {str(features)}")
+
+    # Convert any features that might be categorical to 'category' type
+    all_df, category_mappings = convert_categorical_types(all_df, features)
+
+    # If we have compressed features, decompress them
+    if compressed_features:
+        print(f"Decompressing features {compressed_features}...")
+        all_df, features = decompress_features(all_df, features, compressed_features)
+
+    # Do we want to train on all the data?
     if train_all_data:
-        …
-        print("…
-        df_train = …
-        df_val = …
+        print("Training on ALL of the data")
+        df_train = all_df.copy()
+        df_val = all_df.copy()
+
+    # Does the dataframe have a training column?
+    elif "training" in all_df.columns:
+        print("Found training column, splitting data based on training column")
+        df_train = all_df[all_df["training"]]
+        df_val = all_df[~all_df["training"]]
     else:
-        # …
-        print("…
-        df_train, df_val = train_test_split(…
+        # Just do a random training Split
+        print("WARNING: No training column found, splitting data with random state=42")
+        df_train, df_val = train_test_split(
+            all_df, test_size=validation_split, random_state=42
+        )
+    print(f"FIT/TRAIN: {df_train.shape}")
+    print(f"VALIDATION: {df_val.shape}")

     # We're using XGBoost for point predictions and NGBoost for uncertainty quantification
     xgb_model = XGBRegressor()

@@ -251,18 +231,16 @@ if __name__ == "__main__":

     # Prepare features and targets for training
     X_train = df_train[features]
-    …
+    X_validate = df_val[features]
     y_train = df_train[target]
-    …
+    y_validate = df_val[target]

     # Train both models using the training data
     xgb_model.fit(X_train, y_train)
-    ngb_model.fit(X_train, y_train, X_val=…
+    ngb_model.fit(X_train, y_train, X_val=X_validate, Y_val=y_validate)

     # Make Predictions on the Validation Set
     print(f"Making Predictions on Validation Set...")
-    y_validate = df_val[target]
-    X_validate = df_val[features]
     preds = xgb_model.predict(X_validate)

     # Calculate various model performance metrics (regression)

@@ -280,9 +258,9 @@ if __name__ == "__main__":
     # Save the trained NGBoost model
     joblib.dump(ngb_model, os.path.join(args.model_dir, "ngb_model.joblib"))

-    # Save the …
+    # Save the features (this will validate input during predictions)
     with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
-        json.dump(…
+        json.dump(orig_features, fp)  # We save the original features, not the decompressed ones

     # Now the Proximity model
     model = Proximity(df_train, id_column, features, target, track_columns=track_columns)

@@ -295,7 +273,7 @@ if __name__ == "__main__":
 # Inference Section
 #
 def model_fn(model_dir) -> dict:
-    """Load and return XGBoost and …
+    """Load and return XGBoost, NGBoost, and Prox Model from model directory."""

     # Load XGBoost regressor
     xgb_path = os.path.join(model_dir, "xgb_model.json")

@@ -376,18 +354,30 @@ def predict_fn(df, models) -> pd.DataFrame:
     df["prediction_std"] = dist_params['scale']  # standard deviation

     # Add 95% prediction intervals using ppf (percent point function)
-    …
+    # Note: Our hybrid model uses XGB point prediction and NGBoost UQ
+    # so we need to adjust the bounds to include the point prediction
+    df["q_025"] = np.minimum(y_dists.ppf(0.025), df["prediction"])
+    df["q_975"] = np.maximum(y_dists.ppf(0.975), df["prediction"])
+
+    # Add 90% prediction intervals
+    df["q_05"] = y_dists.ppf(0.05)  # 5th percentile
+    df["q_95"] = y_dists.ppf(0.95)  # 95th percentile
+
+    # Add 80% prediction intervals
+    df["q_10"] = y_dists.ppf(0.10)  # 10th percentile
+    df["q_90"] = y_dists.ppf(0.90)  # 90th percentile

     # Add 50% prediction intervals
-    df["q_25"] = y_dists.ppf(0.25)
-    df["q_75"] = y_dists.ppf(0.75)
+    df["q_25"] = y_dists.ppf(0.25)  # 25th percentile
+    df["q_75"] = y_dists.ppf(0.75)  # 75th percentile

-    # …
+    # Reorder the quantile columns for easier reading
+    quantile_cols = ["q_025", "q_05", "q_10", "q_25", "q_75", "q_90", "q_95", "q_975"]
+    other_cols = [col for col in df.columns if col not in quantile_cols]
+    df = df[other_cols + quantile_cols]

-    # …
+    # Compute Nearest neighbors with Proximity model
+    models["proximity"].neighbors(df)

     # Return the modified DataFrame
     return df
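A small, self-contained sketch (toy column names, not workbench code) of the bitstring decompression that the new `decompress_features()` performs: each compressed fingerprint string becomes one uint8 column per bit, named with the feature's three-letter prefix.

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"fingerprint": ["1010", "0110"]})
bit_matrix = np.array([list(bs) for bs in df["fingerprint"]], dtype=np.uint8)
new_cols = [f"fin_{i}" for i in range(bit_matrix.shape[1])]  # prefix = feature[:3]
df = pd.concat(
    [df.drop(columns=["fingerprint"]), pd.DataFrame(bit_matrix, columns=new_cols, index=df.index)],
    axis=1,
)
print(df)  # columns fin_0..fin_3 holding 0/1 values
```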
workbench/model_scripts/custom_models/uq_models/meta_uq.template

@@ -15,7 +15,9 @@ import json
 import argparse
 import joblib
 import os
+import numpy as np
 import pandas as pd
+from typing import List, Tuple

 # Local Imports
 from proximity import Proximity

@@ -25,8 +27,9 @@ from proximity import Proximity
 # Template Placeholders
 TEMPLATE_PARAMS = {
     "id_column": "{{id_column}}",
-    "features": "{{feature_list}}",
     "target": "{{target_column}}",
+    "features": "{{feature_list}}",
+    "compressed_features": "{{compressed_features}}",
     "train_all_data": "{{train_all_data}}",
     "track_columns": "{{track_columns}}"
 }

@@ -72,16 +75,97 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     return df.rename(columns=rename_dict)


-…
+def convert_categorical_types(df: pd.DataFrame, features: list, category_mappings={}) -> tuple:
+    """
+    Converts appropriate columns to categorical type with consistent mappings.
+
+    Args:
+        df (pd.DataFrame): The DataFrame to process.
+        features (list): List of feature names to consider for conversion.
+        category_mappings (dict, optional): Existing category mappings. If empty dict, we're in
+                                            training mode. If populated, we're in inference mode.
+
+    Returns:
+        tuple: (processed DataFrame, category mappings dictionary)
+    """
+    # Training mode
+    if category_mappings == {}:
+        for col in df.select_dtypes(include=["object", "string"]):
+            if col in features and df[col].nunique() < 20:
+                print(f"Training mode: Converting {col} to category")
+                df[col] = df[col].astype("category")
+                category_mappings[col] = df[col].cat.categories.tolist()  # Store category mappings
+
+    # Inference mode
+    else:
+        for col, categories in category_mappings.items():
+            if col in df.columns:
+                print(f"Inference mode: Applying categorical mapping for {col}")
+                df[col] = pd.Categorical(df[col], categories=categories)  # Apply consistent categorical mapping
+
+    return df, category_mappings
+
+
+def decompress_features(df: pd.DataFrame, features: List[str], compressed_features: List[str]) -> Tuple[pd.DataFrame, List[str]]:
+    """Prepare features for the XGBoost model
+
+    Args:
+        df (pd.DataFrame): The features DataFrame
+        features (List[str]): Full list of feature names
+        compressed_features (List[str]): List of feature names to decompress (bitstrings)
+
+    Returns:
+        pd.DataFrame: DataFrame with the decompressed features
+        List[str]: Updated list of feature names after decompression
+
+    Raises:
+        ValueError: If any missing values are found in the specified features
+    """
+    # Check for any missing values in the required features
+    missing_counts = df[features].isna().sum()
+    if missing_counts.any():
+        missing_features = missing_counts[missing_counts > 0]
+        print(
+            f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
+            "WARNING: You might want to remove/replace all NaN values before processing."
+        )
+
+    # Decompress the specified compressed features
+    decompressed_features = features
+    for feature in compressed_features:
+        if (feature not in df.columns) or (feature not in features):
+            print(f"Feature '{feature}' not in the features list, skipping decompression.")
+            continue
+
+        # Remove the feature from the list of features to avoid duplication
+        decompressed_features.remove(feature)
+
+        # Handle all compressed features as bitstrings
+        bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
+        prefix = feature[:3]
+
+        # Create all new columns at once - avoids fragmentation
+        new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
+        new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
+
+        # Add to features list
+        decompressed_features.extend(new_col_names)
+
+        # Drop original column and concatenate new ones
+        df = df.drop(columns=[feature])
+        df = pd.concat([df, new_df], axis=1)
+
+    return df, decompressed_features
+
+
 if __name__ == "__main__":
     # Template Parameters
     id_column = TEMPLATE_PARAMS["id_column"]
-    features = TEMPLATE_PARAMS["features"]
     target = TEMPLATE_PARAMS["target"]
+    features = TEMPLATE_PARAMS["features"]
+    orig_features = features.copy()
+    compressed_features = TEMPLATE_PARAMS["compressed_features"]
     train_all_data = TEMPLATE_PARAMS["train_all_data"]
     track_columns = TEMPLATE_PARAMS["track_columns"]  # Can be None
     validation_split = 0.2

@@ -95,34 +179,51 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()

-    # …
+    # Read the training data into DataFrames
     training_files = [
         os.path.join(args.train, file)
-        for file in os.listdir(args.train)
+        for file in os.listdir(args.train)
+        if file.endswith(".csv")
     ]
     print(f"Training Files: {training_files}")

     # Combine files and read them all into a single pandas dataframe
-    …
+    all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
+
+    # Check if the dataframe is empty
+    check_dataframe(all_df, "training_df")
+
+    # Features/Target output
+    print(f"Target: {target}")
+    print(f"Features: {str(features)}")

-    # …
+    # Convert any features that might be categorical to 'category' type
+    all_df, category_mappings = convert_categorical_types(all_df, features)

-    # …
+    # If we have compressed features, decompress them
+    if compressed_features:
+        print(f"Decompressing features {compressed_features}...")
+        all_df, features = decompress_features(all_df, features, compressed_features)
+
+    # Do we want to train on all the data?
     if train_all_data:
-        …
-        print("…
-        df_train = …
-        df_val = …
+        print("Training on ALL of the data")
+        df_train = all_df.copy()
+        df_val = all_df.copy()
+
+    # Does the dataframe have a training column?
+    elif "training" in all_df.columns:
+        print("Found training column, splitting data based on training column")
+        df_train = all_df[all_df["training"]]
+        df_val = all_df[~all_df["training"]]
     else:
-        # …
-        print("…
-        df_train, df_val = train_test_split(…
+        # Just do a random training Split
+        print("WARNING: No training column found, splitting data with random state=42")
+        df_train, df_val = train_test_split(
+            all_df, test_size=validation_split, random_state=42
+        )
+    print(f"FIT/TRAIN: {df_train.shape}")
+    print(f"VALIDATION: {df_val.shape}")

     # We're using XGBoost for point predictions and NGBoost for uncertainty quantification
     xgb_model = XGBRegressor()

@@ -130,18 +231,16 @@ if __name__ == "__main__":

     # Prepare features and targets for training
     X_train = df_train[features]
-    …
+    X_validate = df_val[features]
     y_train = df_train[target]
-    …
+    y_validate = df_val[target]

     # Train both models using the training data
     xgb_model.fit(X_train, y_train)
-    ngb_model.fit(X_train, y_train, X_val=…
+    ngb_model.fit(X_train, y_train, X_val=X_validate, Y_val=y_validate)

     # Make Predictions on the Validation Set
     print(f"Making Predictions on Validation Set...")
-    y_validate = df_val[target]
-    X_validate = df_val[features]
     preds = xgb_model.predict(X_validate)

     # Calculate various model performance metrics (regression)

@@ -159,9 +258,9 @@ if __name__ == "__main__":
     # Save the trained NGBoost model
     joblib.dump(ngb_model, os.path.join(args.model_dir, "ngb_model.joblib"))

-    # Save the …
+    # Save the features (this will validate input during predictions)
     with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
-        json.dump(…
+        json.dump(orig_features, fp)  # We save the original features, not the decompressed ones

     # Now the Proximity model
     model = Proximity(df_train, id_column, features, target, track_columns=track_columns)

@@ -255,16 +354,27 @@ def predict_fn(df, models) -> pd.DataFrame:
     df["prediction_std"] = dist_params['scale']  # standard deviation

     # Add 95% prediction intervals using ppf (percent point function)
-    …
+    # Note: Our hybrid model uses XGB point prediction and NGBoost UQ
+    # so we need to adjust the bounds to include the point prediction
+    df["q_025"] = np.minimum(y_dists.ppf(0.025), df["prediction"])
+    df["q_975"] = np.maximum(y_dists.ppf(0.975), df["prediction"])
+
+    # Add 90% prediction intervals
+    df["q_05"] = y_dists.ppf(0.05)  # 5th percentile
+    df["q_95"] = y_dists.ppf(0.95)  # 95th percentile
+
+    # Add 80% prediction intervals
+    df["q_10"] = y_dists.ppf(0.10)  # 10th percentile
+    df["q_90"] = y_dists.ppf(0.90)  # 90th percentile

     # Add 50% prediction intervals
-    df["q_25"] = y_dists.ppf(0.25)
-    df["q_75"] = y_dists.ppf(0.75)
+    df["q_25"] = y_dists.ppf(0.25)  # 25th percentile
+    df["q_75"] = y_dists.ppf(0.75)  # 75th percentile

-    # …
+    # Reorder the quantile columns for easier reading
+    quantile_cols = ["q_025", "q_05", "q_10", "q_25", "q_75", "q_90", "q_95", "q_975"]
+    other_cols = [col for col in df.columns if col not in quantile_cols]
+    df = df[other_cols + quantile_cols]

     # Compute Nearest neighbors with Proximity model
     models["proximity"].neighbors(df)
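A brief sketch (toy data, hypothetical column name) of the training/inference round trip that `convert_categorical_types()` implements: training fixes the category set, and inference re-applies the stored mapping so unseen values fall out as NaN.

```python
import pandas as pd

# "Training mode": derive and store the category mapping
train = pd.DataFrame({"solvent": ["dmso", "water", "dmso"]})
train["solvent"] = train["solvent"].astype("category")
mappings = {"solvent": train["solvent"].cat.categories.tolist()}

# "Inference mode": apply the stored mapping to new data
infer = pd.DataFrame({"solvent": ["water", "acetone"]})
infer["solvent"] = pd.Categorical(infer["solvent"], categories=mappings["solvent"])
print(infer["solvent"])  # "acetone" was never seen in training, so it becomes NaN
```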
workbench/model_scripts/custom_models/uq_models/ngboost.template

@@ -219,9 +219,22 @@ def predict_fn(df, model) -> pd.DataFrame:
     df["q_025"] = y_dists.ppf(0.025)  # 2.5th percentile
     df["q_975"] = y_dists.ppf(0.975)  # 97.5th percentile

+    # Add 90% prediction intervals
+    df["q_05"] = y_dists.ppf(0.05)  # 5th percentile
+    df["q_95"] = y_dists.ppf(0.95)  # 95th percentile
+
+    # Add 80% prediction intervals
+    df["q_10"] = y_dists.ppf(0.10)  # 10th percentile
+    df["q_90"] = y_dists.ppf(0.90)  # 90th percentile
+
     # Add 50% prediction intervals
-    df["q_25"] = y_dists.ppf(0.25)
-    df["q_75"] = y_dists.ppf(0.75)
+    df["q_25"] = y_dists.ppf(0.25)  # 25th percentile
+    df["q_75"] = y_dists.ppf(0.75)  # 75th percentile
+
+    # Reorder the quantile columns for easier reading
+    quantile_cols = ["q_025", "q_05", "q_10", "q_25", "q_75", "q_90", "q_95", "q_975"]
+    other_cols = [col for col in df.columns if col not in quantile_cols]
+    df = df[other_cols + quantile_cols]

     # Return the modified DataFrame
     return df
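For readers unfamiliar with where `y_dists.ppf(...)` comes from, here is a minimal, self-contained sketch (synthetic data; not the template's exact code) of how NGBoost's predictive distributions yield the new interval columns.

```python
import numpy as np
from ngboost import NGBRegressor

# Tiny synthetic regression problem
rng = np.random.default_rng(42)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.3, size=200)

ngb = NGBRegressor(verbose=False).fit(X, y)
y_dists = ngb.pred_dist(X[:5])                     # per-row predictive distributions
q_05, q_95 = y_dists.ppf(0.05), y_dists.ppf(0.95)  # bounds of a 90% prediction interval
print(np.column_stack([q_05, q_95]))
```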
workbench/model_scripts/xgb_model/generated_model_script.py

@@ -28,12 +28,12 @@ from typing import List, Tuple

 # Template Parameters
 TEMPLATE_PARAMS = {
-    "model_type": "…
-    "…
-
"features": ['bcut2d_logplow', 'numradicalelectrons', 'smr_vsa5', 'fr_lactam', 'fr_morpholine', 'fr_aldehyde', 'slogp_vsa1', 'fr_amidine', 'bpol', 'fr_ester', 'fr_azo', 'kappa3', 'peoe_vsa5', 'fr_ketone_topliss', 'vsa_estate9', 'estate_vsa9', 'bcut2d_mrhi', 'fr_ndealkylation1', 'numrotatablebonds', 'minestateindex', 'fr_quatn', 'peoe_vsa3', 'fr_epoxide', 'fr_aniline', 'minpartialcharge', 'fr_nitroso', 'fpdensitymorgan2', 'fr_oxime', 'fr_sulfone', 'smr_vsa1', 'kappa1', 'fr_pyridine', 'numaromaticrings', 'vsa_estate6', 'molmr', 'estate_vsa1', 'fr_dihydropyridine', 'vsa_estate10', 'fr_alkyl_halide', 'chi2n', 'fr_thiocyan', 'fpdensitymorgan1', 'fr_unbrch_alkane', 'slogp_vsa9', 'chi4n', 'fr_nitro_arom', 'fr_al_oh', 'fr_furan', 'fr_c_s', 'peoe_vsa8', 'peoe_vsa14', 'numheteroatoms', 'fr_ndealkylation2', 'maxabspartialcharge', 'vsa_estate2', 'peoe_vsa7', 'apol', 'numhacceptors', 'fr_tetrazole', 'vsa_estate1', 'peoe_vsa9', 'naromatom', 'bcut2d_chghi', 'fr_sh', 'fr_halogen', 'slogp_vsa4', 'fr_benzodiazepine', 'molwt', 'fr_isocyan', 'fr_prisulfonamd', 'maxabsestateindex', 'minabsestateindex', 'peoe_vsa11', 'slogp_vsa12', 'estate_vsa5', 'numaliphaticcarbocycles', 'bcut2d_mwlow', 'slogp_vsa7', 'fr_allylic_oxid', 'fr_methoxy', 'fr_nh0', 'fr_coo2', 'fr_phenol', 'nacid', 'nbase', 'chi3v', 'fr_ar_nh', 'fr_nitrile', 'fr_imidazole', 'fr_urea', 'bcut2d_mrlow', 'chi1', 'smr_vsa6', 'fr_aryl_methyl', 'narombond', 'fr_alkyl_carbamate', 'fr_piperzine', 'exactmolwt', 'qed', 'chi0n', 'fr_sulfonamd', 'fr_thiazole', 'numvalenceelectrons', 'fr_phos_acid', 'peoe_vsa12', 'fr_nh1', 'fr_hdrzine', 'fr_c_o_nocoo', 'fr_lactone', 'estate_vsa6', 'bcut2d_logphi', 'vsa_estate7', 'peoe_vsa13', 'numsaturatedcarbocycles', 'fr_nitro', 'fr_phenol_noorthohbond', 'rotratio', 'fr_barbitur', 'fr_isothiocyan', 'balabanj', 'fr_arn', 'fr_imine', 'maxpartialcharge', 'fr_sulfide', 'slogp_vsa11', 'fr_hoccn', 'fr_n_o', 'peoe_vsa1', 'slogp_vsa6', 'heavyatommolwt', 'fractioncsp3', 'estate_vsa8', 'peoe_vsa10', 'numaliphaticrings', 'fr_thiophene', 'maxestateindex', 'smr_vsa10', 'labuteasa', 'smr_vsa2', 'fpdensitymorgan3', 'smr_vsa9', 'slogp_vsa10', 'numaromaticheterocycles', 'fr_nh2', 'fr_diazo', 'chi3n', 'fr_ar_coo', 'slogp_vsa5', 'fr_bicyclic', 'fr_amide', 'estate_vsa10', 'fr_guanido', 'chi1n', 'numsaturatedrings', 'fr_piperdine', 'fr_term_acetylene', 'estate_vsa4', 'slogp_vsa3', 'fr_coo', 'fr_ether', 'estate_vsa7', 'bcut2d_chglo', 'fr_oxazole', 'peoe_vsa6', 'hallkieralpha', 'peoe_vsa2', 'chi2v', 'nocount', 'vsa_estate5', 'fr_nhpyrrole', 'fr_al_coo', 'bertzct', 'estate_vsa11', 'minabspartialcharge', 'slogp_vsa8', 'fr_imide', 'kappa2', 'numaliphaticheterocycles', 'numsaturatedheterocycles', 'fr_hdrzone', 'smr_vsa4', 'fr_ar_n', 'nrot', 'smr_vsa8', 'slogp_vsa2', 'chi4v', 'fr_phos_ester', 'fr_para_hydroxylation', 'smr_vsa3', 'nhohcount', 'estate_vsa2', 'mollogp', 'tpsa', 'fr_azide', 'peoe_vsa4', 'numhdonors', 'fr_al_oh_notert', 'fr_c_o', 'chi0', 'fr_nitro_arom_nonortho', 'vsa_estate3', 'fr_benzene', 'fr_ketone', 'vsa_estate8', 'smr_vsa7', 'fr_ar_oh', 'fr_priamide', 'ringcount', 'estate_vsa3', 'numaromaticcarbocycles', 'bcut2d_mwhi', 'chi1v', 'heavyatomcount', 'vsa_estate4', 'chi0v'
+    "model_type": "regressor",
+    "target": "udm_asy_res_intrinsic_clearance_ul_per_min_per_mg_protein",
+
"features": ['bcut2d_logplow', 'numradicalelectrons', 'smr_vsa5', 'fr_lactam', 'fr_morpholine', 'fr_aldehyde', 'slogp_vsa1', 'fr_amidine', 'bpol', 'fr_ester', 'fr_azo', 'kappa3', 'peoe_vsa5', 'fr_ketone_topliss', 'vsa_estate9', 'estate_vsa9', 'bcut2d_mrhi', 'fr_ndealkylation1', 'numrotatablebonds', 'minestateindex', 'fr_quatn', 'peoe_vsa3', 'fr_epoxide', 'fr_aniline', 'minpartialcharge', 'fr_nitroso', 'fpdensitymorgan2', 'fr_oxime', 'fr_sulfone', 'smr_vsa1', 'kappa1', 'fr_pyridine', 'numaromaticrings', 'vsa_estate6', 'molmr', 'estate_vsa1', 'fr_dihydropyridine', 'vsa_estate10', 'fr_alkyl_halide', 'chi2n', 'fr_thiocyan', 'fpdensitymorgan1', 'fr_unbrch_alkane', 'slogp_vsa9', 'chi4n', 'fr_nitro_arom', 'fr_al_oh', 'fr_furan', 'fr_c_s', 'peoe_vsa8', 'peoe_vsa14', 'numheteroatoms', 'fr_ndealkylation2', 'maxabspartialcharge', 'vsa_estate2', 'peoe_vsa7', 'apol', 'numhacceptors', 'fr_tetrazole', 'vsa_estate1', 'peoe_vsa9', 'naromatom', 'bcut2d_chghi', 'fr_sh', 'fr_halogen', 'slogp_vsa4', 'fr_benzodiazepine', 'molwt', 'fr_isocyan', 'fr_prisulfonamd', 'maxabsestateindex', 'minabsestateindex', 'peoe_vsa11', 'slogp_vsa12', 'estate_vsa5', 'numaliphaticcarbocycles', 'bcut2d_mwlow', 'slogp_vsa7', 'fr_allylic_oxid', 'fr_methoxy', 'fr_nh0', 'fr_coo2', 'fr_phenol', 'nacid', 'nbase', 'chi3v', 'fr_ar_nh', 'fr_nitrile', 'fr_imidazole', 'fr_urea', 'bcut2d_mrlow', 'chi1', 'smr_vsa6', 'fr_aryl_methyl', 'narombond', 'fr_alkyl_carbamate', 'fr_piperzine', 'exactmolwt', 'qed', 'chi0n', 'fr_sulfonamd', 'fr_thiazole', 'numvalenceelectrons', 'fr_phos_acid', 'peoe_vsa12', 'fr_nh1', 'fr_hdrzine', 'fr_c_o_nocoo', 'fr_lactone', 'estate_vsa6', 'bcut2d_logphi', 'vsa_estate7', 'peoe_vsa13', 'numsaturatedcarbocycles', 'fr_nitro', 'fr_phenol_noorthohbond', 'rotratio', 'fr_barbitur', 'fr_isothiocyan', 'balabanj', 'fr_arn', 'fr_imine', 'maxpartialcharge', 'fr_sulfide', 'slogp_vsa11', 'fr_hoccn', 'fr_n_o', 'peoe_vsa1', 'slogp_vsa6', 'heavyatommolwt', 'fractioncsp3', 'estate_vsa8', 'peoe_vsa10', 'numaliphaticrings', 'fr_thiophene', 'maxestateindex', 'smr_vsa10', 'labuteasa', 'smr_vsa2', 'fpdensitymorgan3', 'smr_vsa9', 'slogp_vsa10', 'numaromaticheterocycles', 'fr_nh2', 'fr_diazo', 'chi3n', 'fr_ar_coo', 'slogp_vsa5', 'fr_bicyclic', 'fr_amide', 'estate_vsa10', 'fr_guanido', 'chi1n', 'numsaturatedrings', 'fr_piperdine', 'fr_term_acetylene', 'estate_vsa4', 'slogp_vsa3', 'fr_coo', 'fr_ether', 'estate_vsa7', 'bcut2d_chglo', 'fr_oxazole', 'peoe_vsa6', 'hallkieralpha', 'peoe_vsa2', 'chi2v', 'nocount', 'vsa_estate5', 'fr_nhpyrrole', 'fr_al_coo', 'bertzct', 'estate_vsa11', 'minabspartialcharge', 'slogp_vsa8', 'fr_imide', 'kappa2', 'numaliphaticheterocycles', 'numsaturatedheterocycles', 'fr_hdrzone', 'smr_vsa4', 'fr_ar_n', 'nrot', 'smr_vsa8', 'slogp_vsa2', 'chi4v', 'fr_phos_ester', 'fr_para_hydroxylation', 'smr_vsa3', 'nhohcount', 'estate_vsa2', 'mollogp', 'tpsa', 'fr_azide', 'peoe_vsa4', 'numhdonors', 'fr_al_oh_notert', 'fr_c_o', 'chi0', 'fr_nitro_arom_nonortho', 'vsa_estate3', 'fr_benzene', 'fr_ketone', 'vsa_estate8', 'smr_vsa7', 'fr_ar_oh', 'fr_priamide', 'ringcount', 'estate_vsa3', 'numaromaticcarbocycles', 'bcut2d_mwhi', 'chi1v', 'heavyatomcount', 'vsa_estate4', 'chi0v'],
     "compressed_features": [],
-    "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/…
-    "train_all_data": …
+    "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/temp-hlm-phase1-reg-0-80/training",
+    "train_all_data": False
 }

 # Function to check if dataframe is empty

@@ -88,13 +88,12 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     """
     Matches and renames DataFrame columns to match model feature names (case-insensitive).
     Prioritizes exact matches, then case-insensitive matches.
-
+
     Raises ValueError if any model features cannot be matched.
     """
     df_columns_lower = {col.lower(): col for col in df.columns}
     rename_dict = {}
     missing = []
-
     for feature in model_features:
         if feature in df.columns:
             continue  # Exact match

@@ -102,10 +101,11 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
             rename_dict[df_columns_lower[feature.lower()]] = feature
         else:
             missing.append(feature)
-
+
     if missing:
         raise ValueError(f"Features not found: {missing}")
-
+
+    # Rename the DataFrame columns to match the model features
     return df.rename(columns=rename_dict)


@@ -197,7 +197,7 @@ if __name__ == "__main__":
     """The main function is for training the XGBoost model"""

     # Harness Template Parameters
-    target = TEMPLATE_PARAMS["…
+    target = TEMPLATE_PARAMS["target"]
     features = TEMPLATE_PARAMS["features"]
     orig_features = features.copy()
     compressed_features = TEMPLATE_PARAMS["compressed_features"]

@@ -390,7 +390,7 @@ def input_fn(input_data, content_type):
     """Parse input data and return a DataFrame."""
     if not input_data:
         raise ValueError("Empty input data is not supported!")
-
+
     # Decode bytes to string if necessary
     if isinstance(input_data, bytes):
         input_data = input_data.decode("utf-8")
workbench/model_scripts/xgb_model/xgb_model.template

@@ -29,7 +29,7 @@ from typing import List, Tuple
 # Template Parameters
 TEMPLATE_PARAMS = {
     "model_type": "{{model_type}}",
-    "…
+    "target": "{{target_column}}",
     "features": "{{feature_list}}",
     "compressed_features": "{{compressed_features}}",
     "model_metrics_s3_path": "{{model_metrics_s3_path}}",

@@ -88,13 +88,12 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
     """
     Matches and renames DataFrame columns to match model feature names (case-insensitive).
     Prioritizes exact matches, then case-insensitive matches.
-
+
     Raises ValueError if any model features cannot be matched.
     """
     df_columns_lower = {col.lower(): col for col in df.columns}
     rename_dict = {}
     missing = []
-
     for feature in model_features:
         if feature in df.columns:
             continue  # Exact match

@@ -102,10 +101,11 @@ def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> p
             rename_dict[df_columns_lower[feature.lower()]] = feature
         else:
             missing.append(feature)
-
+
     if missing:
         raise ValueError(f"Features not found: {missing}")
-
+
+    # Rename the DataFrame columns to match the model features
     return df.rename(columns=rename_dict)


@@ -197,7 +197,7 @@ if __name__ == "__main__":
     """The main function is for training the XGBoost model"""

     # Harness Template Parameters
-    target = TEMPLATE_PARAMS["…
+    target = TEMPLATE_PARAMS["target"]
     features = TEMPLATE_PARAMS["features"]
     orig_features = features.copy()
     compressed_features = TEMPLATE_PARAMS["compressed_features"]

@@ -390,7 +390,7 @@ def input_fn(input_data, content_type):
     """Parse input data and return a DataFrame."""
     if not input_data:
         raise ValueError("Empty input data is not supported!")
-
+
     # Decode bytes to string if necessary
     if isinstance(input_data, bytes):
         input_data = input_data.decode("utf-8")
workbench/scripts/ml_pipeline_batch.py (renamed from ml_pipeline_launcher.py)

@@ -76,7 +76,7 @@ def run_batch_job(script_path: str, size: str = "small") -> int:
     response = batch.submit_job(
         jobName=job_name,
         jobQueue="workbench-job-queue",
-        jobDefinition=f"workbench-…
+        jobDefinition=f"workbench-batch-{size}",
         containerOverrides={
             "environment": [
                 {"name": "ML_PIPELINE_S3_PATH", "value": s3_path},
workbench/scripts/ml_pipeline_sqs.py (new file)

@@ -0,0 +1,139 @@
+import argparse
+import logging
+import json
+from pathlib import Path
+
+# Workbench Imports
+from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
+from workbench.utils.config_manager import ConfigManager
+from workbench.utils.s3_utils import upload_content_to_s3
+
+log = logging.getLogger("workbench")
+cm = ConfigManager()
+workbench_bucket = cm.get_config("WORKBENCH_BUCKET")
+
+
+def submit_to_sqs(script_path: str, size: str = "small") -> None:
+    """
+    Upload script to S3 and submit message to SQS queue for processing.
+    Args:
+        script_path: Local path to the ML pipeline script
+        size: Job size tier - "small" (default), "medium", or "large"
+    """
+    print(f"\n{'=' * 60}")
+    print("🚀 SUBMITTING ML PIPELINE JOB")
+    print(f"{'=' * 60}")
+
+    if size not in ["small", "medium", "large"]:
+        raise ValueError(f"Invalid size '{size}'. Must be 'small', 'medium', or 'large'")
+    # Validate script exists
+    script_file = Path(script_path)
+    if not script_file.exists():
+        raise FileNotFoundError(f"Script not found: {script_path}")
+
+    print(f"📄 Script: {script_file.name}")
+    print(f"📏 Size tier: {size}")
+    print(f"🪣 Bucket: {workbench_bucket}")
+    sqs = AWSAccountClamp().boto3_session.client("sqs")
+    script_name = script_file.name
+
+    # List Workbench queues
+    print("\n📋 Listing Workbench SQS queues...")
+    try:
+        queues = sqs.list_queues(QueueNamePrefix="workbench-")
+        queue_urls = queues.get("QueueUrls", [])
+        if queue_urls:
+            print(f"✅ Found {len(queue_urls)} workbench queue(s):")
+            for url in queue_urls:
+                queue_name = url.split("/")[-1]
+                print(f"   • {queue_name}")
+        else:
+            print("⚠️ No workbench queues found")
+    except Exception as e:
+        print(f"❌ Error listing queues: {e}")
+
+    # Upload script to S3
+    s3_path = f"s3://{workbench_bucket}/batch-jobs/{script_name}"
+    print("\n📤 Uploading script to S3...")
+    print(f"   Source: {script_path}")
+    print(f"   Destination: {s3_path}")
+
+    try:
+        upload_content_to_s3(script_file.read_text(), s3_path)
+        print("✅ Script uploaded successfully")
+    except Exception as e:
+        print(f"❌ Upload failed: {e}")
+        raise
+    # Get queue URL and info
+    queue_name = "workbench-ml-pipeline-queue.fifo"
+    print("\n🎯 Getting queue information...")
+    print(f"   Queue name: {queue_name}")
+
+    try:
+        queue_url = sqs.get_queue_url(QueueName=queue_name)["QueueUrl"]
+        print(f"   Queue URL: {queue_url}")
+
+        # Get queue attributes for additional info
+        attrs = sqs.get_queue_attributes(
+            QueueUrl=queue_url, AttributeNames=["ApproximateNumberOfMessages", "ApproximateNumberOfMessagesNotVisible"]
+        )
+        messages_available = attrs["Attributes"].get("ApproximateNumberOfMessages", "0")
+        messages_in_flight = attrs["Attributes"].get("ApproximateNumberOfMessagesNotVisible", "0")
+        print(f"   Messages in queue: {messages_available}")
+        print(f"   Messages in flight: {messages_in_flight}")
+
+    except Exception as e:
+        print(f"❌ Error accessing queue: {e}")
+        raise
+
+    # Prepare message
+    message = {"script_path": s3_path, "size": size}
+    print("\n📨 Sending message to SQS...")
+
+    # Send the message to SQS
+    try:
+        response = sqs.send_message(
+            QueueUrl=queue_url,
+            MessageBody=json.dumps(message, indent=2),
+            MessageGroupId="ml-pipeline-jobs",  # Required for FIFO
+        )
+        message_id = response["MessageId"]
+        print("✅ Message sent successfully!")
+        print(f"   Message ID: {message_id}")
+    except Exception as e:
+        print(f"❌ Failed to send message: {e}")
+        raise
+
+    # Success summary
+    print(f"\n{'=' * 60}")
+    print("✅ JOB SUBMISSION COMPLETE")
+    print(f"{'=' * 60}")
+    print(f"📄 Script: {script_name}")
+    print(f"📏 Size: {size}")
+    print(f"🆔 Message ID: {message_id}")
+    print("\n🔍 MONITORING LOCATIONS:")
+    print(f"   • SQS Queue: AWS Console → SQS → {queue_name}")
+    print("   • Lambda Logs: AWS Console → Lambda → Functions")
+    print("   • Batch Jobs: AWS Console → Batch → Jobs")
+    print("   • CloudWatch: AWS Console → CloudWatch → Log groups")
+    print("\n⏳ Your job should start processing soon...")
+
+
+def main():
+    """CLI entry point for submitting ML pipelines via SQS."""
+    parser = argparse.ArgumentParser(description="Submit ML pipeline to SQS queue for Batch processing")
+    parser.add_argument("script_file", help="Local path to ML pipeline script")
+    parser.add_argument(
+        "--size", default="small", choices=["small", "medium", "large"], help="Job size tier (default: small)"
+    )
+    args = parser.parse_args()
+    try:
+        submit_to_sqs(args.script_file, args.size)
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        log.error(f"Error: {e}")
+        exit(1)
+
+
+if __name__ == "__main__":
+    main()
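A hypothetical usage sketch for the new module (function and module path are taken from the diff above; the pipeline script name is made up): it uploads the script to the workbench S3 bucket and enqueues a job message on the FIFO queue. The entry_points.txt change (+2 -1) suggests a console-script wrapper around main() as well, though its name isn't visible in this diff.

```python
from workbench.scripts.ml_pipeline_sqs import submit_to_sqs

# Uploads my_pipeline.py to s3://<WORKBENCH_BUCKET>/batch-jobs/ and sends a
# {"script_path": ..., "size": ...} message to workbench-ml-pipeline-queue.fifo
submit_to_sqs("my_pipeline.py", size="medium")
```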
workbench/utils/model_utils.py
@@ -220,6 +220,8 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
     # --- Coverage and Interval Width ---
     if "q_025" in df.columns and "q_975" in df.columns:
         lower_95, upper_95 = df["q_025"], df["q_975"]
+        lower_90, upper_90 = df["q_05"], df["q_95"]
+        lower_80, upper_80 = df["q_10"], df["q_90"]
         lower_50, upper_50 = df["q_25"], df["q_75"]
     elif "prediction_std" in df.columns:
         lower_95 = df["prediction"] - 1.96 * df["prediction_std"]

@@ -231,8 +233,12 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
             "Either quantile columns (q_025, q_975, q_25, q_75) or 'prediction_std' column must be present."
         )
     coverage_95 = np.mean((df[target_col] >= lower_95) & (df[target_col] <= upper_95))
+    coverage_90 = np.mean((df[target_col] >= lower_90) & (df[target_col] <= upper_90))
+    coverage_80 = np.mean((df[target_col] >= lower_80) & (df[target_col] <= upper_80))
     coverage_50 = np.mean((df[target_col] >= lower_50) & (df[target_col] <= upper_50))
     avg_width_95 = np.mean(upper_95 - lower_95)
+    avg_width_90 = np.mean(upper_90 - lower_90)
+    avg_width_80 = np.mean(upper_80 - lower_80)
     avg_width_50 = np.mean(upper_50 - lower_50)

     # --- CRPS (measures calibration + sharpness) ---

@@ -260,6 +266,8 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
     # Collect results
     results = {
         "coverage_95": coverage_95,
+        "coverage_90": coverage_90,
+        "coverage_80": coverage_80,
         "coverage_50": coverage_50,
         "avg_width_95": avg_width_95,
         "avg_width_50": avg_width_50,

@@ -271,8 +279,12 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:

     print("\n=== UQ Metrics ===")
     print(f"Coverage @ 95%: {coverage_95:.3f} (target: 0.95)")
+    print(f"Coverage @ 90%: {coverage_90:.3f} (target: 0.90)")
+    print(f"Coverage @ 80%: {coverage_80:.3f} (target: 0.80)")
     print(f"Coverage @ 50%: {coverage_50:.3f} (target: 0.50)")
     print(f"Average 95% Width: {avg_width_95:.3f}")
+    print(f"Average 90% Width: {avg_width_90:.3f}")
+    print(f"Average 80% Width: {avg_width_80:.3f}")
     print(f"Average 50% Width: {avg_width_50:.3f}")
     print(f"CRPS: {mean_crps:.3f} (lower is better)")
     print(f"Interval Score 95%: {mean_is_95:.3f} (lower is better)")
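A tiny worked example (made-up numbers, not workbench data) of what the new 80% and 90% coverage metrics measure: the fraction of observed targets that land inside the corresponding quantile interval.

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "target": [1.0, 2.0, 3.0, 4.0],
    "q_10": [0.5, 1.9, 3.2, 3.0], "q_90": [1.5, 2.4, 3.9, 4.5],  # 80% interval
    "q_05": [0.2, 1.7, 2.9, 2.8], "q_95": [1.8, 2.6, 4.1, 4.8],  # 90% interval
})
coverage_80 = np.mean((df["target"] >= df["q_10"]) & (df["target"] <= df["q_90"]))
coverage_90 = np.mean((df["target"] >= df["q_05"]) & (df["target"] <= df["q_95"]))
print(coverage_80, coverage_90)  # 0.75 1.0 -> well-calibrated models track the nominal levels
```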
workbench/web_interface/components/plugins/dashboard_status.py

@@ -72,7 +72,9 @@ class DashboardStatus(PluginInterface):
             details = "**Redis:** 🔴 Failed to Connect<br>"

         # Fill in the license details
-        …
+        redis_host = config_info.get("REDIS_HOST", "NOT SET")
+        redis_port = config_info.get("REDIS_PORT", "NOT SET")
+        details += f"**Redis Server:** {redis_host}:{redis_port}<br>"
         details += f"**Workbench S3 Bucket:** {config_info['WORKBENCH_BUCKET']}<br>"
         details += f"**Plugin Path:** {config_info.get('WORKBENCH_PLUGINS', 'unknown')}<br>"
         details += f"**Themes Path:** {config_info.get('WORKBENCH_THEMES', 'unknown')}<br>"
workbench-0.8.171.dist-info/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.170
+Version: 0.8.171
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License-Expression: MIT
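One way to confirm which release is actually installed is to read this METADATA field back through the standard library (importlib.metadata has been available since Python 3.8):

# Reads the Version field of the installed "workbench" distribution's METADATA.
from importlib.metadata import version

print(version("workbench"))  # expected: 0.8.171 after upgrading to this release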
workbench-0.8.171.dist-info/RECORD
CHANGED

@@ -62,7 +62,7 @@ workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQF
 workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=OzFknZXKW7VTvnDGGX4BXKoh0i1gQ7yaEBhkLCyHFSs,6310
 workbench/core/cloud_platform/aws/aws_df_store.py,sha256=utRIlTCPwFneHHZ8_Z3Hw3rOJSeryiFA4wBtucxULRQ,15055
 workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
-workbench/core/cloud_platform/aws/aws_meta.py,sha256=
+workbench/core/cloud_platform/aws/aws_meta.py,sha256=eY9Pn6pl2yAyseACFb2nitR-0vLwG4i8CSEXe8Iaswc,34778
 workbench/core/cloud_platform/aws/aws_parameter_store.py,sha256=9ekuMOQFHFMIEV68UbHhS_fLB9iqG5Hvu4EV6iamEpk,10400
 workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
 workbench/core/cloud_platform/aws/aws_session.py,sha256=2Gc_k4Q87BBeQDgXgVR-w-qmsF6ncZR8wvTeNnixM6k,6926
@@ -139,10 +139,10 @@ workbench/model_scripts/custom_models/uq_models/Readme.md,sha256=UVpL-lvtTrLqwBe
 workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template,sha256=U4LIlpp8Rbu3apyzPR7-55lvlutpTsCro_PUvQ5pklY,6457
 workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template,sha256=0IJnSBACQ556ldEiPqR7yPCOOLJs1hQhHmPBvB2d9tY,13491
 workbench/model_scripts/custom_models/uq_models/gaussian_process.template,sha256=QbDUfkiPCwJ-c-4Twgu4utZuYZaAyeW_3T1IP-_tutw,6683
-workbench/model_scripts/custom_models/uq_models/generated_model_script.py,sha256=
+workbench/model_scripts/custom_models/uq_models/generated_model_script.py,sha256=QsMivNf77m4XfrV9aYTB7K3vI-InwegD7gyLZFNQmF4,17170
 workbench/model_scripts/custom_models/uq_models/mapie_xgb.template,sha256=ZTmerwkmXtewJwx3GGJSdLRyzJV5SJ86PvCu3dV_GHw,7330
-workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=
-workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=
+workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=FqLLbuKMijd4DjmxuBBQN3_vZcbl8WF0BZU8HRK48_0,13977
+workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=9-O6P-SW50ul5Wl6es2DMWXSbrwOg7HWsdc8Qdln0MM,8278
 workbench/model_scripts/custom_models/uq_models/proximity.py,sha256=zqmNlX70LnWXr5fdtFFQppSNTLjlOciQVrjGr-g9jRE,13716
 workbench/model_scripts/custom_models/uq_models/requirements.txt,sha256=jfwV5b1t6BFtdaRGrSz8LnuQzJm-4V5OlhhP-4CGxhs,107
 workbench/model_scripts/custom_script_example/custom_model_script.py,sha256=T8aydawgRVAdSlDimoWpXxG2YuWWQkbcjBVjAeSG2_0,6408
@@ -158,16 +158,17 @@ workbench/model_scripts/quant_regression/requirements.txt,sha256=jWlGc7HH7vqyukT
 workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=c73ZpJBlU5k13Nx-ZDkLXu7da40CYyhwjwwmuPq6uLg,12870
 workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb2gHXu_kpGPho3ANBzlOkfcvs,107
 workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=d4pgeZYFezUQsB-7iIsjsUgB1FM6d27651wpfDdXmI0,12640
-workbench/model_scripts/xgb_model/generated_model_script.py,sha256=
+workbench/model_scripts/xgb_model/generated_model_script.py,sha256=nU9BLU0wIhK066HAgChgNLcuOM94vBqweoH8xB8wBeo,21152
 workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
-workbench/model_scripts/xgb_model/xgb_model.template,sha256=
+workbench/model_scripts/xgb_model/xgb_model.template,sha256=HViJRsMWn393hP8VJRS45UQBzUVBhwR5sKc8Ern-9f4,17963
 workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workbench/repl/workbench_shell.py,sha256=eJ3rpYgEwZjhrVVCaJHht2N5BrimN6mbxqHXGrJmwC8,22130
 workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscuwLJ57JmuOc,433
 workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
 workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
 workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
-workbench/scripts/
+workbench/scripts/ml_pipeline_batch.py,sha256=1T5JnLlUJR7bwAGBLHmLPOuj1xFRqVIQX8PsuDhHy8o,4907
+workbench/scripts/ml_pipeline_sqs.py,sha256=7w67UUuZNYnxXiZG48gpoEFbH-c_cUfjMg0FgWI0DbQ,5100
 workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
 workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
 workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
@@ -219,7 +220,7 @@ workbench/utils/lambda_utils.py,sha256=7GhGRPyXn9o-toWb9HBGSnI8-DhK9YRkwhCSk_mNK
 workbench/utils/license_manager.py,sha256=sDuhk1mZZqUbFmnuFXehyGnui_ALxrmYBg7gYwoo7ho,6975
 workbench/utils/log_utils.py,sha256=7n1NJXO_jUX82e6LWAQug6oPo3wiPDBYsqk9gsYab_A,3167
 workbench/utils/markdown_utils.py,sha256=4lEqzgG4EVmLcvvKKNUwNxVCySLQKJTJmWDiaDroI1w,8306
-workbench/utils/model_utils.py,sha256=
+workbench/utils/model_utils.py,sha256=S_fGnYucuOH5YfNviH-K85hUjSh1zFRCIjuduax7rvU,11940
 workbench/utils/monitor_utils.py,sha256=ywoEdqoHY9t5PYRstjitS_halEWO6veCL_06BekmMVo,9153
 workbench/utils/pandas_utils.py,sha256=LQTfZ3WJkg3rIahNJhsz1YV2y_0DBG94lO-KMmEY1g0,39325
 workbench/utils/performance_utils.py,sha256=WDNvz-bOdC99cDuXl0urAV4DJ7alk_V3yzKPwvqgST4,1329
@@ -256,7 +257,7 @@ workbench/web_interface/components/experiments/dashboard_metric_plots.py,sha256=
 workbench/web_interface/components/experiments/outlier_plot.py,sha256=5bWsmJEXyt50npeQxLHXCPtiq4WRVgg938Sl0DVjNWg,3647
 workbench/web_interface/components/plugins/ag_table.py,sha256=HrPOMotlOGigk0v8Cxx_doSHXdOKTT1-bzlsqDwwzng,3979
 workbench/web_interface/components/plugins/confusion_matrix.py,sha256=1K94JSlDwQwdf5uDYVydQzY-EQm89hYXchxbXoNvons,7176
-workbench/web_interface/components/plugins/dashboard_status.py,sha256=
+workbench/web_interface/components/plugins/dashboard_status.py,sha256=4plmoiXj3dDjoQerUNpep_jfk50pI9rHvcoSP20UbE8,5832
 workbench/web_interface/components/plugins/data_details.py,sha256=pZm1AbM_0EXQwx77qUkfyrU9MedAs4Wlkp6iOtSrUtI,11104
 workbench/web_interface/components/plugins/endpoint_details.py,sha256=0A7g_Lx5-3XnDWOGT3YEDPNpmME_-WfYc65f-rRVjJE,3769
 workbench/web_interface/components/plugins/generated_compounds.py,sha256=hC0sh-1_rbN55Huno-E_2wF37kgIHi5Mtaer6Xk5fRM,8052
@@ -276,9 +277,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.171.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
+workbench-0.8.171.dist-info/METADATA,sha256=cLYIPKqidwQU6U3CIprMiMImJm8hwvKBAJBXGck_Aqo,9210
+workbench-0.8.171.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+workbench-0.8.171.dist-info/entry_points.txt,sha256=zPFPruY9uayk8-wsKrhfnIyIB6jvZOW_ibyllEIsLWo,356
+workbench-0.8.171.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.171.dist-info/RECORD,,
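Each RECORD line follows the wheel convention path,sha256=<digest>,size, where the digest is the urlsafe base64 SHA-256 of the file contents with the trailing '=' padding stripped. A short sketch of recomputing one entry for a locally checked-out file (the path below is illustrative):

# Recompute a RECORD-style entry: urlsafe-base64 SHA-256 digest (padding
# stripped) plus the file size in bytes.
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


# Example (illustrative path): should reproduce the model_utils.py line above,
# i.e. "...sha256=S_fGnYucuOH5YfNviH-K85hUjSh1zFRCIjuduax7rvU,11940".
print(record_entry("workbench/utils/model_utils.py"))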
workbench-0.8.171.dist-info/entry_points.txt
CHANGED

@@ -1,6 +1,7 @@
 [console_scripts]
 cloud_watch = workbench.scripts.monitor_cloud_watch:main
 glue_launcher = workbench.scripts.glue_launcher:main
-
+ml_pipeline_batch = workbench.scripts.ml_pipeline_batch:main
+ml_pipeline_sqs = workbench.scripts.ml_pipeline_sqs:main
 workbench = workbench.repl.workbench_shell:launch_shell
 workbench_config = workbench.scripts.show_config:main
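On install, each console_scripts line becomes a shell command (here ml_pipeline_batch and ml_pipeline_sqs) that imports the named module and calls its main function. A small sketch of resolving those entry points programmatically (the group= keyword to entry_points requires Python 3.10+):

# List the workbench ML-pipeline console scripts and the callables they map to.
from importlib.metadata import entry_points

for ep in entry_points(group="console_scripts"):
    if ep.value.startswith("workbench.scripts.ml_pipeline"):
        print(f"{ep.name} -> {ep.value}")
        # main = ep.load()  # would import the module and return its main() callable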
File without changes
File without changes
File without changes