PyPI - workbench - Versions diffs - 0.8.217__py3-none-any.whl → 0.8.219__py3-none-any.whl - Mend

workbench 0.8.217__py3-none-any.whl → 0.8.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

workbench/utils/chem_utils/fingerprints.py CHANGED Viewed

@@ -1,11 +1,19 @@
-"""Molecular fingerprint computation utilities"""
+"""Molecular fingerprint computation utilities for ADMET modeling.
+This module provides Morgan count fingerprints, the standard for ADMET prediction.
+Count fingerprints outperform binary fingerprints for molecular property prediction.
+References:
+    - Count vs Binary: https://pubs.acs.org/doi/10.1021/acs.est.3c02198
+    - ECFP/Morgan: https://pubs.acs.org/doi/10.1021/ci100050t
+"""
 import logging
-import pandas as pd
-# Molecular Descriptor Imports
+import numpy as np
+import pandas as pd
 from rdkit import Chem, RDLogger
-from rdkit.Chem import rdFingerprintGenerator
+from rdkit.Chem import AllChem
 from rdkit.Chem.MolStandardize import rdMolStandardize
 # Suppress RDKit warnings (e.g., "not removing hydrogen atom without neighbors")
@@ -16,20 +24,25 @@ RDLogger.DisableLog("rdApp.warning")
 log = logging.getLogger("workbench")
-def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=True) -> pd.DataFrame:
-    """Compute and add Morgan fingerprints to the DataFrame.
+def compute_morgan_fingerprints(df: pd.DataFrame, radius: int = 2, n_bits: int = 2048) -> pd.DataFrame:
+    """Compute Morgan count fingerprints for ADMET modeling.
+    Generates true count fingerprints where each bit position contains the
+    number of times that substructure appears in the molecule (clamped to 0-255).
+    This is the recommended approach for ADMET prediction per 2025 research.
     Args:
-        df (pd.DataFrame): Input DataFrame containing SMILES strings.
-        radius (int): Radius for the Morgan fingerprint.
-        n_bits (int): Number of bits for the fingerprint.
-        counts (bool): Count simulation for the fingerprint.
+        df: Input DataFrame containing SMILES strings.
+        radius: Radius for the Morgan fingerprint (default 2 = ECFP4 equivalent).
+        n_bits: Number of bits for the fingerprint (default 2048).
     Returns:
-        pd.DataFrame: The input DataFrame with the Morgan fingerprints added as bit strings.
+        pd.DataFrame: Input DataFrame with 'fingerprint' column added.
+                      Values are comma-separated uint8 counts.
     Note:
-        See: https://greglandrum.github.io/rdkit-blog/posts/2021-07-06-simulating-counts.html
+        Count fingerprints outperform binary for ADMET prediction.
+        See: https://pubs.acs.org/doi/10.1021/acs.est.3c02198
     """
     delete_mol_column = False
@@ -43,7 +56,7 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
         log.warning("Detected serialized molecules in 'molecule' column. Removing...")
         del df["molecule"]
-    # Convert SMILES to RDKit molecule objects (vectorized)
+    # Convert SMILES to RDKit molecule objects
     if "molecule" not in df.columns:
         log.info("Converting SMILES to RDKit Molecules...")
         delete_mol_column = True
@@ -59,15 +72,24 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
         lambda mol: rdMolStandardize.LargestFragmentChooser().choose(mol) if mol else None
     )
-    # Create a Morgan fingerprint generator
-    if counts:
-        n_bits *= 4  # Multiply by 4 to simulate counts
-    morgan_generator = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=n_bits, countSimulation=counts)
+    def mol_to_count_string(mol):
+        """Convert molecule to comma-separated count fingerprint string."""
+        if mol is None:
+            return pd.NA
-    # Compute Morgan fingerprints (vectorized)
-    fingerprints = largest_frags.apply(
-        lambda mol: (morgan_generator.GetFingerprint(mol).ToBitString() if mol else pd.NA)
-    )
+        # Get hashed Morgan fingerprint with counts
+        fp = AllChem.GetHashedMorganFingerprint(mol, radius, nBits=n_bits)
+        # Initialize array and populate with counts (clamped to uint8 range)
+        counts = np.zeros(n_bits, dtype=np.uint8)
+        for idx, count in fp.GetNonzeroElements().items():
+            counts[idx] = min(count, 255)
+        # Return as comma-separated string
+        return ",".join(map(str, counts))
+    # Compute Morgan count fingerprints
+    fingerprints = largest_frags.apply(mol_to_count_string)
     # Add the fingerprints to the DataFrame
     df["fingerprint"] = fingerprints
@@ -75,59 +97,62 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
     # Drop the intermediate 'molecule' column if it was added
     if delete_mol_column:
         del df["molecule"]
     return df
 if __name__ == "__main__":
-    print("Running molecular fingerprint tests...")
-    print("Note: This requires molecular_screening module to be available")
+    print("Running Morgan count fingerprint tests...")
     # Test molecules
     test_molecules = {
         "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
         "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
         "glucose": "C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O",  # With stereochemistry
-        "sodium_acetate": "CC(=O)[O-].[Na+]",  # Salt
+        "sodium_acetate": "CC(=O)[O-].[Na+]",  # Salt (largest fragment used)
         "benzene": "c1ccccc1",
         "butene_e": "C/C=C/C",  # E-butene
         "butene_z": "C/C=C\\C",  # Z-butene
     }
-    # Test 1: Morgan Fingerprints
-    print("\n1. Testing Morgan fingerprint generation...")
+    # Test 1: Morgan Count Fingerprints (default parameters)
+    print("\n1. Testing Morgan fingerprint generation (radius=2, n_bits=2048)...")
     test_df = pd.DataFrame({"SMILES": list(test_molecules.values()), "name": list(test_molecules.keys())})
-    fp_df = compute_morgan_fingerprints(test_df.copy(), radius=2, n_bits=512, counts=False)
+    fp_df = compute_morgan_fingerprints(test_df.copy())
     print("   Fingerprint generation results:")
     for _, row in fp_df.iterrows():
         fp = row.get("fingerprint", "N/A")
-        fp_len = len(fp) if fp != "N/A" else 0
-        print(f"   {row['name']:15} → {fp_len} bits")
+        if pd.notna(fp):
+            counts = [int(x) for x in fp.split(",")]
+            non_zero = sum(1 for c in counts if c > 0)
+            max_count = max(counts)
+            print(f"   {row['name']:15} → {len(counts)} features, {non_zero} non-zero, max={max_count}")
+        else:
+            print(f"   {row['name']:15} → N/A")
-    # Test 2: Different fingerprint parameters
-    print("\n2. Testing different fingerprint parameters...")
+    # Test 2: Different parameters
+    print("\n2. Testing with different parameters (radius=3, n_bits=1024)...")
-    # Test with counts enabled
-    fp_counts_df = compute_morgan_fingerprints(test_df.copy(), radius=3, n_bits=256, counts=True)
+    fp_df_custom = compute_morgan_fingerprints(test_df.copy(), radius=3, n_bits=1024)
-    print("   With count simulation (256 bits * 4):")
-    for _, row in fp_counts_df.iterrows():
+    for _, row in fp_df_custom.iterrows():
         fp = row.get("fingerprint", "N/A")
-        fp_len = len(fp) if fp != "N/A" else 0
-        print(f"   {row['name']:15} → {fp_len} bits")
+        if pd.notna(fp):
+            counts = [int(x) for x in fp.split(",")]
+            non_zero = sum(1 for c in counts if c > 0)
+            print(f"   {row['name']:15} → {len(counts)} features, {non_zero} non-zero")
+        else:
+            print(f"   {row['name']:15} → N/A")
     # Test 3: Edge cases
     print("\n3. Testing edge cases...")
     # Invalid SMILES
     invalid_df = pd.DataFrame({"SMILES": ["INVALID", ""]})
-    try:
-        fp_invalid = compute_morgan_fingerprints(invalid_df.copy())
-        print(f"   ✓ Invalid SMILES handled: {len(fp_invalid)} valid molecules")
-    except Exception as e:
-        print(f"   ✓ Invalid SMILES properly raised error: {type(e).__name__}")
+    fp_invalid = compute_morgan_fingerprints(invalid_df.copy())
+    print(f"   ✓ Invalid SMILES handled: {len(fp_invalid)} rows returned")
     # Test with pre-existing molecule column
     mol_df = test_df.copy()
@@ -135,4 +160,16 @@ if __name__ == "__main__":
     fp_with_mol = compute_morgan_fingerprints(mol_df)
     print(f"   ✓ Pre-existing molecule column handled: {len(fp_with_mol)} fingerprints generated")
+    # Test 4: Verify count values are reasonable
+    print("\n4. Verifying count distribution...")
+    all_counts = []
+    for _, row in fp_df.iterrows():
+        fp = row.get("fingerprint", "N/A")
+        if pd.notna(fp):
+            counts = [int(x) for x in fp.split(",")]
+            all_counts.extend([c for c in counts if c > 0])
+    if all_counts:
+        print(f"   Non-zero counts: min={min(all_counts)}, max={max(all_counts)}, mean={np.mean(all_counts):.2f}")
     print("\n✅ All fingerprint tests completed!")

workbench/utils/meta_model_simulator.py CHANGED Viewed

@@ -61,6 +61,13 @@ class MetaModelSimulator:
             df["abs_residual"] = df["residual"].abs()
             self._dfs[name] = df
+        # Find common rows across all models
+        id_sets = {name: set(df[self.id_column]) for name, df in self._dfs.items()}
+        common_ids = set.intersection(*id_sets.values())
+        sizes = ", ".join(f"{name}: {len(ids)}" for name, ids in id_sets.items())
+        log.info(f"Row counts before alignment: {sizes} -> common: {len(common_ids)}")
+        self._dfs = {name: df[df[self.id_column].isin(common_ids)] for name, df in self._dfs.items()}
         # Align DataFrames by sorting on id column
         self._dfs = {name: df.sort_values(self.id_column).reset_index(drop=True) for name, df in self._dfs.items()}
         log.info(f"Loaded {len(self._dfs)} models, {len(list(self._dfs.values())[0])} samples each")
@@ -372,13 +379,13 @@ class MetaModelSimulator:
         return weight_df
     def ensemble_failure_analysis(self) -> dict:
-        """Compare ensemble vs best overall model (not per-row oracle).
+        """Compare best ensemble strategy vs best individual model.
         Returns:
             Dict with comparison statistics
         """
         print("\n" + "=" * 60)
-        print("ENSEMBLE VS BEST MODEL COMPARISON")
+        print("BEST ENSEMBLE VS BEST MODEL COMPARISON")
         print("=" * 60)
         model_names = list(self._dfs.keys())
@@ -393,35 +400,55 @@ class MetaModelSimulator:
             combined[f"{name}_abs_err"] = df["abs_residual"].values
         pred_cols = [f"{name}_pred" for name in model_names]
+        conf_cols = [f"{name}_conf" for name in model_names]
+        pred_arr = combined[pred_cols].values
+        conf_arr = combined[conf_cols].values
-        # Calculate ensemble prediction (inverse-MAE weighted)
         mae_scores = {name: self._dfs[name]["abs_residual"].mean() for name in model_names}
         inv_mae_weights = np.array([1.0 / mae_scores[name] for name in model_names])
         inv_mae_weights = inv_mae_weights / inv_mae_weights.sum()
-        pred_arr = combined[pred_cols].values
-        combined["ensemble_pred"] = (pred_arr * inv_mae_weights).sum(axis=1)
-        combined["ensemble_abs_err"] = (combined["ensemble_pred"] - combined["target"]).abs()
-        # Find best overall model (lowest MAE)
+        # Compute all ensemble strategies (true ensembles that combine multiple models)
+        ensemble_strategies = {}
+        ensemble_strategies["Simple Mean"] = combined[pred_cols].mean(axis=1)
+        conf_sum = conf_arr.sum(axis=1, keepdims=True) + 1e-8
+        ensemble_strategies["Confidence-Weighted"] = (pred_arr * (conf_arr / conf_sum)).sum(axis=1)
+        ensemble_strategies["Inverse-MAE Weighted"] = (pred_arr * inv_mae_weights).sum(axis=1)
+        scaled_conf = conf_arr * inv_mae_weights
+        scaled_conf_sum = scaled_conf.sum(axis=1, keepdims=True) + 1e-8
+        ensemble_strategies["Scaled Conf-Weighted"] = (pred_arr * (scaled_conf / scaled_conf_sum)).sum(axis=1)
+        worst_model = max(mae_scores, key=mae_scores.get)
+        remaining = [n for n in model_names if n != worst_model]
+        remaining_cols = [f"{n}_pred" for n in remaining]
+        # Only add Drop Worst if it still combines multiple models
+        if len(remaining) > 1:
+            ensemble_strategies[f"Drop Worst ({worst_model})"] = combined[remaining_cols].mean(axis=1)
+        # Find best individual model
         best_model = min(mae_scores, key=mae_scores.get)
         combined["best_model_abs_err"] = combined[f"{best_model}_abs_err"]
+        best_model_mae = mae_scores[best_model]
-        # Compare ensemble vs best model
+        # Find best true ensemble strategy
+        strategy_maes = {name: (preds - combined["target"]).abs().mean() for name, preds in ensemble_strategies.items()}
+        best_strategy = min(strategy_maes, key=strategy_maes.get)
+        combined["ensemble_pred"] = ensemble_strategies[best_strategy]
+        combined["ensemble_abs_err"] = (combined["ensemble_pred"] - combined["target"]).abs()
+        ensemble_mae = strategy_maes[best_strategy]
+        # Compare
         combined["ensemble_better"] = combined["ensemble_abs_err"] < combined["best_model_abs_err"]
         n_better = combined["ensemble_better"].sum()
         n_total = len(combined)
-        ensemble_mae = combined["ensemble_abs_err"].mean()
-        best_model_mae = mae_scores[best_model]
         print(f"\nBest individual model: {best_model} (MAE={best_model_mae:.4f})")
-        print(f"Ensemble MAE: {ensemble_mae:.4f}")
+        print(f"Best ensemble strategy: {best_strategy} (MAE={ensemble_mae:.4f})")
         if ensemble_mae < best_model_mae:
             improvement = (best_model_mae - ensemble_mae) / best_model_mae * 100
             print(f"Ensemble improves over best model by {improvement:.1f}%")
         else:
             degradation = (ensemble_mae - best_model_mae) / best_model_mae * 100
-            print(f"Ensemble is worse than best model by {degradation:.1f}%")
+            print(f"No ensemble benefit: best single model outperforms all ensemble strategies by {degradation:.1f}%")
         print("\nPer-row comparison:")
         print(f"  Ensemble wins: {n_better}/{n_total} ({100*n_better/n_total:.1f}%)")
@@ -443,6 +470,7 @@ class MetaModelSimulator:
         return {
             "ensemble_mae": ensemble_mae,
+            "best_strategy": best_strategy,
             "best_model": best_model,
             "best_model_mae": best_model_mae,
             "ensemble_win_rate": n_better / n_total,

workbench/utils/shap_utils.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Optional, List, Tuple, Dict, Union
 from workbench.utils.xgboost_model_utils import xgboost_model_from_s3
 from workbench.utils.model_utils import load_category_mappings_from_s3
 from workbench.utils.pandas_utils import convert_categorical_types
+from workbench.model_script_utils.model_script_utils import decompress_features
 # Set up the log
 log = logging.getLogger("workbench")
@@ -111,61 +112,6 @@ def shap_values_data(
         return result_df, feature_df
-def decompress_features(
-    df: pd.DataFrame, features: List[str], compressed_features: List[str]
-) -> Tuple[pd.DataFrame, List[str]]:
-    """Prepare features for the XGBoost model
-    Args:
-        df (pd.DataFrame): The features DataFrame
-        features (List[str]): Full list of feature names
-        compressed_features (List[str]): List of feature names to decompress (bitstrings)
-    Returns:
-        pd.DataFrame: DataFrame with the decompressed features
-        List[str]: Updated list of feature names after decompression
-    Raises:
-        ValueError: If any missing values are found in the specified features
-    """
-    # Check for any missing values in the required features
-    missing_counts = df[features].isna().sum()
-    if missing_counts.any():
-        missing_features = missing_counts[missing_counts > 0]
-        print(
-            f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
-            "WARNING: You might want to remove/replace all NaN values before processing."
-        )
-    # Decompress the specified compressed features
-    decompressed_features = features
-    for feature in compressed_features:
-        if (feature not in df.columns) or (feature not in features):
-            print(f"Feature '{feature}' not in the features list, skipping decompression.")
-            continue
-        # Remove the feature from the list of features to avoid duplication
-        decompressed_features.remove(feature)
-        # Handle all compressed features as bitstrings
-        bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
-        prefix = feature[:3]
-        # Create all new columns at once - avoids fragmentation
-        new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
-        new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
-        # Add to features list
-        decompressed_features.extend(new_col_names)
-        # Drop original column and concatenate new ones
-        df = df.drop(columns=[feature])
-        df = pd.concat([df, new_df], axis=1)
-    return df, decompressed_features
 def _calculate_shap_values(workbench_model, sample_df: pd.DataFrame = None):
     """
     Internal function to calculate SHAP values for Workbench Models.

{workbench-0.8.217.dist-info → workbench-0.8.219.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.217
+Version: 0.8.219
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License

{workbench-0.8.217.dist-info → workbench-0.8.219.dist-info}/RECORD RENAMED Viewed

@@ -26,7 +26,7 @@ workbench/algorithms/sql/__init__.py,sha256=TbOZQwCfx6Tjc3pCCLCiM31wpCX26j5MBNQ6
 workbench/algorithms/sql/column_stats.py,sha256=IwgddvPVITdAvUgxaK_px2IVSEX-jA-8cPIVFoVkbN8,5943
 workbench/algorithms/sql/correlations.py,sha256=0DMgAkzIdR0cApQ_5vs4CxPSRz1qItcAToz7GAOFqzI,3935
 workbench/algorithms/sql/descriptive_stats.py,sha256=VxSR5zQi8NmAWrJvOCO3wrmgVHYrwhenSy5Gl0AOqoo,4075
-workbench/algorithms/sql/outliers.py,sha256=2hoilOk0gaz9pwrnGEBY2y7M-UqFED3KO_mFm_0_3ac,10645
+workbench/algorithms/sql/outliers.py,sha256=LbOYaE3bNR4x-aEIrA2KAX3Aq07ZowRgrW9buCeKisQ,10663
 workbench/algorithms/sql/sample_rows.py,sha256=SRYoGb24QP_iPvOoW9bGZ95yZuseYDtyoNhilfoLu34,2688
 workbench/algorithms/sql/value_counts.py,sha256=F-rZoLTTKv1cHYl2_tDlvWDjczy76uLTr3EMHa-WrEk,3340
 workbench/api/__init__.py,sha256=1JAQKD82biia4h07BRA9ytjxuJUYQqgHvkf8FwpnlVQ,1195
@@ -58,8 +58,8 @@ workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcv
 workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
 workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
-workbench/core/artifacts/endpoint_core.py,sha256=VAEDP4eLl_Obwcb_Tg4tqDsAti4kXa0UzhGON57M4Hs,54071
-workbench/core/artifacts/feature_set_core.py,sha256=EAvFbkNWDaiTnQvsugNJXAt1sgbzOs4tCvSycPB7Ry8,39332
+workbench/core/artifacts/endpoint_core.py,sha256=fLOxgwNmbsrOpKafXN8zLCzazKdpJQZr2zanKJ14KRc,54057
+workbench/core/artifacts/feature_set_core.py,sha256=zR6gia7V6JeUHaKYzQRGQwF1j0Z5DBcM8oqGPS1pox4,39344
 workbench/core/artifacts/model_core.py,sha256=wPkpdRlxnAXMqsDtJGPotGFO146Hm7NCfYbImHwZo9c,52343
 workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
 workbench/core/artifacts/parameter_store_core.py,sha256=sHvjJMuybM4qdcKhH-Sx6Ur6Yn5ozA3QHwtidsnhyG8,2867
@@ -125,22 +125,21 @@ workbench/core/views/training_view.py,sha256=7HwhbQhDBhT3Zo_gssS-b4eueJ0h9nqqT8Y
 workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,13534
 workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
 workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
-workbench/model_script_utils/model_script_utils.py,sha256=9Js8bc57osH-kkreWPq09VtWeIQ7buzMqutgV63u0UI,11479
+workbench/model_script_utils/model_script_utils.py,sha256=rGPdjxmQUPcZNXK_8nKYQWb7IPQ5ietne7UMYRQZpMo,11841
 workbench/model_script_utils/pytorch_utils.py,sha256=vr8ybK45U0H8Jhjb5qx6xbJNozdcl7bVqubknDwh6U0,13704
 workbench/model_script_utils/uq_harness.py,sha256=70b7dI9Wls03ff6zm2TpfKIsboVBKsj7P7fNzmMe6c0,10305
 workbench/model_scripts/script_generation.py,sha256=w3L2VYGnGUvBtd01BWzH38DuHKULtYsc_Xz_3_Eavvo,8258
-workbench/model_scripts/chemprop/chemprop.template,sha256=Vh2DW3E6ryrvM3VizZ2JVlBeFTu247guB_3cPcF2Hgw,29386
-workbench/model_scripts/chemprop/generated_model_script.py,sha256=7h0sVMIlfe53XHUCRdKyVFUoq6lKOJBcxD15BmZhC8c,29408
-workbench/model_scripts/chemprop/model_script_utils.py,sha256=9Js8bc57osH-kkreWPq09VtWeIQ7buzMqutgV63u0UI,11479
+workbench/model_scripts/chemprop/chemprop.template,sha256=EF1otxEJGPKm_iZibbWBUvjWhQY0G8jnPK8d_A7OnS8,29416
+workbench/model_scripts/chemprop/generated_model_script.py,sha256=4WqqqkUlUSf1EEgzZk-OAFSwoif5drjwitEko0rlI38,30093
+workbench/model_scripts/chemprop/model_script_utils.py,sha256=rGPdjxmQUPcZNXK_8nKYQWb7IPQ5ietne7UMYRQZpMo,11841
 workbench/model_scripts/chemprop/requirements.txt,sha256=2IBHZZNYqhX9Ed7AmRVgN06tO3EHeBbN2EM8-tjWZhs,216
 workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
-workbench/model_scripts/custom_models/chem_info/fingerprints.py,sha256=XHRxoP6eV5z_k7w6BmfwpPO8rr6PZIF7KW9jwGjnj7o,5449
+workbench/model_scripts/custom_models/chem_info/fingerprints.py,sha256=ECDzjZs4wSx3ZvAQipMl2NEqI2isCWHLYBv7mp0NVgk,6939
 workbench/model_scripts/custom_models/chem_info/mol_descriptors.py,sha256=c8gkHZ-8s3HJaW9zN9pnYGK7YVW8Y0xFqQ1G_ysrF2Y,18789
 workbench/model_scripts/custom_models/chem_info/mol_standardize.py,sha256=qPLCdVMSXMOWN-01O1isg2zq7eQyFAI0SNatHkRq1uw,17524
 workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py,sha256=xljMjdfh4Idi4v1Afq1zZxvF1SDa7pDOLSAhvGBEj88,2891
 workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py,sha256=LqVh_AHObo0uxHt_uNmeemScTLjM2j9C3I_QFJXdmUI,3232
 workbench/model_scripts/custom_models/chem_info/requirements.txt,sha256=7HBUzvNiM8lOir-UfQabXYlUp3gxdGJ42u18EuSMGjc,39
-workbench/model_scripts/custom_models/meta_endpoints/example.py,sha256=hzOAuLhIGB8vei-555ruNxpsE1GhuByHGjGB0zw8GSs,1726
 workbench/model_scripts/custom_models/network_security/Readme.md,sha256=Z2gtiu0hLHvEJ1x-_oFq3qJZcsK81sceBAGAGltpqQ8,222
 workbench/model_scripts/custom_models/proximity/Readme.md,sha256=RlMFAJZgAT2mCgDk-UwR_R0Y_NbCqeI5-8DUsxsbpWQ,289
 workbench/model_scripts/custom_models/proximity/feature_space_proximity.py,sha256=FYsQd5Lf5CrSWi-1Dcs_NVFN86izifxkWk1-EOvEV54,6950
@@ -151,7 +150,6 @@ workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template,sha256=c
 workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template,sha256=449Enh4-7RrMrxt1oS_SHJHGV8yYcFlWHsLrCVTFQGI,13778
 workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py,sha256=FYsQd5Lf5CrSWi-1Dcs_NVFN86izifxkWk1-EOvEV54,6950
 workbench/model_scripts/custom_models/uq_models/gaussian_process.template,sha256=3nMlCi8nEbc4N-MQTzjfIcljfDQkUmWeLBfmd18m5fg,6632
-workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=wLilHll9Hzwyo-y9Vsqx7PjzdMca4xkUt3Ed1zcgOBE,14412
 workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=_ukYcsL4pnWvFV1oA89_wfVpxWbvoEx6MGwKxc38kSI,8512
 workbench/model_scripts/custom_models/uq_models/requirements.txt,sha256=fw7T7t_YJAXK3T6Ysbesxh_Agx_tv0oYx72cEBTqRDY,98
 workbench/model_scripts/custom_script_example/custom_model_script.py,sha256=T8aydawgRVAdSlDimoWpXxG2YuWWQkbcjBVjAeSG2_0,6408
@@ -160,8 +158,8 @@ workbench/model_scripts/ensemble_xgb/ensemble_xgb.template,sha256=lMEx0IkawcpTI5
 workbench/model_scripts/ensemble_xgb/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/meta_model/generated_model_script.py,sha256=ncPrHd9-R8l_98vAiuTUJ92C9PKpEgAtpIrmd7TuqSQ,8341
 workbench/model_scripts/meta_model/meta_model.template,sha256=viz-AKVq3YRwOUBt8-rUO1TwdEPFzyP7nnifqcIJurw,8244
-workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=qw5lqFhkRjMGjTWC9SH1lgGETwqEXEmgzk_cdEs2ZFw,24598
-workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=9Js8bc57osH-kkreWPq09VtWeIQ7buzMqutgV63u0UI,11479
+workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=ma2JOiCxCZfq94jvIsDoCa2VQBwKf-trj9QMpaa0VEQ,21108
+workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=rGPdjxmQUPcZNXK_8nKYQWb7IPQ5ietne7UMYRQZpMo,11841
 workbench/model_scripts/pytorch_model/pytorch.template,sha256=KOH7nhq_3u0pHmjGymY5aycF0_ZlwLQ16qmDKUQcE9k,21091
 workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=vr8ybK45U0H8Jhjb5qx6xbJNozdcl7bVqubknDwh6U0,13704
 workbench/model_scripts/pytorch_model/requirements.txt,sha256=ES7YehHEL4E5oV8FScHm3oNQmkMI4ODgbC1fSbaY7T4,183
@@ -170,8 +168,8 @@ workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgP
 workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb2gHXu_kpGPho3ANBzlOkfcvs,107
 workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=QQvqx-eX9ZTbYmyupq6R6vIQwosmsmY_MRBPaHyfjdk,12586
 workbench/model_scripts/uq_models/generated_model_script.py,sha256=kgcIWghY6eazcBWS77MukhQUyYFmfJcS8SQ8RmjM82I,9006
-workbench/model_scripts/xgb_model/generated_model_script.py,sha256=bIue0u9S1y1rBCcTVZ0Aa0PO8-XBphJmAgm0e8ov90k,18585
-workbench/model_scripts/xgb_model/model_script_utils.py,sha256=9Js8bc57osH-kkreWPq09VtWeIQ7buzMqutgV63u0UI,11479
+workbench/model_scripts/xgb_model/generated_model_script.py,sha256=VkgU9jXvWzTjPsq9JoIRJGKYJE-aj3-z7gTOc5f6hH4,18376
+workbench/model_scripts/xgb_model/model_script_utils.py,sha256=rGPdjxmQUPcZNXK_8nKYQWb7IPQ5ietne7UMYRQZpMo,11841
 workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/xgb_model/uq_harness.py,sha256=70b7dI9Wls03ff6zm2TpfKIsboVBKsj7P7fNzmMe6c0,10305
 workbench/model_scripts/xgb_model/xgb_model.template,sha256=w4-yx82yws-_esObZQIq13S8WKXXnZxqe86ZuyWoP5w,18367
@@ -183,6 +181,7 @@ workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-P
 workbench/scripts/endpoint_test.py,sha256=RV52DZZTOD_ou-ywZjaxQ2_wqnSJqvlnHQZbvf4iM6I,5339
 workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
 workbench/scripts/lambda_test.py,sha256=SLAPIXeGQn82neQ6-Hif3VS3LWLwT0-dGw8yWw2aXRQ,2077
+workbench/scripts/meta_model_sim.py,sha256=6iGpInA-nH6DSjk0z63fcoL8P7icqnZmKLE5Sqyrh7E,1026
 workbench/scripts/ml_pipeline_batch.py,sha256=1T5JnLlUJR7bwAGBLHmLPOuj1xFRqVIQX8PsuDhHy8o,4907
 workbench/scripts/ml_pipeline_sqs.py,sha256=5c8qX-SoV4htOUcSXk4OzD7BQskCnaA7cLMiF4Et24c,6666
 workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
@@ -236,7 +235,7 @@ workbench/utils/lambda_utils.py,sha256=7GhGRPyXn9o-toWb9HBGSnI8-DhK9YRkwhCSk_mNK
 workbench/utils/license_manager.py,sha256=lNE9zZIglmX3zqqCKBdN1xqTgHCEZgJDxavF6pdG7fc,6825
 workbench/utils/log_utils.py,sha256=7n1NJXO_jUX82e6LWAQug6oPo3wiPDBYsqk9gsYab_A,3167
 workbench/utils/markdown_utils.py,sha256=4lEqzgG4EVmLcvvKKNUwNxVCySLQKJTJmWDiaDroI1w,8306
-workbench/utils/meta_model_simulator.py,sha256=E8O8z4sbSDhKd22_nbuFLUcPNbPGzMacznBdL2H4trU,18755
+workbench/utils/meta_model_simulator.py,sha256=fMKZoLi_VEJohNVvbZSMvZWNdUbIpGlB6Bg6mJQW33s,20630
 workbench/utils/metrics_utils.py,sha256=iAoKrAM4iRX8wFSjSJhfNKbbW1BqB3eI_U3wvdhUdhE,9496
 workbench/utils/model_utils.py,sha256=jiybuv6gGE-p2i2JEQcyAY-ffigtuzZFNvp_rHKCi3A,19284
 workbench/utils/monitor_utils.py,sha256=kVaJ7BgUXs3VPMFYfLC03wkIV4Dq-pEhoXS0wkJFxCc,7858
@@ -250,7 +249,7 @@ workbench/utils/pytorch_utils.py,sha256=RoltE9-fOX2UixzaEmnxN6oJtBEKQ9Jklu0LRzYK
 workbench/utils/redis_cache.py,sha256=39LFSWmOlNNcah02D3sBnmibc-DPeKC3SNq71K4HaB4,12893
 workbench/utils/repl_utils.py,sha256=rWOMv2HiEIp8ZL6Ps6DlwiJlGr-pOhv9OZQhm3aR-1A,4668
 workbench/utils/s3_utils.py,sha256=Xme_o_cftC_jWnw6R9YKS6-6C11zaCBAoQDlY3dZb5o,7337
-workbench/utils/shap_utils.py,sha256=dtjSIwSyvYSaQjjvIp5A9LGS7pr-5Vt907rvVKOrqNY,12651
+workbench/utils/shap_utils.py,sha256=FeFNRH5mJTbuHlpHyFJgjHcU5BU7UthJL1Gb5Gl8_zw,10590
 workbench/utils/shapley_values.py,sha256=3DvQz4HIPnxW42idgtuQ5vtzU-oF4_lToaWzLRjU-E4,3673
 workbench/utils/symbols.py,sha256=PioF1yAQyOabw7kLg8nhvaZBPFe7ABkpfpPPE0qz_2k,1265
 workbench/utils/test_data_generator.py,sha256=gqRXL7IUKG4wVfO1onflY3wg7vLkgx402_Zy3iqY7NU,11921
@@ -264,7 +263,7 @@ workbench/utils/workbench_sqs.py,sha256=RwM80z7YWwdtMaCKh7KWF8v38f7eBRU7kyC7ZhTR
 workbench/utils/xgboost_local_crossfold.py,sha256=GY61F6-avQDiteIb1LAgvkHvAKvLg6H85xBDvfgCVDM,10718
 workbench/utils/xgboost_model_utils.py,sha256=qEnB1viCIXMYLW0LJuyCioKMSilbmKTMuppaxBZqwhc,12967
 workbench/utils/chem_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/utils/chem_utils/fingerprints.py,sha256=XHRxoP6eV5z_k7w6BmfwpPO8rr6PZIF7KW9jwGjnj7o,5449
+workbench/utils/chem_utils/fingerprints.py,sha256=ECDzjZs4wSx3ZvAQipMl2NEqI2isCWHLYBv7mp0NVgk,6939
 workbench/utils/chem_utils/misc.py,sha256=Nevf8_opu-uIPrv_1_0ubuFVVo2_fGUkMoLAHB3XAeo,7372
 workbench/utils/chem_utils/mol_descriptors.py,sha256=c8gkHZ-8s3HJaW9zN9pnYGK7YVW8Y0xFqQ1G_ysrF2Y,18789
 workbench/utils/chem_utils/mol_standardize.py,sha256=qPLCdVMSXMOWN-01O1isg2zq7eQyFAI0SNatHkRq1uw,17524
@@ -307,9 +306,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.217.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
-workbench-0.8.217.dist-info/METADATA,sha256=7aIfI1eWuhBsh22ymfAboL7MK6l3z-8FlA1AXQ5xzMg,10525
-workbench-0.8.217.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-workbench-0.8.217.dist-info/entry_points.txt,sha256=viJ6aXRj63sBIs7avj4kFbCO2J2E7jTCrIk8U1SIc3I,511
-workbench-0.8.217.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
-workbench-0.8.217.dist-info/RECORD,,
+workbench-0.8.219.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+workbench-0.8.219.dist-info/METADATA,sha256=1Sks6KYtjjg1QqIH6p4Q8d9Dazr3EfuQdcvv0wgsXgE,10525
+workbench-0.8.219.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+workbench-0.8.219.dist-info/entry_points.txt,sha256=t_9tY7iYku9z96qFZZtUgbWDh_nHtehXxLPLBSpAzeM,566
+workbench-0.8.219.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.219.dist-info/RECORD,,

{workbench-0.8.217.dist-info → workbench-0.8.219.dist-info}/entry_points.txt RENAMED Viewed

@@ -3,6 +3,7 @@ cloud_watch = workbench.scripts.monitor_cloud_watch:main
 endpoint_test = workbench.scripts.endpoint_test:main
 glue_launcher = workbench.scripts.glue_launcher:main
 lambda_test = workbench.scripts.lambda_test:main
+meta_model_sim = workbench.scripts.meta_model_sim:main
 ml_pipeline_batch = workbench.scripts.ml_pipeline_batch:main
 ml_pipeline_sqs = workbench.scripts.ml_pipeline_sqs:main
 training_test = workbench.scripts.training_test:main

workbench/model_scripts/custom_models/meta_endpoints/example.py DELETED Viewed

@@ -1,53 +0,0 @@
-# Model: Meta Endpoint Example
-# This script is a template for creating a custom meta endpoint in AWS Workbench.
-from io import StringIO
-import pandas as pd
-import json
-# Workbench Bridges imports
-try:
-    from workbench_bridges.endpoints.fast_inference import fast_inference
-except ImportError:
-    print("workbench_bridges not found, this is fine for training...")
-# Not Used: We need to define this function for SageMaker
-def model_fn(model_dir):
-    return None
-def input_fn(input_data, content_type):
-    """Parse input data and return a DataFrame."""
-    if not input_data:
-        raise ValueError("Empty input data is not supported!")
-    # Decode bytes to string if necessary
-    if isinstance(input_data, bytes):
-        input_data = input_data.decode("utf-8")
-    # Support CSV and JSON input formats
-    if "text/csv" in content_type:
-        return pd.read_csv(StringIO(input_data))
-    elif "application/json" in content_type:
-        return pd.DataFrame(json.loads(input_data))  # Assumes JSON array of records
-    else:
-        raise ValueError(f"{content_type} not supported!")
-def output_fn(output_df, accept_type):
-    """Supports both CSV and JSON output formats."""
-    if "text/csv" in accept_type:
-        csv_output = output_df.to_csv(index=False)
-        return csv_output, "text/csv"
-    elif "application/json" in accept_type:
-        return output_df.to_json(orient="records"), "application/json"  # JSON array of records (NaNs -> null)
-    else:
-        raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
-# Prediction function
-def predict_fn(df, model):
-    # Call inference on an endpoint
-    df = fast_inference("abalone-regression", df)
-    return df

workbench 0.8.217__py3-none-any.whl → 0.8.219__py3-none-any.whl

workbench 0.8.217__py3-none-any.whl → 0.8.219__py3-none-any.whl