workbench 0.8.236__py3-none-any.whl → 0.8.243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/api/model.py +2 -2
- workbench/core/artifacts/athena_source.py +5 -3
- workbench/core/artifacts/endpoint_core.py +1 -1
- workbench/core/cloud_platform/aws/aws_meta.py +2 -1
- workbench/core/transforms/features_to_model/features_to_model.py +2 -2
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +26 -14
- workbench/model_script_utils/pytorch_utils.py +2 -2
- workbench/model_scripts/chemprop/chemprop.template +8 -5
- workbench/model_scripts/chemprop/generated_model_script.py +10 -7
- workbench/model_scripts/pytorch_model/generated_model_script.py +8 -8
- workbench/model_scripts/pytorch_model/pytorch.template +1 -1
- workbench/model_scripts/pytorch_model/pytorch_utils.py +2 -2
- workbench/scripts/ml_pipeline_batch.py +47 -2
- workbench/scripts/ml_pipeline_launcher.py +410 -0
- workbench/scripts/ml_pipeline_sqs.py +22 -2
- workbench/web_interface/components/plugins/ag_table.py +4 -11
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/METADATA +2 -2
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/RECORD +22 -21
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/WHEEL +1 -1
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/entry_points.txt +1 -0
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/top_level.txt +0 -0
workbench/api/model.py
CHANGED
@@ -44,7 +44,7 @@ class Model(ModelCore):
         serverless: bool = True,
         mem_size: int = 2048,
         max_concurrency: int = 5,
-        instance: str =
+        instance: str = None,
         data_capture: bool = False,
     ) -> Endpoint:
         """Create an Endpoint from the Model.
@@ -55,7 +55,7 @@ class Model(ModelCore):
             serverless (bool): Set the endpoint to be serverless (default: True)
             mem_size (int): The memory size for the Endpoint in MB (default: 2048)
             max_concurrency (int): The maximum concurrency for the Endpoint (default: 5)
-            instance (str): The instance type
+            instance (str): The instance type for Realtime Endpoints (default: None = auto-select based on model)
             data_capture (bool): Enable data capture for the Endpoint (default: False)

         Returns:
workbench/core/artifacts/athena_source.py
CHANGED
@@ -258,7 +258,7 @@ class AthenaSource(DataSourceAbstract):

                 # Wait for the query to complete
                 wr.athena.wait_query(query_execution_id=query_execution_id, boto3_session=self.boto3_session)
-                self.log.debug(f"
+                self.log.debug(f"Query executed successfully: {query_execution_id}")
                 break  # If successful, exit the retry loop
             except wr.exceptions.QueryFailed as e:
                 if "AlreadyExistsException" in str(e):
@@ -271,11 +271,13 @@ class AthenaSource(DataSourceAbstract):
                     time.sleep(retry_delay)
                 else:
                     if not silence_errors:
-                        self.log.critical(f"Failed to execute
+                        self.log.critical(f"Failed to execute query after {max_retries} attempts: {query}")
+                        self.log.critical(f"Error: {e}")
                     raise
             else:
                 if not silence_errors:
-                    self.log.critical(f"Failed to execute
+                    self.log.critical(f"Failed to execute query: {query}")
+                    self.log.critical(f"Error: {e}")
                 raise

     def s3_storage_location(self) -> str:
workbench/core/artifacts/endpoint_core.py
CHANGED
@@ -1138,7 +1138,7 @@ class EndpointCore(Artifact):
             cls.log.error("Error deleting endpoint.")
             raise e

-        time.sleep(
+        time.sleep(10)  # Final sleep for AWS to fully register deletions

     @classmethod
     def delete_endpoint_models(cls, endpoint_name: str):
workbench/core/cloud_platform/aws/aws_meta.py
CHANGED
@@ -245,7 +245,8 @@ class AWSMeta:
                 "Model Group": model_group_name,
                 "Health": health_tags,
                 "Owner": aws_tags.get("workbench_owner", "-"),
-                "
+                "Type": aws_tags.get("workbench_model_type", "-"),
+                "Framework": aws_tags.get("workbench_model_framework", "-"),
                 "Created": created,
                 "Ver": model_details.get("ModelPackageVersion", "-"),
                 "Input": aws_tags.get("workbench_input", "-"),
workbench/core/transforms/features_to_model/features_to_model.py
CHANGED
@@ -247,9 +247,9 @@ class FeaturesToModel(Transform):
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)

-        # Use GPU instance for ChemProp/PyTorch
+        # Use GPU instance for ChemProp/PyTorch
         if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH]:
-            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$1.00/hr
             self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
         else:
             train_instance_type = "ml.m5.xlarge"
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py
CHANGED
@@ -26,13 +26,13 @@ class ModelToEndpoint(Transform):
     ```
     """

-    def __init__(self, model_name: str, endpoint_name: str, serverless: bool = True, instance: str =
+    def __init__(self, model_name: str, endpoint_name: str, serverless: bool = True, instance: str = None):
         """ModelToEndpoint Initialization
         Args:
             model_name(str): The Name of the input Model
             endpoint_name(str): The Name of the output Endpoint
             serverless(bool): Deploy the Endpoint in serverless mode (default: True)
-            instance(str): The instance type
+            instance(str): The instance type for Realtime Endpoints (default: None = auto-select)
         """
         # Make sure the endpoint_name is a valid name
         Artifact.is_name_valid(endpoint_name, delimiter="-", lower_case=False)
@@ -42,7 +42,7 @@ class ModelToEndpoint(Transform):

         # Set up all my instance attributes
         self.serverless = serverless
-        self.
+        self.instance = instance
         self.input_type = TransformInput.MODEL
         self.output_type = TransformOutput.ENDPOINT

@@ -100,24 +100,36 @@ class ModelToEndpoint(Transform):
         # Get the metadata/tags to push into AWS
         aws_tags = self.get_aws_tags()

+        # Check the model framework for resource requirements
+        from workbench.api import ModelFramework
+
+        self.log.info(f"Model Framework: {workbench_model.model_framework}")
+        needs_more_resources = workbench_model.model_framework in [ModelFramework.PYTORCH, ModelFramework.CHEMPROP]
+
         # Is this a serverless deployment?
         serverless_config = None
         if self.serverless:
             # For PyTorch or ChemProp we need at least 4GB of memory
-
-
-
-            if workbench_model.model_framework in [ModelFramework.PYTORCH, ModelFramework.CHEMPROP]:
-                if mem_size < 4096:
-                    self.log.important(
-                        f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
-                    )
-                    mem_size = 4096
+            if needs_more_resources and mem_size < 4096:
+                self.log.important(f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)")
+                mem_size = 4096
             serverless_config = ServerlessInferenceConfig(
                 memory_size_in_mb=mem_size,
                 max_concurrency=max_concurrency,
             )
+            instance_type = "serverless"
             self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")
+        else:
+            # For realtime endpoints, use explicit instance if provided, otherwise auto-select
+            if self.instance:
+                instance_type = self.instance
+                self.log.important(f"Realtime Endpoint: Using specified instance type: {instance_type}")
+            elif needs_more_resources:
+                instance_type = "ml.c7i.large"
+                self.log.important(f"{workbench_model.model_framework} needs more resources (using {instance_type})")
+            else:
+                instance_type = "ml.t2.medium"
+            self.log.important(f"Realtime Endpoint: Instance Type={instance_type}")

         # Configure data capture if requested (and not serverless)
         data_capture_config = None
@@ -141,7 +153,7 @@ class ModelToEndpoint(Transform):
         try:
             model_package.deploy(
                 initial_instance_count=1,
-                instance_type=
+                instance_type=instance_type,
                 serverless_inference_config=serverless_config,
                 endpoint_name=self.output_name,
                 serializer=CSVSerializer(),
@@ -158,7 +170,7 @@ class ModelToEndpoint(Transform):
             # Retry the deploy
             model_package.deploy(
                 initial_instance_count=1,
-                instance_type=
+                instance_type=instance_type,
                 serverless_inference_config=serverless_config,
                 endpoint_name=self.output_name,
                 serializer=CSVSerializer(),
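Taken together, the Model.to_endpoint() and ModelToEndpoint changes centralize instance-type resolution: an explicit instance always wins, PyTorch/ChemProp models fall back to a larger realtime default, and everything else keeps the small default. A minimal sketch of the resulting rule, assuming only what the diff above shows (the helper function itself is not part of the package):

    # Sketch of the selection rule from the diff above (not the verbatim source)
    def resolve_instance_type(serverless: bool, instance: str | None, needs_more_resources: bool) -> str:
        if serverless:
            return "serverless"  # a ServerlessInferenceConfig is used instead of an instance
        if instance:
            return instance  # caller-specified instance type wins
        if needs_more_resources:
            return "ml.c7i.large"  # larger realtime default for PyTorch/ChemProp
        return "ml.t2.medium"  # lightweight realtime default otherwise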
workbench/model_script_utils/pytorch_utils.py
CHANGED
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
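For context on the pin_memory=True additions here (and in the pytorch_model copy of this file below): pinned, page-locked host memory lets host-to-GPU copies run faster and asynchronously. A small illustrative sketch, with invented tensors and device handling rather than this package's code:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(256, 8), torch.randn(256, 1))
    loader = DataLoader(dataset, batch_size=64, shuffle=True, pin_memory=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for x, y in loader:
        # non_blocking=True can overlap the copy with compute when the batch is pinned
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)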
workbench/model_scripts/chemprop/chemprop.template
CHANGED
@@ -34,7 +34,7 @@ DEFAULT_HYPERPARAMETERS = {
     "n_folds": 5,
     "max_epochs": 400,
     "patience": 50,
-    "batch_size":
+    "batch_size": 64,
     # Message Passing (ignored when using foundation model)
     "hidden_dim": 700,
     "depth": 6,
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
         return df

     dataset = data.MoleculeDataset(datapoints)
-
+    # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+    # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+    dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))

     # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
     all_preds = []
@@ -588,8 +590,9 @@ if __name__ == "__main__":
         val_dataset.normalize_targets(target_scaler)
         output_transform = nn.UnscaleTransform.from_standard_scaler(target_scaler)

-
-
+        num_workers = min(os.cpu_count() or 4, 8)  # Scale with CPUs, cap at 8
+        train_loader = data.build_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
+        val_loader = data.build_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)

         # Build model
         pl.seed_everything(hyperparameters["seed"] + fold_idx)
@@ -615,7 +618,7 @@ if __name__ == "__main__":
         callbacks.append(pl.callbacks.ModelCheckpoint(
             dirpath=args.model_dir, filename=f"best_{fold_idx}", monitor="val_loss", mode="min", save_top_k=1
         ))
-        return pl.Trainer(accelerator="auto", max_epochs=max_epochs, logger=False, enable_progress_bar=True, callbacks=callbacks)
+        return pl.Trainer(accelerator="auto", max_epochs=max_epochs, precision="16-mixed", logger=False, enable_progress_bar=True, callbacks=callbacks)

     if use_two_phase:
         # Phase 1: Freeze MPNN, train FFN only
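The batch-size comment in predict_fn is worth unpacking: a dataloader with drop_last semantics silently discards a trailing batch, so a request whose dataset size leaves a remainder of one molecule would come back short. A hedged illustration of the same failure mode with a plain PyTorch DataLoader (ChemProp's build_dataloader wraps similar machinery):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    # 65 samples with batch_size=64 leaves a trailing batch of 1
    dataset = TensorDataset(torch.randn(65, 4))
    lossy = DataLoader(dataset, batch_size=64, drop_last=True)
    print(sum(len(batch[0]) for batch in lossy))  # 64 -> one sample silently dropped

    # The fix above: batch_size=len(dataset) yields exactly one full batch
    safe = DataLoader(dataset, batch_size=len(dataset))
    print(sum(len(batch[0]) for batch in safe))  # 65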
workbench/model_scripts/chemprop/generated_model_script.py
CHANGED
@@ -45,10 +45,10 @@ DEFAULT_HYPERPARAMETERS = {
     # Loss function for regression (mae, mse)
     "criterion": "mae",
     # Split strategy: "random", "scaffold", or "butina"
-    # - random: Standard random split
+    # - random: Standard random split (default)
     # - scaffold: Bemis-Murcko scaffold-based grouping
     # - butina: Morgan fingerprint clustering (recommended for ADMET)
-    "split_strategy": "
+    "split_strategy": "random",
     "butina_cutoff": 0.4,  # Tanimoto distance cutoff for Butina clustering
     # Random seed
     "seed": 42,
@@ -65,11 +65,11 @@ DEFAULT_HYPERPARAMETERS = {
 # Template parameters (filled in by Workbench)
 TEMPLATE_PARAMS = {
     "model_type": "uq_regressor",
-    "targets": ['
+    "targets": ['udm_asy_res_extraction_percent'],
     "feature_list": ['smiles'],
-    "id_column": "
-    "model_metrics_s3_path": "s3://
-    "hyperparameters": {
+    "id_column": "udm_mol_bat_id",
+    "model_metrics_s3_path": "s3://idb-prod-sageworks-artifacts/models/hlm-extraction-reg-chemprop-1-260128/training",
+    "hyperparameters": {},
 }


@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
         return df

     dataset = data.MoleculeDataset(datapoints)
-
+    # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+    # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+    dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))

     # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
     all_preds = []
@@ -752,6 +754,7 @@ if __name__ == "__main__":
     output_columns += [f"{t}_pred" for t in target_columns] + [f"{t}_pred_std" for t in target_columns]
     output_columns += ["prediction", "prediction_std", "confidence"]
     output_columns += [c for c in df_val.columns if c.endswith("_proba")]
+
     output_columns = [c for c in output_columns if c in df_val.columns]

     wr.s3.to_csv(df_val[output_columns], f"{model_metrics_s3_path}/validation_predictions.csv", index=False)
workbench/model_scripts/pytorch_model/generated_model_script.py
CHANGED
@@ -54,10 +54,10 @@ DEFAULT_HYPERPARAMETERS = {
     # Loss function for regression (L1Loss=MAE, MSELoss=MSE, HuberLoss, SmoothL1Loss)
     "loss": "L1Loss",
     # Split strategy: "random", "scaffold", or "butina"
-    # - random: Standard random split
+    # - random: Standard random split (default)
     # - scaffold: Bemis-Murcko scaffold-based grouping (requires 'smiles' column in data)
     # - butina: Morgan fingerprint clustering (requires 'smiles' column, recommended for ADMET)
-    "split_strategy": "
+    "split_strategy": "random",
     "butina_cutoff": 0.4,  # Tanimoto distance cutoff for Butina clustering
     # Random seed
     "seed": 42,
@@ -65,13 +65,13 @@ DEFAULT_HYPERPARAMETERS = {

 # Template parameters (filled in by Workbench)
 TEMPLATE_PARAMS = {
-    "model_type": "
-    "target": "
+    "model_type": "classifier",
+    "target": "class",
     "features": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo'],
-    "id_column": "
-    "compressed_features": [
-    "model_metrics_s3_path": "s3://
-    "hyperparameters": {
+    "id_column": "udm_mol_bat_id",
+    "compressed_features": [],
+    "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/pka-b1-value-class-pytorch-1-dt/training",
+    "hyperparameters": {},
 }

workbench/model_scripts/pytorch_model/pytorch.template
CHANGED
@@ -45,7 +45,7 @@ DEFAULT_HYPERPARAMETERS = {
     "n_folds": 5,
     "max_epochs": 200,
     "early_stopping_patience": 30,
-    "batch_size":
+    "batch_size": 64,
     # Model architecture (larger capacity - ensemble provides regularization)
     "layers": "512-256-128",
     "learning_rate": 1e-3,
workbench/model_scripts/pytorch_model/pytorch_utils.py
CHANGED
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
workbench/scripts/ml_pipeline_batch.py
CHANGED
@@ -44,7 +44,14 @@ def _log_cloudwatch_link(job: dict, message_prefix: str = "View logs") -> None:
         log.info("Check AWS Batch console for logs")


-def run_batch_job(
+def run_batch_job(
+    script_path: str,
+    size: str = "small",
+    realtime: bool = False,
+    dt: bool = False,
+    promote: bool = False,
+    test_promote: bool = False,
+) -> int:
     """
     Submit and monitor an AWS Batch job for ML pipeline execution.

@@ -56,6 +63,10 @@ def run_batch_job(script_path: str, size: str = "small") -> int:
         - small: 2 vCPU, 4GB RAM for lightweight processing
         - medium: 4 vCPU, 8GB RAM for standard ML workloads
         - large: 8 vCPU, 16GB RAM for heavy training/inference
+        realtime: If True, sets serverless=False for real-time processing (default: False)
+        dt: If True, sets DT=True in environment (default: False)
+        promote: If True, sets PROMOTE=True in environment (default: False)
+        test_promote: If True, sets TEST_PROMOTE=True in environment (default: False)

     Returns:
         Exit code (0 for success/disconnected, non-zero for failure)
@@ -81,6 +92,10 @@ def run_batch_job(script_path: str, size: str = "small") -> int:
             "environment": [
                 {"name": "ML_PIPELINE_S3_PATH", "value": s3_path},
                 {"name": "WORKBENCH_BUCKET", "value": workbench_bucket},
+                {"name": "SERVERLESS", "value": "False" if realtime else "True"},
+                {"name": "DT", "value": str(dt)},
+                {"name": "PROMOTE", "value": str(promote)},
+                {"name": "TEST_PROMOTE", "value": str(test_promote)},
             ]
         },
     )
@@ -124,9 +139,39 @@ def main():
     """CLI entry point for running ML pipelines on AWS Batch."""
     parser = argparse.ArgumentParser(description="Run ML pipeline script on AWS Batch")
     parser.add_argument("script_file", help="Local path to ML pipeline script")
+    parser.add_argument(
+        "--size", default="small", choices=["small", "medium", "large"], help="Job size tier (default: small)"
+    )
+    parser.add_argument(
+        "--realtime",
+        action="store_true",
+        help="Create realtime endpoints (default is serverless)",
+    )
+    parser.add_argument(
+        "--dt",
+        action="store_true",
+        help="Set DT=True (models and endpoints will have '-dt' suffix)",
+    )
+    parser.add_argument(
+        "--promote",
+        action="store_true",
+        help="Set Promote=True (models and endpoints will use promoted naming)",
+    )
+    parser.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Set TEST_PROMOTE=True (creates test endpoint with '-test' suffix)",
+    )
     args = parser.parse_args()
     try:
-        exit_code = run_batch_job(
+        exit_code = run_batch_job(
+            args.script_file,
+            size=args.size,
+            realtime=args.realtime,
+            dt=args.dt,
+            promote=args.promote,
+            test_promote=args.test_promote,
+        )
         exit(exit_code)
     except Exception as e:
         log.error(f"Error: {e}")
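The new Batch environment variables arrive as the strings "True"/"False", so whatever runs inside the container has to parse them itself; a hypothetical consumer sketch (the env_flag helper is invented, only the variable names come from the diff):

    import os

    def env_flag(name: str, default: bool = False) -> bool:
        # Hypothetical helper: the launcher sets these as "True"/"False" strings
        return os.environ.get(name, str(default)).lower() == "true"

    serverless = env_flag("SERVERLESS", default=True)
    dt = env_flag("DT")
    promote = env_flag("PROMOTE")
    test_promote = env_flag("TEST_PROMOTE")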
workbench/scripts/ml_pipeline_launcher.py
ADDED
@@ -0,0 +1,410 @@
+"""Launch ML pipelines via SQS for testing.
+
+Run this from a directory containing pipeline subdirectories (e.g., ml_pipelines/).
+
+Usage:
+    ml_pipeline_launcher --dt                  # Launch 1 random pipeline group (all scripts in a directory)
+    ml_pipeline_launcher --dt -n 3             # Launch 3 random pipeline groups
+    ml_pipeline_launcher --dt --all            # Launch ALL pipelines
+    ml_pipeline_launcher --dt caco2            # Launch pipelines matching 'caco2'
+    ml_pipeline_launcher --dt caco2 ppb        # Launch pipelines matching 'caco2' or 'ppb'
+    ml_pipeline_launcher --promote --all       # Promote ALL pipelines
+    ml_pipeline_launcher --test-promote --all  # Test-promote ALL pipelines
+    ml_pipeline_launcher --dt --dry-run        # Show what would be launched without launching
+"""
+
+import argparse
+import ast
+import random
+import re
+import subprocess
+import time
+from pathlib import Path
+
+
+def parse_workbench_batch(script_path: Path) -> dict | None:
+    """Parse WORKBENCH_BATCH config from a script file."""
+    content = script_path.read_text()
+    match = re.search(r"WORKBENCH_BATCH\s*=\s*(\{[^}]+\})", content, re.DOTALL)
+    if match:
+        try:
+            return ast.literal_eval(match.group(1))
+        except (ValueError, SyntaxError):
+            return None
+    return None
+
+
+def build_dependency_graph(configs: dict[Path, dict]) -> dict[str, str]:
+    """Build a mapping from each output to its root producer.
+
+    For a chain like A -> B -> C (where B depends on A, C depends on B),
+    this returns {A: A, B: A, C: A} so all are in the same message group.
+    """
+    # Build output -> input mapping (what does each output depend on?)
+    output_to_input = {}
+    for config in configs.values():
+        if not config:
+            continue
+        outputs = config.get("outputs", [])
+        inputs = config.get("inputs", [])
+        for output in outputs:
+            output_to_input[output] = inputs[0] if inputs else None
+
+    # Walk chain to find root
+    def find_root(output: str, visited: set = None) -> str:
+        if visited is None:
+            visited = set()
+        if output in visited:
+            return output
+        visited.add(output)
+        parent = output_to_input.get(output)
+        if parent is None:
+            return output
+        return find_root(parent, visited)
+
+    return {output: find_root(output) for output in output_to_input}
+
+
+def get_group_id(config: dict | None, root_map: dict[str, str]) -> str | None:
+    """Get the root group_id for a pipeline based on its config and root_map."""
+    if not config:
+        return None
+    outputs = config.get("outputs", [])
+    inputs = config.get("inputs", [])
+    # Check inputs first (this script depends on something)
+    if inputs and inputs[0] in root_map:
+        return root_map[inputs[0]]
+    # Check outputs (this script produces something)
+    if outputs and outputs[0] in root_map:
+        return root_map[outputs[0]]
+    return None
+
+
+def sort_by_dependencies(pipelines: list[Path]) -> tuple[list[Path], dict[Path, dict], dict[str, str]]:
+    """Sort pipelines by dependency chains. Returns (sorted_list, configs, root_map)."""
+    # Parse all configs
+    configs = {}
+    for pipeline in pipelines:
+        configs[pipeline] = parse_workbench_batch(pipeline)
+
+    # Build root map for group_id resolution
+    root_map = build_dependency_graph(configs)
+
+    # Build output -> pipeline mapping
+    output_to_pipeline = {}
+    for pipeline, config in configs.items():
+        if config and config.get("outputs"):
+            for output in config["outputs"]:
+                output_to_pipeline[output] = pipeline
+
+    # Build chains by walking from root producers
+    sorted_pipelines = []
+    used = set()
+
+    for pipeline in sorted(pipelines):
+        config = configs.get(pipeline)
+
+        # Skip if already used or has inputs (not a root)
+        if pipeline in used:
+            continue
+        if config and config.get("inputs"):
+            continue
+
+        # Walk the chain from this root
+        chain = [pipeline]
+        used.add(pipeline)
+
+        current = pipeline
+        while True:
+            current_config = configs.get(current)
+            if not current_config or not current_config.get("outputs"):
+                break
+
+            current_output = current_config["outputs"][0]
+            # Find pipeline that consumes this output
+            next_pipeline = None
+            for p, c in configs.items():
+                if p in used or p not in pipelines:
+                    continue
+                if c and c.get("inputs") and current_output in c["inputs"]:
+                    next_pipeline = p
+                    break
+
+            if next_pipeline:
+                chain.append(next_pipeline)
+                used.add(next_pipeline)
+                current = next_pipeline
+            else:
+                break
+
+        sorted_pipelines.extend(chain)
+
+    # Add any remaining pipelines not in chains
+    for pipeline in sorted(pipelines):
+        if pipeline not in used:
+            sorted_pipelines.append(pipeline)
+
+    return sorted_pipelines, configs, root_map
+
+
+def format_dependency_chains(pipelines: list[Path], configs: dict[Path, dict]) -> list[str]:
+    """Format pipelines as dependency chains for display."""
+    # Build output -> pipeline mapping
+    output_to_pipeline = {}
+    for pipeline, config in configs.items():
+        if config and config.get("outputs"):
+            for output in config["outputs"]:
+                output_to_pipeline[output] = pipeline
+
+    # Build chains by walking from root producers
+    chains = []
+    used = set()
+
+    for pipeline in pipelines:
+        config = configs.get(pipeline)
+
+        # Skip if already part of a chain or has inputs (not a root)
+        if pipeline in used:
+            continue
+        if config and config.get("inputs"):
+            continue
+
+        # Start a new chain from this root producer (or standalone)
+        chain = [pipeline]
+        used.add(pipeline)
+
+        # Walk the chain: find who consumes our output
+        current = pipeline
+        while True:
+            current_config = configs.get(current)
+            if not current_config or not current_config.get("outputs"):
+                break
+
+            current_output = current_config["outputs"][0]
+            # Find a pipeline that takes this output as input
+            next_pipeline = None
+            for p, c in configs.items():
+                if p in used or p not in pipelines:
+                    continue
+                if c and c.get("inputs") and current_output in c["inputs"]:
+                    next_pipeline = p
+                    break
+
+            if next_pipeline:
+                chain.append(next_pipeline)
+                used.add(next_pipeline)
+                current = next_pipeline
+            else:
+                break
+
+        chains.append(chain)
+
+    # Add any remaining pipelines not in chains (shouldn't happen but just in case)
+    for pipeline in pipelines:
+        if pipeline not in used:
+            chains.append([pipeline])
+
+    # Format chains as strings
+    lines = []
+    for chain in chains:
+        names = [p.stem for p in chain]
+        lines.append(" " + " --> ".join(names))
+
+    return lines
+
+
+def get_all_pipelines() -> list[Path]:
+    """Get all ML pipeline scripts from subdirectories of current working directory."""
+    cwd = Path.cwd()
+    # Find all .py files in subdirectories (not in cwd itself)
+    pipelines = []
+    for subdir in cwd.iterdir():
+        if subdir.is_dir():
+            pipelines.extend(subdir.rglob("*.py"))
+    return pipelines
+
+
+def get_pipeline_groups(pipelines: list[Path]) -> dict[Path, list[Path]]:
+    """Group pipelines by their parent directory (leaf directories)."""
+    groups = {}
+    for pipeline in pipelines:
+        parent = pipeline.parent
+        groups.setdefault(parent, []).append(pipeline)
+    return groups
+
+
+def select_random_groups(pipelines: list[Path], num_groups: int) -> list[Path]:
+    """Select pipelines from n random leaf directories."""
+    groups = get_pipeline_groups(pipelines)
+    if not groups:
+        return []
+
+    # Select up to num_groups random directories
+    dirs = list(groups.keys())
+    selected_dirs = random.sample(dirs, min(num_groups, len(dirs)))
+
+    # Return all pipelines from those directories
+    selected = []
+    for d in selected_dirs:
+        selected.extend(groups[d])
+    return selected
+
+
+def filter_pipelines_by_patterns(pipelines: list[Path], patterns: list[str]) -> list[Path]:
+    """Filter pipelines by substring patterns matching the basename."""
+    if not patterns:
+        return pipelines
+
+    matched = []
+    for pipeline in pipelines:
+        basename = pipeline.stem.lower()
+        if any(pattern.lower() in basename for pattern in patterns):
+            matched.append(pipeline)
+    return matched
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Launch ML pipelines via SQS for testing")
+    parser.add_argument(
+        "patterns",
+        nargs="*",
+        help="Substring patterns to filter pipelines by basename (e.g., 'caco2' 'ppb')",
+    )
+    parser.add_argument(
+        "-n",
+        "--num-groups",
+        type=int,
+        default=1,
+        help="Number of random pipeline groups to launch (default: 1, ignored if --all or patterns specified)",
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Launch ALL pipelines (ignores -n)",
+    )
+    parser.add_argument(
+        "--realtime",
+        action="store_true",
+        help="Create realtime endpoints (default is serverless)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Show what would be launched without actually launching",
+    )
+
+    # Mode flags (mutually exclusive)
+    mode_group = parser.add_mutually_exclusive_group(required=True)
+    mode_group.add_argument(
+        "--dt",
+        action="store_true",
+        help="Launch with DT=True (dynamic training mode)",
+    )
+    mode_group.add_argument(
+        "--promote",
+        action="store_true",
+        help="Launch with PROMOTE=True (promotion mode)",
+    )
+    mode_group.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Launch with TEST_PROMOTE=True (test promotion mode)",
+    )
+
+    args = parser.parse_args()
+
+    # Get all pipelines from subdirectories of current working directory
+    all_pipelines = get_all_pipelines()
+    if not all_pipelines:
+        print(f"No pipeline scripts found in subdirectories of {Path.cwd()}")
+        exit(1)
+
+    # Determine which pipelines to run
+    if args.patterns:
+        # Filter by patterns
+        selected_pipelines = filter_pipelines_by_patterns(all_pipelines, args.patterns)
+        if not selected_pipelines:
+            print(f"No pipelines matching patterns: {args.patterns}")
+            exit(1)
+        selection_mode = f"matching {args.patterns}"
+    elif args.all:
+        # Run all pipelines
+        selected_pipelines = all_pipelines
+        selection_mode = "ALL"
+    else:
+        # Random group selection
+        selected_pipelines = select_random_groups(all_pipelines, args.num_groups)
+        if not selected_pipelines:
+            print("No pipeline groups found")
+            exit(1)
+        # Get the directory names for display
+        groups = get_pipeline_groups(selected_pipelines)
+        group_names = [d.name for d in groups.keys()]
+        selection_mode = f"RANDOM {args.num_groups} group(s): {group_names}"
+
+    # Sort by dependencies (producers before consumers)
+    selected_pipelines, configs, root_map = sort_by_dependencies(selected_pipelines)
+
+    # Determine mode for display and CLI flag
+    if args.dt:
+        mode_name = "DT (Dynamic Training)"
+        mode_flag = "--dt"
+    elif args.promote:
+        mode_name = "PROMOTE"
+        mode_flag = "--promote"
+    else:
+        mode_name = "TEST_PROMOTE"
+        mode_flag = "--test-promote"
+
+    print(f"\n{'=' * 60}")
+    print(f"{'DRY RUN - ' if args.dry_run else ''}LAUNCHING {len(selected_pipelines)} PIPELINES")
+    print(f"{'=' * 60}")
+    print(f"Source: {Path.cwd()}")
+    print(f"Selection: {selection_mode}")
+    print(f"Mode: {mode_name}")
+    print(f"Endpoint: {'Realtime' if args.realtime else 'Serverless'}")
+    print("\nPipeline Chains:")
+    for line in format_dependency_chains(selected_pipelines, configs):
+        print(line)
+    print()
+
+    # Dry run - just show what would be launched
+    if args.dry_run:
+        print("Dry run complete. No pipelines were launched.\n")
+        return
+
+    # Countdown before launching
+    print("Launching in ", end="", flush=True)
+    for i in range(10, 0, -1):
+        print(f"{i}...", end="", flush=True)
+        time.sleep(1)
+    print(" GO!\n")
+
+    # Launch each pipeline using the CLI
+    for i, pipeline in enumerate(selected_pipelines, 1):
+        print(f"\n{'─' * 60}")
+        print(f"Launching pipeline {i}/{len(selected_pipelines)}: {pipeline.name}")
+        print(f"{'─' * 60}")
+
+        # Build the command
+        cmd = ["ml_pipeline_sqs", str(pipeline), mode_flag]
+        if args.realtime:
+            cmd.append("--realtime")
+
+        # Pass root group_id for dependency chain ordering
+        group_id = get_group_id(configs.get(pipeline), root_map)
+        if group_id:
+            cmd.extend(["--group-id", group_id])
+
+        print(f"Running: {' '.join(cmd)}\n")
+        result = subprocess.run(cmd)
+        if result.returncode != 0:
+            print(f"Failed to launch {pipeline.name} (exit code: {result.returncode})")
+
+    print(f"\n{'=' * 60}")
+    print(f"FINISHED LAUNCHING {len(selected_pipelines)} PIPELINES")
+    print(f"{'=' * 60}\n")
+
+
+if __name__ == "__main__":
+    main()
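parse_workbench_batch() regex-extracts a module-level WORKBENCH_BATCH dict from each pipeline script, and its inputs/outputs lists drive both chain ordering and the shared group id. A hypothetical example of the declaration it expects (artifact names invented; note the regex only matches a flat, single-brace dict literal):

    # At the top of a pipeline script; the launcher parses this with ast.literal_eval
    WORKBENCH_BATCH = {
        "inputs": ["caco2_features"],   # consumed artifacts (hypothetical name)
        "outputs": ["caco2_model"],     # produced artifacts (hypothetical name)
    }

With declarations like this, a script producing caco2_model is launched before any script listing it under inputs, and both land in the same SQS message group.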
workbench/scripts/ml_pipeline_sqs.py
CHANGED
@@ -71,6 +71,8 @@ def submit_to_sqs(
     realtime: bool = False,
     dt: bool = False,
     promote: bool = False,
+    test_promote: bool = False,
+    group_id: str | None = None,
 ) -> None:
     """
     Upload script to S3 and submit message to SQS queue for processing.
@@ -81,6 +83,8 @@ def submit_to_sqs(
         realtime: If True, sets serverless=False for real-time processing (default: False)
         dt: If True, sets DT=True in environment (default: False)
         promote: If True, sets PROMOTE=True in environment (default: False)
+        test_promote: If True, sets TEST_PROMOTE=True in environment (default: False)
+        group_id: Optional MessageGroupId override for dependency chains (default: derived from script)

     Raises:
         ValueError: If size is invalid or script file not found
@@ -99,7 +103,8 @@ def submit_to_sqs(
     # Read script content and parse WORKBENCH_BATCH config
     script_content = script_file.read_text()
     batch_config = parse_workbench_batch(script_content)
-    group_id
+    if group_id is None:
+        group_id = get_message_group_id(batch_config)
     outputs = (batch_config or {}).get("outputs", [])
     inputs = (batch_config or {}).get("inputs", [])

@@ -108,6 +113,7 @@ def submit_to_sqs(
     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (serverless={'False' if realtime else 'True'})")
     print(f"🔄 DynamicTraining: {dt}")
     print(f"🆕 Promote: {promote}")
+    print(f"🧪 Test Promote: {test_promote}")
     print(f"🪣 Bucket: {workbench_bucket}")
     if outputs:
         print(f"📤 Outputs: {outputs}")
@@ -174,6 +180,7 @@ def submit_to_sqs(
         "SERVERLESS": "False" if realtime else "True",
         "DT": str(dt),
         "PROMOTE": str(promote),
+        "TEST_PROMOTE": str(test_promote),
     }

     # Send the message to SQS
@@ -200,6 +207,7 @@ def submit_to_sqs(
     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (SERVERLESS={'False' if realtime else 'True'})")
     print(f"🔄 DynamicTraining: {dt}")
     print(f"🆕 Promote: {promote}")
+    print(f"🧪 Test Promote: {test_promote}")
     if outputs:
         print(f"📤 Outputs: {outputs}")
     if inputs:
@@ -234,7 +242,17 @@ def main():
     parser.add_argument(
         "--promote",
         action="store_true",
-        help="Set Promote=True (models and endpoints will use promoted naming",
+        help="Set Promote=True (models and endpoints will use promoted naming)",
+    )
+    parser.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Set TEST_PROMOTE=True (creates test endpoint with '-test' suffix)",
+    )
+    parser.add_argument(
+        "--group-id",
+        default=None,
+        help="Override MessageGroupId for SQS (used for dependency chain ordering)",
     )
     args = parser.parse_args()
     try:
@@ -244,6 +262,8 @@ def main():
             realtime=args.realtime,
             dt=args.dt,
             promote=args.promote,
+            test_promote=args.test_promote,
+            group_id=args.group_id,
         )
     except Exception as e:
         print(f"\n❌ ERROR: {e}")
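The --group-id override matters because SQS FIFO queues deliver messages sharing a MessageGroupId strictly in order, which is how a dependency chain stays serialized while unrelated pipelines run in parallel. A minimal boto3 sketch of the idea, assuming a FIFO queue (the URL and payload are placeholders, not workbench internals):

    import boto3

    sqs = boto3.client("sqs")
    queue_url = "https://sqs.us-east-1.amazonaws.com/123456789012/ml-pipelines.fifo"  # placeholder

    # Messages with the same MessageGroupId are processed in order, so tagging a
    # whole chain with its root producer's id serializes producer -> consumers.
    sqs.send_message(
        QueueUrl=queue_url,
        MessageBody='{"script": "caco2_model.py"}',  # placeholder payload
        MessageGroupId="caco2_features",             # root of the dependency chain
        MessageDeduplicationId="caco2_model-1",      # required unless content-based dedup is on
    )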
workbench/web_interface/components/plugins/ag_table.py
CHANGED
@@ -96,18 +96,11 @@ class AGTable(PluginInterface):

 if __name__ == "__main__":
     # Run the Unit Test for the Plugin
+    from workbench.api import Meta
     from workbench.web_interface.components.plugin_unit_test import PluginUnitTest

-    # Test data
-
-        "ID": [f"id_{i}" for i in range(10)],
-        "feat1": [1.0, 1.0, 1.1, 3.0, 4.0, 1.0, 1.0, 1.1, 3.0, 4.0],
-        "feat2": [1.0, 1.0, 1.1, 3.0, 4.0, 1.0, 1.0, 1.1, 3.0, 4.0],
-        "feat3": [0.1, 0.15, 0.2, 0.9, 2.8, 0.25, 0.35, 0.4, 1.6, 2.5],
-        "price": [31, 60, 62, 40, 20, 31, 61, 60, 40, 20],
-        "name": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "Z" * 55],
-    }
-    test_df = pd.DataFrame(data)
+    # Test on model data
+    models_df = Meta().models(details=True)

     # Run the Unit Test on the Plugin
-    PluginUnitTest(AGTable, theme="dark", input_data=
+    PluginUnitTest(AGTable, theme="dark", input_data=models_df, max_height=500).run()
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.
+Version: 0.8.243
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License
@@ -40,7 +40,7 @@ Requires-Dist: boto3>=1.31.76
 Requires-Dist: botocore>=1.31.76
 Requires-Dist: redis>=5.0.1
 Requires-Dist: numpy>=1.26.4
-Requires-Dist: pandas
+Requires-Dist: pandas<3.0,>=2.2.1
 Requires-Dist: awswrangler>=3.4.0
 Requires-Dist: sagemaker<3.0,>=2.143
 Requires-Dist: cryptography>=44.0.2
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/RECORD
CHANGED
@@ -40,7 +40,7 @@ workbench/api/feature_set.py,sha256=-21ztp7JDqs7CKF3KtNdPoXppkiDqfb4JVK8xBK9rIY,
 workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
 workbench/api/meta.py,sha256=1JxCpLn4JENiWUJaVjGgDL7WqhIy-s1swUbBzprI-uY,8595
 workbench/api/meta_model.py,sha256=2DpjjBSw60QPMWQ2sTu2492PrFWFMXK8hH9U13gXzi8,11226
-workbench/api/model.py,sha256=
+workbench/api/model.py,sha256=h3TAlKT8X7q6tW6Q134ZTnr1I9au-2d72p1QovlIfd4,5507
 workbench/api/monitor.py,sha256=Cez89Uac7Tzt47FxkjoX-YDGccEhvBcxw3sZFtw4ud8,4506
 workbench/api/parameter_store.py,sha256=_3MmPxKiVy7_OIgCSRlUv9xbk8nuiOWiCtZgT-AxN1k,2574
 workbench/api/pipeline.py,sha256=MSYGrDSXrRB_oQELtAlOwBfxSBTw3REAkHy5XBHau0Y,6261
@@ -54,13 +54,13 @@ workbench/cached/cached_pipeline.py,sha256=QOVnEKu5RbIdlNpJUi-0Ebh0_-C68RigSPwKh
 workbench/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workbench/core/artifacts/__init__.py,sha256=ukcgbYlI9m99bzwaBNO01K1h0-cQkzsbh_jT_GyQ-LY,1034
 workbench/core/artifacts/artifact.py,sha256=scWUbX2Sk1rxT8VEm_Z7YTxbOzkDASNyqqXB56xLZ2w,17721
-workbench/core/artifacts/athena_source.py,sha256=
+workbench/core/artifacts/athena_source.py,sha256=w65c3fjbs1dlosoogj-fo0-DznXUifbfM2bsE-QUzRY,26195
 workbench/core/artifacts/cached_artifact_mixin.py,sha256=ngqFLZ4cQx_TFouXZgXZQsv_7W6XCvxVGXXSfzzaft8,3775
 workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcvZyNxYERwvo8o0OQc,14858
 workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
 workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
-workbench/core/artifacts/endpoint_core.py,sha256=
+workbench/core/artifacts/endpoint_core.py,sha256=hEjMXag9nsLGu_eOLxPSSqcu1aP_1Vo9DHIGemLutvc,55695
 workbench/core/artifacts/feature_set_core.py,sha256=IjSUpxpj2S611uo5LmnOK-aH3CZhfbC5ztC02PQ5gqE,42128
 workbench/core/artifacts/model_core.py,sha256=wPkpdRlxnAXMqsDtJGPotGFO146Hm7NCfYbImHwZo9c,52343
 workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
@@ -69,7 +69,7 @@ workbench/core/cloud_platform/cloud_meta.py,sha256=QFEsGfqhaCkw9Jl4PRln-xRaHnt-e
 workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97
 workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=V5iVsoGvSRilARtTdExnt27QptzAcJaW0s3nm2B8-ow,8286
 workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
-workbench/core/cloud_platform/aws/aws_meta.py,sha256=
+workbench/core/cloud_platform/aws/aws_meta.py,sha256=BwvQbEJVBW5b3wnw25ndFm33QptgoSMpeFh1Zc9vfmw,34853
 workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
 workbench/core/cloud_platform/aws/aws_session.py,sha256=2Gc_k4Q87BBeQDgXgVR-w-qmsF6ncZR8wvTeNnixM6k,6926
 workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
@@ -107,9 +107,9 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
 workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
 workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
 workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/features_to_model/features_to_model.py,sha256=
+workbench/core/transforms/features_to_model/features_to_model.py,sha256=pYGdfnp-6xh79kxQ5iXySi7oYcaRuQ-xLDo1rFgDB7g,20876
 workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=
+workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=I44_ziQ0IegudLQ_qJ-XNfWZInDkXWI9LsE-1o9855w,8823
 workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
 workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
 workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
@@ -128,11 +128,11 @@ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,
 workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
 workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
 workbench/model_script_utils/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_script_utils/pytorch_utils.py,sha256=
+workbench/model_script_utils/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_script_utils/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/script_generation.py,sha256=Sv0OJdASNKk1KXr8goiZWUL5W7i8G8gBb_R_OTb8caI,8257
-workbench/model_scripts/chemprop/chemprop.template,sha256=
-workbench/model_scripts/chemprop/generated_model_script.py,sha256=
+workbench/model_scripts/chemprop/chemprop.template,sha256=otuR2Ee-GogsNo4z1MlefXY9G--ZOTgg4rFc_5NXivw,36941
+workbench/model_scripts/chemprop/generated_model_script.py,sha256=6duTkJUH1eRrsGHAZN1DWRKR74K5tsXKcQPrWd3vjxQ,36724
 workbench/model_scripts/chemprop/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
 workbench/model_scripts/chemprop/requirements.txt,sha256=2IBHZZNYqhX9Ed7AmRVgN06tO3EHeBbN2EM8-tjWZhs,216
 workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
@@ -160,10 +160,10 @@ workbench/model_scripts/ensemble_xgb/ensemble_xgb.template,sha256=lMEx0IkawcpTI5
 workbench/model_scripts/ensemble_xgb/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/meta_model/generated_model_script.py,sha256=ncPrHd9-R8l_98vAiuTUJ92C9PKpEgAtpIrmd7TuqSQ,8341
 workbench/model_scripts/meta_model/meta_model.template,sha256=viz-AKVq3YRwOUBt8-rUO1TwdEPFzyP7nnifqcIJurw,8244
-workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=
+workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=1B4RortOxbB7feTrr5Kf9qUqdqG4Qc1a6evdNUYLSNg,27011
 workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_scripts/pytorch_model/pytorch.template,sha256=
-workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=
+workbench/model_scripts/pytorch_model/pytorch.template,sha256=78TBsT1NoPkVL-cINZMjA1SE91abUgKtNqedOCvS7lU,22766
+workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_scripts/pytorch_model/requirements.txt,sha256=ES7YehHEL4E5oV8FScHm3oNQmkMI4ODgbC1fSbaY7T4,183
 workbench/model_scripts/pytorch_model/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
@@ -184,8 +184,9 @@ workbench/scripts/endpoint_test.py,sha256=RV52DZZTOD_ou-ywZjaxQ2_wqnSJqvlnHQZbvf
 workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
 workbench/scripts/lambda_test.py,sha256=SLAPIXeGQn82neQ6-Hif3VS3LWLwT0-dGw8yWw2aXRQ,2077
 workbench/scripts/meta_model_sim.py,sha256=6iGpInA-nH6DSjk0z63fcoL8P7icqnZmKLE5Sqyrh7E,1026
-workbench/scripts/ml_pipeline_batch.py,sha256=
-workbench/scripts/
+workbench/scripts/ml_pipeline_batch.py,sha256=EbtOMtXIzvE07cLw4xV3nDM5NL_bYskO_kWySdegCjw,6567
+workbench/scripts/ml_pipeline_launcher.py,sha256=Xxg5m5Q0Ji4tiIp3Vo4JdNNuQ3n1L0Dx19Hrzb-vqBc,13801
+workbench/scripts/ml_pipeline_sqs.py,sha256=YFbc-tLvRFAnj4ABlpnGUTf5sz1MxriHIP-n4dGLitM,9537
 workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
 workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
 workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
@@ -281,7 +282,7 @@ workbench/web_interface/components/settings_menu.py,sha256=HdMhi0Lm7s6U9c7qzQJdU
 workbench/web_interface/components/violin_plots.py,sha256=3_T85hIs_R_WZpfFkSrqY2eYXmYzWsywDqsLhB7W1RQ,5320
 workbench/web_interface/components/experiments/dashboard_metric_plots.py,sha256=DPIw13tO9XOGxA6IeRPLgl-C3XUJ2N287JkSEg73Rjg,2984
 workbench/web_interface/components/experiments/outlier_plot.py,sha256=5yGVnVScM0TR80OjPypx_83Ksg7r5HDR3hGjpT4Ub14,3646
-workbench/web_interface/components/plugins/ag_table.py,sha256=
+workbench/web_interface/components/plugins/ag_table.py,sha256=mIfpeBn0-zrPj0hmlSTSuo17Od2OSRnIHwFPNdfuTMA,3573
 workbench/web_interface/components/plugins/confusion_matrix.py,sha256=gkmbAOWsZRVBoPQSav-aglDtw0Nt54YcDya9Z4OG0Vc,7387
 workbench/web_interface/components/plugins/dashboard_status.py,sha256=4plmoiXj3dDjoQerUNpep_jfk50pI9rHvcoSP20UbE8,5832
 workbench/web_interface/components/plugins/data_details.py,sha256=pZm1AbM_0EXQwx77qUkfyrU9MedAs4Wlkp6iOtSrUtI,11104
@@ -304,9 +305,9 @@ workbench/web_interface/page_views/main_page.py,sha256=DyChwOGX_KtbJ09pw2Iswofba
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.243.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+workbench-0.8.243.dist-info/METADATA,sha256=mWeiFAV-J1TZKygpTEymX4gPhl2dFfscZKPOFitOMFo,10038
+workbench-0.8.243.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+workbench-0.8.243.dist-info/entry_points.txt,sha256=Stivs_FFse2pHLXfWNpyh649z0bj7Ks5laQy8LuexCA,633
+workbench-0.8.243.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.243.dist-info/RECORD,,
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/entry_points.txt
CHANGED
@@ -5,6 +5,7 @@ glue_launcher = workbench.scripts.glue_launcher:main
 lambda_test = workbench.scripts.lambda_test:main
 meta_model_sim = workbench.scripts.meta_model_sim:main
 ml_pipeline_batch = workbench.scripts.ml_pipeline_batch:main
+ml_pipeline_launcher = workbench.scripts.ml_pipeline_launcher:main
 ml_pipeline_sqs = workbench.scripts.ml_pipeline_sqs:main
 training_test = workbench.scripts.training_test:main
 workbench = workbench.repl.workbench_shell:launch_shell
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/licenses/LICENSE
File without changes
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/top_level.txt
File without changes