workbench 0.8.177__py3-none-any.whl → 0.8.178__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -17,7 +17,7 @@ from workbench.core.artifacts.artifact import Artifact
  from workbench.core.artifacts.data_source_factory import DataSourceFactory
  from workbench.core.artifacts.athena_source import AthenaSource

- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Optional

  from workbench.utils.aws_utils import aws_throttle

@@ -509,6 +509,25 @@ class FeatureSetCore(Artifact):
          ].tolist()
          return hold_out_ids

+     def set_training_filter(self, filter_expression: Optional[str] = None):
+         """Set a filter expression for the training view for this FeatureSet
+
+         Args:
+             filter_expression (Optional[str]): A SQL filter expression (e.g., "age > 25 AND status = 'active'")
+                 If None or empty string, will reset to default training view with no filter
+                 (default: None)
+         """
+         from workbench.core.views import TrainingView
+
+         # Grab the existing holdout ids
+         holdout_ids = self.get_training_holdouts()
+
+         # Create a NEW training view
+         self.log.important(f"Setting Training Filter: {filter_expression}")
+         TrainingView.create(
+             self, id_column=self.id_column, holdout_ids=holdout_ids, filter_expression=filter_expression
+         )
+
      @classmethod
      def delete_views(cls, table: str, database: str):
          """Delete any views associated with this FeatureSet
@@ -707,7 +726,7 @@ if __name__ == "__main__":

      # Test getting the holdout ids
      print("Getting the hold out ids...")
-     holdout_ids = my_features.get_training_holdouts("id")
+     holdout_ids = my_features.get_training_holdouts()
      print(f"Holdout IDs: {holdout_ids}")

      # Get a sample of the data
@@ -729,16 +748,26 @@ if __name__ == "__main__":
      table = my_features.view("training").table
      df = my_features.query(f'SELECT id, name FROM "{table}"')
      my_holdout_ids = [id for id in df["id"] if id < 20]
-     my_features.set_training_holdouts("id", my_holdout_ids)
-
-     # Test the hold out set functionality with strings
-     print("Setting hold out ids (strings)...")
-     my_holdout_ids = [name for name in df["name"] if int(name.split(" ")[1]) > 80]
-     my_features.set_training_holdouts("name", my_holdout_ids)
+     my_features.set_training_holdouts(my_holdout_ids)

      # Get the training data
      print("Getting the training data...")
      training_data = my_features.get_training_data()
+     print(f"Training Data: {training_data.shape}")
+
+     # Test the filter expression functionality
+     print("Setting a filter expression...")
+     my_features.set_training_filter("id < 50 AND height > 65.0")
+     training_data = my_features.get_training_data()
+     print(f"Training Data: {training_data.shape}")
+     print(training_data)
+
+     # Remove training filter
+     print("Removing the filter expression...")
+     my_features.set_training_filter(None)
+     training_data = my_features.get_training_data()
+     print(f"Training Data: {training_data.shape}")
+     print(training_data)

      # Now delete the AWS artifacts associated with this Feature Set
      # print("Deleting Workbench Feature Set...")
@@ -3,7 +3,7 @@
  from typing import Union

  # Workbench Imports
- from workbench.api import DataSource, FeatureSet
+ from workbench.api import FeatureSet
  from workbench.core.views.view import View
  from workbench.core.views.create_view import CreateView
  from workbench.core.views.view_utils import get_column_list
@@ -34,6 +34,7 @@ class TrainingView(CreateView):
          source_table: str = None,
          id_column: str = None,
          holdout_ids: Union[list[str], list[int], None] = None,
+         filter_expression: str = None,
      ) -> Union[View, None]:
          """Factory method to create and return a TrainingView instance.

@@ -42,6 +43,8 @@ class TrainingView(CreateView):
              source_table (str, optional): The table/view to create the view from. Defaults to None.
              id_column (str, optional): The name of the id column. Defaults to None.
              holdout_ids (Union[list[str], list[int], None], optional): A list of holdout ids. Defaults to None.
+             filter_expression (str, optional): SQL filter expression (e.g., "age > 25 AND status = 'active'").
+                 Defaults to None.

          Returns:
              Union[View, None]: The created View object (or None if failed to create the view)
@@ -69,28 +72,36 @@ class TrainingView(CreateView):
          else:
              id_column = instance.auto_id_column

-         # If we don't have holdout ids, create a default training view
-         if not holdout_ids:
-             instance._default_training_view(instance.data_source, id_column)
-             return View(instance.data_source, instance.view_name, auto_create_view=False)
+         # Enclose each column name in double quotes
+         sql_columns = ", ".join([f'"{column}"' for column in column_list])
+
+         # Build the training assignment logic
+         if holdout_ids:
+             # Format the list of holdout ids for SQL IN clause
+             if all(isinstance(id, str) for id in holdout_ids):
+                 formatted_holdout_ids = ", ".join(f"'{id}'" for id in holdout_ids)
+             else:
+                 formatted_holdout_ids = ", ".join(map(str, holdout_ids))

-         # Format the list of holdout ids for SQL IN clause
-         if holdout_ids and all(isinstance(id, str) for id in holdout_ids):
-             formatted_holdout_ids = ", ".join(f"'{id}'" for id in holdout_ids)
+             training_logic = f"""CASE
+                 WHEN {id_column} IN ({formatted_holdout_ids}) THEN False
+                 ELSE True
+             END AS training"""
          else:
-             formatted_holdout_ids = ", ".join(map(str, holdout_ids))
+             # Default 80/20 split using modulo
+             training_logic = f"""CASE
+                 WHEN MOD(ROW_NUMBER() OVER (ORDER BY {id_column}), 10) < 8 THEN True
+                 ELSE False
+             END AS training"""

-         # Enclose each column name in double quotes
-         sql_columns = ", ".join([f'"{column}"' for column in column_list])
+         # Build WHERE clause if filter_expression is provided
+         where_clause = f"\nWHERE {filter_expression}" if filter_expression else ""

          # Construct the CREATE VIEW query
          create_view_query = f"""
          CREATE OR REPLACE VIEW {instance.table} AS
-         SELECT {sql_columns}, CASE
-             WHEN {id_column} IN ({formatted_holdout_ids}) THEN False
-             ELSE True
-         END AS training
-         FROM {instance.source_table}
+         SELECT {sql_columns}, {training_logic}
+         FROM {instance.source_table}{where_clause}
          """

          # Execute the CREATE VIEW query
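To make the refactor concrete, here is a standalone sketch of the string assembly above, with made-up table and column values (placeholders, not package defaults):

    # Mirrors the training_logic / where_clause assembly from the hunk above
    id_column = "auto_id"
    sql_columns = '"auto_id", "diameter"'
    formatted_holdout_ids = ", ".join(map(str, [0, 1, 2]))
    filter_expression = "diameter > 0.5"

    training_logic = f"""CASE
        WHEN {id_column} IN ({formatted_holdout_ids}) THEN False
        ELSE True
    END AS training"""
    where_clause = f"\nWHERE {filter_expression}" if filter_expression else ""

    print(f"""
    CREATE OR REPLACE VIEW my_view AS
    SELECT {sql_columns}, {training_logic}
    FROM my_source_table{where_clause}
    """)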
@@ -99,43 +110,13 @@ class TrainingView(CreateView):
          # Return the View
          return View(instance.data_source, instance.view_name, auto_create_view=False)

-     # This is an internal method that's used to create a default training view
-     def _default_training_view(self, data_source: DataSource, id_column: str):
-         """Create a default view in Athena that assigns roughly 80% of the data to training
-
-         Args:
-             data_source (DataSource): The Workbench DataSource object
-             id_column (str): The name of the id column
-         """
-         self.log.important(f"Creating default Training View {self.table}...")
-
-         # Drop any columns generated from AWS
-         aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
-         column_list = [col for col in data_source.columns if col not in aws_cols]
-
-         # Enclose each column name in double quotes
-         sql_columns = ", ".join([f'"{column}"' for column in column_list])
-
-         # Construct the CREATE VIEW query with a simple modulo operation for the 80/20 split
-         create_view_query = f"""
-         CREATE OR REPLACE VIEW "{self.table}" AS
-         SELECT {sql_columns}, CASE
-             WHEN MOD(ROW_NUMBER() OVER (ORDER BY {id_column}), 10) < 8 THEN True  -- Assign 80% to training
-             ELSE False  -- Assign roughly 20% to validation/test
-         END AS training
-         FROM {self.base_table_name}
-         """
-
-         # Execute the CREATE VIEW query
-         data_source.execute_statement(create_view_query)
-

  if __name__ == "__main__":
      """Exercise the Training View functionality"""
      from workbench.api import FeatureSet

      # Get the FeatureSet
-     fs = FeatureSet("test_features")
+     fs = FeatureSet("abalone_features")

      # Delete the existing training view
      training_view = TrainingView.create(fs)
@@ -152,9 +133,18 @@ if __name__ == "__main__":

      # Create a TrainingView with holdout ids
      my_holdout_ids = list(range(10))
-     training_view = TrainingView.create(fs, id_column="id", holdout_ids=my_holdout_ids)
+     training_view = TrainingView.create(fs, id_column="auto_id", holdout_ids=my_holdout_ids)

      # Pull the training data
      df = training_view.pull_dataframe()
      print(df.head())
      print(df["training"].value_counts())
+     print(f"Shape: {df.shape}")
+     print(f"Diameter min: {df['diameter'].min()}, max: {df['diameter'].max()}")
+
+     # Test the filter expression
+     training_view = TrainingView.create(fs, id_column="auto_id", filter_expression="diameter > 0.5")
+     df = training_view.pull_dataframe()
+     print(df.head())
+     print(f"Shape with filter: {df.shape}")
+     print(f"Diameter min: {df['diameter'].min()}, max: {df['diameter'].max()}")
@@ -22,7 +22,7 @@ from typing import List, Tuple

  # Template Placeholders
  TEMPLATE_PARAMS = {
- "target": "udm_asy_res_value",
+ "target": "logs",
  "features": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 
'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo'],
  "compressed_features": [],
  "train_all_data": True
@@ -242,7 +242,7 @@ if __name__ == "__main__":
      print(f"R2: {xgb_r2:.3f}")

      # Define confidence levels we want to model
-     confidence_levels = [0.50, 0.80, 0.90, 0.95]  # 50%, 80%, 90%, 95% confidence intervals
+     confidence_levels = [0.50, 0.68, 0.80, 0.90, 0.95]  # 50%, 68%, 80%, 90%, 95% confidence intervals

      # Store MAPIE models for each confidence level
      mapie_models = {}
@@ -459,6 +459,9 @@ def predict_fn(df, models) -> pd.DataFrame:
          if conf_level == 0.50:  # 50% CI
              df["q_25"] = y_pis[:, 0, 0]
              df["q_75"] = y_pis[:, 1, 0]
+         elif conf_level == 0.68:  # 68% CI
+             df["q_16"] = y_pis[:, 0, 0]
+             df["q_84"] = y_pis[:, 1, 0]
          elif conf_level == 0.80:  # 80% CI
              df["q_10"] = y_pis[:, 0, 0]
              df["q_90"] = y_pis[:, 1, 0]
@@ -472,23 +475,16 @@ def predict_fn(df, models) -> pd.DataFrame:
      # Add median (q_50) from XGBoost prediction
      df["q_50"] = df["prediction"]

-     # Calculate uncertainty metrics based on 95% interval
-     interval_width = df["q_975"] - df["q_025"]
-     df["prediction_std"] = interval_width / 3.92
+     # Calculate a pseudo-standard deviation from the 68% interval width
+     df["prediction_std"] = (df["q_84"] - df["q_16"]) / 2.0

      # Reorder the quantile columns for easier reading
-     quantile_cols = ["q_025", "q_05", "q_10", "q_25", "q_75", "q_90", "q_95", "q_975"]
+     quantile_cols = ["q_025", "q_05", "q_10", "q_16", "q_25", "q_75", "q_84", "q_90", "q_95", "q_975"]
      other_cols = [col for col in df.columns if col not in quantile_cols]
      df = df[other_cols + quantile_cols]

-     # Uncertainty score
-     df["uncertainty_score"] = interval_width / (np.abs(df["prediction"]) + 1e-6)
-
-     # Confidence bands
-     df["confidence_band"] = pd.cut(
-         df["uncertainty_score"],
-         bins=[0, 0.5, 1.0, 2.0, np.inf],
-         labels=["high", "medium", "low", "very_low"]
-     )
+     # Adjust the outer quantiles to ensure they encompass the prediction
+     df["q_025"] = np.minimum(df["q_025"], df["prediction"])
+     df["q_975"] = np.maximum(df["q_975"], df["prediction"])

      return df
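The switch to (q_84 - q_16) / 2.0 works because, for a Gaussian, the 16th and 84th percentiles sit roughly one standard deviation either side of the mean. A quick standalone check on synthetic data (not from the package):

    import numpy as np

    rng = np.random.default_rng(42)
    y = rng.normal(loc=10.0, scale=2.0, size=100_000)
    q_16, q_84 = np.quantile(y, [0.16, 0.84])
    print((q_84 - q_16) / 2.0)  # ~2.0, recovering the true scale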
@@ -242,7 +242,7 @@ if __name__ == "__main__":
      print(f"R2: {xgb_r2:.3f}")

      # Define confidence levels we want to model
-     confidence_levels = [0.50, 0.80, 0.90, 0.95]  # 50%, 80%, 90%, 95% confidence intervals
+     confidence_levels = [0.50, 0.68, 0.80, 0.90, 0.95]  # 50%, 68%, 80%, 90%, 95% confidence intervals

      # Store MAPIE models for each confidence level
      mapie_models = {}
@@ -459,6 +459,9 @@ def predict_fn(df, models) -> pd.DataFrame:
          if conf_level == 0.50:  # 50% CI
              df["q_25"] = y_pis[:, 0, 0]
              df["q_75"] = y_pis[:, 1, 0]
+         elif conf_level == 0.68:  # 68% CI
+             df["q_16"] = y_pis[:, 0, 0]
+             df["q_84"] = y_pis[:, 1, 0]
          elif conf_level == 0.80:  # 80% CI
              df["q_10"] = y_pis[:, 0, 0]
              df["q_90"] = y_pis[:, 1, 0]
@@ -472,23 +475,16 @@ def predict_fn(df, models) -> pd.DataFrame:
      # Add median (q_50) from XGBoost prediction
      df["q_50"] = df["prediction"]

-     # Calculate uncertainty metrics based on 50% interval
-     interval_width = df["q_75"] - df["q_25"]
-     df["prediction_std"] = interval_width / 1.348
+     # Calculate a pseudo-standard deviation from the 68% interval width
+     df["prediction_std"] = (df["q_84"] - df["q_16"]) / 2.0

      # Reorder the quantile columns for easier reading
-     quantile_cols = ["q_025", "q_05", "q_10", "q_25", "q_75", "q_90", "q_95", "q_975"]
+     quantile_cols = ["q_025", "q_05", "q_10", "q_16", "q_25", "q_75", "q_84", "q_90", "q_95", "q_975"]
      other_cols = [col for col in df.columns if col not in quantile_cols]
      df = df[other_cols + quantile_cols]

-     # Uncertainty score
-     df["uncertainty_score"] = interval_width / (np.abs(df["prediction"]) + 1e-6)
-
-     # Confidence bands
-     df["confidence_band"] = pd.cut(
-         df["uncertainty_score"],
-         bins=[0, 0.5, 1.0, 2.0, np.inf],
-         labels=["high", "medium", "low", "very_low"]
-     )
+     # Adjust the outer quantiles to ensure they encompass the prediction
+     df["q_025"] = np.minimum(df["q_025"], df["prediction"])
+     df["q_975"] = np.maximum(df["q_975"], df["prediction"])

      return df
@@ -13,12 +13,13 @@ cm = ConfigManager()
  workbench_bucket = cm.get_config("WORKBENCH_BUCKET")


- def submit_to_sqs(script_path: str, size: str = "small") -> None:
+ def submit_to_sqs(script_path: str, size: str = "small", realtime: bool = False) -> None:
      """
      Upload script to S3 and submit message to SQS queue for processing.
      Args:
          script_path: Local path to the ML pipeline script
          size: Job size tier - "small" (default), "medium", or "large"
+         realtime: If True, sets serverless=False for real-time processing (default: False, meaning serverless=True)
      """
      print(f"\n{'=' * 60}")
      print("🚀 SUBMITTING ML PIPELINE JOB")
@@ -33,6 +34,7 @@ def submit_to_sqs(script_path: str, size: str = "small") -> None:

      print(f"📄 Script: {script_file.name}")
      print(f"📏 Size tier: {size}")
+     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (serverless={'False' if realtime else 'True'})")
      print(f"🪣 Bucket: {workbench_bucket}")
      sqs = AWSAccountClamp().boto3_session.client("sqs")
      script_name = script_file.name
@@ -88,6 +90,10 @@ def submit_to_sqs(script_path: str, size: str = "small") -> None:

      # Prepare message
      message = {"script_path": s3_path, "size": size}
+
+     # Set serverless environment variable (defaults to True, False if --realtime)
+     message["environment"] = {"SERVERLESS": "False" if realtime else "True"}
+
      print("\n📨 Sending message to SQS...")

      # Send the message to SQS
@@ -110,6 +116,7 @@ def submit_to_sqs(script_path: str, size: str = "small") -> None:
      print(f"{'=' * 60}")
      print(f"📄 Script: {script_name}")
      print(f"📏 Size: {size}")
+     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (SERVERLESS={'False' if realtime else 'True'})")
      print(f"🆔 Message ID: {message_id}")
      print("\n🔍 MONITORING LOCATIONS:")
      print(f"  • SQS Queue: AWS Console → SQS → {queue_name}")
@@ -126,9 +133,14 @@ def main():
      parser.add_argument(
          "--size", default="small", choices=["small", "medium", "large"], help="Job size tier (default: small)"
      )
+     parser.add_argument(
+         "--realtime",
+         action="store_true",
+         help="Run in real-time mode (sets serverless=False). Default is serverless mode (serverless=True)",
+     )
      args = parser.parse_args()
      try:
-         submit_to_sqs(args.script_file, args.size)
+         submit_to_sqs(args.script_file, args.size, realtime=args.realtime)
      except Exception as e:
          print(f"\n❌ ERROR: {e}")
          log.error(f"Error: {e}")
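Putting these script changes together, the SQS payload now carries an explicit serverless flag. A sketch of the message shape (the S3 path below is a made-up placeholder):

    # Shape of the message submit_to_sqs() now sends
    message = {
        "script_path": "s3://my-workbench-bucket/scripts/my_pipeline.py",  # placeholder
        "size": "medium",
        "environment": {"SERVERLESS": "False"},  # "True" unless --realtime is passed
    }

Only the new --realtime flag flips this to real-time mode; the default remains serverless.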
@@ -222,6 +222,7 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
          lower_95, upper_95 = df["q_025"], df["q_975"]
          lower_90, upper_90 = df["q_05"], df["q_95"]
          lower_80, upper_80 = df["q_10"], df["q_90"]
+         lower_68, upper_68 = df["q_16"], df["q_84"]
          lower_50, upper_50 = df["q_25"], df["q_75"]
      elif "prediction_std" in df.columns:
          lower_95 = df["prediction"] - 1.96 * df["prediction_std"]
@@ -230,6 +231,8 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
          upper_90 = df["prediction"] + 1.645 * df["prediction_std"]
          lower_80 = df["prediction"] - 1.282 * df["prediction_std"]
          upper_80 = df["prediction"] + 1.282 * df["prediction_std"]
+         lower_68 = df["prediction"] - 1.0 * df["prediction_std"]
+         upper_68 = df["prediction"] + 1.0 * df["prediction_std"]
          lower_50 = df["prediction"] - 0.674 * df["prediction_std"]
          upper_50 = df["prediction"] + 0.674 * df["prediction_std"]
      else:
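The hard-coded multipliers in this branch are the standard two-sided Gaussian z-values. A quick sanity check (assumes scipy is available, which this diff does not itself show):

    from scipy.stats import norm

    for level in [0.50, 0.68, 0.80, 0.90, 0.95]:
        z = norm.ppf(0.5 + level / 2.0)  # two-sided z multiplier
        print(f"{level:.2f} -> z = {z:.3f}")
    # 0.50 -> 0.674, 0.68 -> 0.994 (~1.0), 0.80 -> 1.282, 0.90 -> 1.645, 0.95 -> 1.960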
@@ -241,11 +244,13 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
      coverage_95 = np.mean((df[target_col] >= lower_95) & (df[target_col] <= upper_95))
      coverage_90 = np.mean((df[target_col] >= lower_90) & (df[target_col] <= upper_90))
      coverage_80 = np.mean((df[target_col] >= lower_80) & (df[target_col] <= upper_80))
+     coverage_68 = np.mean((df[target_col] >= lower_68) & (df[target_col] <= upper_68))
      coverage_50 = np.mean((df[target_col] >= lower_50) & (df[target_col] <= upper_50))
      avg_width_95 = np.mean(upper_95 - lower_95)
      avg_width_90 = np.mean(upper_90 - lower_90)
      avg_width_80 = np.mean(upper_80 - lower_80)
      avg_width_50 = np.mean(upper_50 - lower_50)
+     avg_width_68 = np.mean(upper_68 - lower_68)

      # --- CRPS (measures calibration + sharpness) ---
      z = (df[target_col] - df["prediction"]) / df["prediction_std"]
@@ -269,12 +274,14 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:
      # Collect results
      results = {
          "coverage_50": coverage_50,
+         "coverage_68": coverage_68,
          "coverage_80": coverage_80,
          "coverage_90": coverage_90,
          "coverage_95": coverage_95,
-         "avg_std": avg_std,
          "median_std": median_std,
+         "avg_std": avg_std,
          "avg_width_50": avg_width_50,
+         "avg_width_68": avg_width_68,
          "avg_width_80": avg_width_80,
          "avg_width_90": avg_width_90,
          "avg_width_95": avg_width_95,
@@ -286,12 +293,14 @@ def uq_metrics(df: pd.DataFrame, target_col: str) -> Dict[str, Any]:

      print("\n=== UQ Metrics ===")
      print(f"Coverage @ 50%: {coverage_50:.3f} (target: 0.50)")
+     print(f"Coverage @ 68%: {coverage_68:.3f} (target: 0.68)")
      print(f"Coverage @ 80%: {coverage_80:.3f} (target: 0.80)")
      print(f"Coverage @ 90%: {coverage_90:.3f} (target: 0.90)")
      print(f"Coverage @ 95%: {coverage_95:.3f} (target: 0.95)")
-     print(f"Avg Prediction StdDev: {avg_std:.3f}")
      print(f"Median Prediction StdDev: {median_std:.3f}")
+     print(f"Avg Prediction StdDev: {avg_std:.3f}")
      print(f"Average 50% Width: {avg_width_50:.3f}")
+     print(f"Average 68% Width: {avg_width_68:.3f}")
      print(f"Average 80% Width: {avg_width_80:.3f}")
      print(f"Average 90% Width: {avg_width_90:.3f}")
      print(f"Average 95% Width: {avg_width_95:.3f}")
@@ -259,7 +259,7 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Dict[str, Any
      xgb_model._Booster = loaded_booster
      # Prepare data
      fs = FeatureSet(workbench_model.get_input())
-     df = fs.pull_dataframe()
+     df = fs.view("training").pull_dataframe()
      feature_cols = workbench_model.features()
      # Convert string features to categorical
      for col in feature_cols:
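This one-line change means cross-fold evaluation now pulls rows through the training view, so any filter_expression or holdouts set above are reflected, rather than reading the raw FeatureSet. A minimal sketch, reusing the illustrative FeatureSet name from elsewhere in this diff:

    from workbench.api import FeatureSet

    fs = FeatureSet("abalone_features")        # illustrative name
    df = fs.view("training").pull_dataframe()  # honors the view's filter and holdout logic
    print(df["training"].value_counts())       # the view adds a boolean "training" column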
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: workbench
- Version: 0.8.177
+ Version: 0.8.178
  Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
  Author-email: SuperCowPowers LLC <support@supercowpowers.com>
  License-Expression: MIT
@@ -55,7 +55,7 @@ workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcv
  workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
  workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
  workbench/core/artifacts/endpoint_core.py,sha256=Q6wL0IpMgCkVssX-BvPwawgogQjq9klSaoBUZ6tEIuc,49146
- workbench/core/artifacts/feature_set_core.py,sha256=055VdSYR09HP4ygAuYvIYtHQ7Ec4XxsZygpgEl5H5jQ,29136
+ workbench/core/artifacts/feature_set_core.py,sha256=0wvW4VyZii0GmO6tBudoGEqZktLtb6spDyIkn7MkDcw,30292
  workbench/core/artifacts/model_core.py,sha256=ECDwQ0qM5qb1yGJ07U70BVdfkrW9m7p9e6YJWib3uR0,50855
  workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
  workbench/core/cloud_platform/cloud_meta.py,sha256=-g4-LTC3D0PXb3VfaXdLR1ERijKuHdffeMK_zhD-koQ,8809
@@ -118,7 +118,7 @@ workbench/core/views/create_view.py,sha256=2Ykzb2NvJGoD4PP4k2Bka46GDog9iGG5SWnAc
  workbench/core/views/display_view.py,sha256=9K4O77ZnKOh93aMRhxcQJQ1lqScLhuJnU_tHtYZ_U4E,2598
  workbench/core/views/inference_view.py,sha256=9s70M0dFdGq0tWvzMZfgUK7EPKtuvcQhux0uyRZuuLM,3293
  workbench/core/views/pandas_to_view.py,sha256=20uCsnG2iMh-U1VxqVUUtnrWAY98SeuHjmfJK_wcq1I,6422
- workbench/core/views/training_view.py,sha256=mUkv1oVhDG-896RdLNKxCg0j0yvudEcPnvL5EH8WERQ,6359
+ workbench/core/views/training_view.py,sha256=UWW8Asxtm_kV7Z8NooitMA4xC5vTc7lSWwTGbLdifqY,5900
  workbench/core/views/view.py,sha256=Ujzw6zLROP9oKfKm3zJwaOyfpyjh5uM9fAu1i3kUOig,11764
  workbench/core/views/view_utils.py,sha256=y0YuPW-90nAfgAD1UW_49-j7Mvncfm7-5rV8I_97CK8,12274
  workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
@@ -140,8 +140,8 @@ workbench/model_scripts/custom_models/uq_models/Readme.md,sha256=UVpL-lvtTrLqwBe
  workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template,sha256=U4LIlpp8Rbu3apyzPR7-55lvlutpTsCro_PUvQ5pklY,6457
  workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template,sha256=0IJnSBACQ556ldEiPqR7yPCOOLJs1hQhHmPBvB2d9tY,13491
  workbench/model_scripts/custom_models/uq_models/gaussian_process.template,sha256=QbDUfkiPCwJ-c-4Twgu4utZuYZaAyeW_3T1IP-_tutw,6683
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py,sha256=AcLf-vXOmn_vpTeiKpNKCW_dRhR8Co1sMFC84EPT4IE,22392
- workbench/model_scripts/custom_models/uq_models/mapie.template,sha256=Vou_g0ux-KOrs36S98g27Y8ckU9sdYrKWwypJjasQX4,18180
+ workbench/model_scripts/custom_models/uq_models/generated_model_script.py,sha256=DUH80Y-We_-3OomUNjvBdRPrNQLQb3zlSsKZIPiglU4,22402
+ workbench/model_scripts/custom_models/uq_models/mapie.template,sha256=SHP1Sd-nWMVF5sgB9Ski6C4IkQlm4g0EqpnJT1GfHl4,18204
  workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=eawh0Fp3DhbdCXzWN6KloczT5ZS_ou4ayW65yUTTE4o,14109
  workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=9-O6P-SW50ul5Wl6es2DMWXSbrwOg7HWsdc8Qdln0MM,8278
  workbench/model_scripts/custom_models/uq_models/proximity.py,sha256=zqmNlX70LnWXr5fdtFFQppSNTLjlOciQVrjGr-g9jRE,13716
@@ -169,7 +169,7 @@ workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbv
  workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
  workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
  workbench/scripts/ml_pipeline_batch.py,sha256=1T5JnLlUJR7bwAGBLHmLPOuj1xFRqVIQX8PsuDhHy8o,4907
- workbench/scripts/ml_pipeline_sqs.py,sha256=7w67UUuZNYnxXiZG48gpoEFbH-c_cUfjMg0FgWI0DbQ,5100
+ workbench/scripts/ml_pipeline_sqs.py,sha256=COewJcFYuv5Pa_l0q0PA4ZZb-AQ_7opKJP4JTEKBQ2c,5847
  workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
  workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
  workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
@@ -219,7 +219,7 @@ workbench/utils/lambda_utils.py,sha256=7GhGRPyXn9o-toWb9HBGSnI8-DhK9YRkwhCSk_mNK
  workbench/utils/license_manager.py,sha256=sDuhk1mZZqUbFmnuFXehyGnui_ALxrmYBg7gYwoo7ho,6975
  workbench/utils/log_utils.py,sha256=7n1NJXO_jUX82e6LWAQug6oPo3wiPDBYsqk9gsYab_A,3167
  workbench/utils/markdown_utils.py,sha256=4lEqzgG4EVmLcvvKKNUwNxVCySLQKJTJmWDiaDroI1w,8306
- workbench/utils/model_utils.py,sha256=7TYxTa2KCoLJfJ47QcnzmibMwKHX3bP37-sPvfqgdVM,12273
+ workbench/utils/model_utils.py,sha256=97yqEEeGLV8KSDt_RTGexcUEK1wU_UnmLj-cfuryPOs,12779
  workbench/utils/monitor_utils.py,sha256=kVaJ7BgUXs3VPMFYfLC03wkIV4Dq-pEhoXS0wkJFxCc,7858
  workbench/utils/pandas_utils.py,sha256=uTUx-d1KYfjbS9PMQp2_9FogCV7xVZR6XLzU5YAGmfs,39371
  workbench/utils/performance_utils.py,sha256=WDNvz-bOdC99cDuXl0urAV4DJ7alk_V3yzKPwvqgST4,1329
@@ -242,7 +242,7 @@ workbench/utils/workbench_cache.py,sha256=IQchxB81iR4eVggHBxUJdXxUCRkqWz1jKe5gxN
  workbench/utils/workbench_event_bridge.py,sha256=z1GmXOB-Qs7VOgC6Hjnp2DI9nSEWepaSXejACxTIR7o,4150
  workbench/utils/workbench_logging.py,sha256=WCuMWhQwibrvcGAyj96h2wowh6dH7zNlDJ7sWUzdCeI,10263
  workbench/utils/workbench_sqs.py,sha256=RwM80z7YWwdtMaCKh7KWF8v38f7eBRU7kyC7ZhTRuI0,2072
- workbench/utils/xgboost_model_utils.py,sha256=iiDJH0O81aO6aOTwgssqQygvTgjE7lRDRzLJ_fI3RVs,15554
+ workbench/utils/xgboost_model_utils.py,sha256=NNcALcBNOveqkIJiG7Wh7DS0O95RlGE3GZJbdSB8XWY,15571
  workbench/utils/chem_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  workbench/utils/chem_utils/fingerprints.py,sha256=Qvs8jaUwguWUq3Q3j695MY0t0Wk3BvroW-oWBwalMUo,5255
  workbench/utils/chem_utils/misc.py,sha256=Nevf8_opu-uIPrv_1_0ubuFVVo2_fGUkMoLAHB3XAeo,7372
@@ -287,9 +287,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
  workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
  workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
  workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
- workbench-0.8.177.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
- workbench-0.8.177.dist-info/METADATA,sha256=sjKEEHLha3-tDo9uYsRtpjPTHV_pj5PkucHuc2WWxBM,9210
- workbench-0.8.177.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- workbench-0.8.177.dist-info/entry_points.txt,sha256=zPFPruY9uayk8-wsKrhfnIyIB6jvZOW_ibyllEIsLWo,356
- workbench-0.8.177.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
- workbench-0.8.177.dist-info/RECORD,,
+ workbench-0.8.178.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
+ workbench-0.8.178.dist-info/METADATA,sha256=kS1snm2EjzaXVrpsg3TX28OmXqYDdZD1K7kQ0lXhNg8,9210
+ workbench-0.8.178.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ workbench-0.8.178.dist-info/entry_points.txt,sha256=zPFPruY9uayk8-wsKrhfnIyIB6jvZOW_ibyllEIsLWo,356
+ workbench-0.8.178.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+ workbench-0.8.178.dist-info/RECORD,,