PyPI - workbench - Versions diffs - 0.8.163__py3-none-any.whl → 0.8.164__py3-none-any.whl - Mend

workbench 0.8.163__py3-none-any.whl → 0.8.164__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of workbench might be problematic. Click here for more details.

Files changed (9) hide show

workbench/api/endpoint.py CHANGED Viewed

@@ -70,6 +70,17 @@ class Endpoint(EndpointCore):
         """
         return super().fast_inference(eval_df, threads=threads)
+    def cross_fold_inference(self, nfolds: int = 5) -> dict:
+        """Run cross-fold inference (only works for XGBoost models)
+        Args:
+            nfolds (int): The number of folds to use for cross-validation (default: 5)
+        Returns:
+            dict: A dictionary with fold results
+        """
+        return super().cross_fold_inference(nfolds)
 if __name__ == "__main__":
     """Exercise the Endpoint Class"""

workbench/core/artifacts/endpoint_core.py CHANGED Viewed

@@ -378,16 +378,17 @@ class EndpointCore(Artifact):
             self.log.important("No model associated with this endpoint, running 'no frills' inference...")
             return self.fast_inference(eval_df)
+        # Grab the model features and target column
+        model = ModelCore(self.model_name)
+        features = model.features()
+        target_column = model.target()
         # Run predictions on the evaluation data
-        prediction_df = self._predict(eval_df, drop_error_rows)
+        prediction_df = self._predict(eval_df, features, drop_error_rows)
         if prediction_df.empty:
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
-        # Get the target column
-        model = ModelCore(self.model_name)
-        target_column = model.target()
         # Sanity Check that the target column is present
         if target_column and (target_column not in prediction_df.columns):
             self.log.important(f"Target Column {target_column} not found in prediction_df!")
@@ -419,12 +420,6 @@ class EndpointCore(Artifact):
                     capture_name, prediction_df, target_column, model_type, metrics, description, features, id_column
                 )
-                # Capture CrossFold Inference Results
-                cross_fold_metrics = cross_fold_inference(model)
-                if cross_fold_metrics:
-                    # Now put into the Parameter Store Model Inference Namespace
-                    self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
                 # For UQ Models we also capture the uncertainty metrics
                 if model_type in [ModelType.UQ_REGRESSOR]:
                     metrics = uq_metrics(prediction_df, target_column)
@@ -435,6 +430,25 @@ class EndpointCore(Artifact):
         # Return the prediction DataFrame
         return prediction_df
+    def cross_fold_inference(self, nfolds: int = 5) -> dict:
+        """Run cross-fold inference (only works for XGBoost models)
+        Args:
+            nfolds (int): Number of folds to use for cross-fold (default: 5)
+        Returns:
+            dict: Dictionary with the cross-fold inference results
+        """
+        # Grab our model
+        model = ModelCore(self.model_name)
+        # Compute CrossFold Metrics
+        cross_fold_metrics = cross_fold_inference(model, nfolds=nfolds)
+        if cross_fold_metrics:
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
+        return cross_fold_metrics
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
@@ -450,11 +464,12 @@ class EndpointCore(Artifact):
         """
         return fast_inference(self.name, eval_df, self.sm_session, threads=threads)
-    def _predict(self, eval_df: pd.DataFrame, drop_error_rows: bool = False) -> pd.DataFrame:
-        """Internal: Run prediction on the given observations in the given DataFrame
+    def _predict(self, eval_df: pd.DataFrame, features: list[str], drop_error_rows: bool = False) -> pd.DataFrame:
+        """Internal: Run prediction on observations in the given DataFrame
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
+            features (list[str]): List of feature column names needed for prediction
             drop_error_rows (bool): If True, drop rows that had endpoint errors/issues (default=False)
         Returns:
             pd.DataFrame: Return the DataFrame with additional columns, prediction and any _proba columns
@@ -465,19 +480,12 @@ class EndpointCore(Artifact):
             self.log.warning("Evaluation DataFrame has 0 rows. No predictions to run.")
             return pd.DataFrame(columns=eval_df.columns)  # Return empty DataFrame with same structure
-        # Sanity check: Does the Model have Features?
-        features = ModelCore(self.model_name).features()
-        if not features:
-            self.log.warning("Model does not have features defined, using all columns in the DataFrame")
-        else:
-            # Sanity check: Does the DataFrame have the required features?
-            df_columns_lower = set(col.lower() for col in eval_df.columns)
-            features_lower = set(feature.lower() for feature in features)
-            # Check if the features are a subset of the DataFrame columns (case-insensitive)
-            if not features_lower.issubset(df_columns_lower):
-                missing_features = features_lower - df_columns_lower
-                raise ValueError(f"DataFrame does not contain required features: {missing_features}")
+        # Sanity check: Does the DataFrame have the required features?
+        df_columns_lower = set(col.lower() for col in eval_df.columns)
+        features_lower = set(feature.lower() for feature in features)
+        if not features_lower.issubset(df_columns_lower):
+            missing_features = features_lower - df_columns_lower
+            raise ValueError(f"DataFrame does not contain required features: {missing_features}")
         # Create our Endpoint Predictor Class
         predictor = Predictor(
@@ -713,18 +721,10 @@ class EndpointCore(Artifact):
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
-        # Generate SHAP values for our Prediction Dataframe
-        # generate_shap_values(self.endpoint_name, model_type.value, pred_results_df, inference_capture_path)
         # Now recompute the details for our Model
-        self.log.important(f"Recomputing Details for {self.model_name} to show latest Inference Results...")
+        self.log.important(f"Loading inference metrics for {self.model_name}...")
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
-        model.details()
-        # Recompute the details so that inference model metrics are updated
-        self.log.important(f"Recomputing Details for {self.name} to show latest Inference Results...")
-        self.details()
     def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
@@ -876,9 +876,11 @@ class EndpointCore(Artifact):
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
@@ -887,25 +889,20 @@ class EndpointCore(Artifact):
         prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
         y_pred = prediction_df[prediction_col]
-        # Check if our model has class labels, if not we'll use the unique labels in the prediction
-        class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            class_labels = sorted(list(set(y_true) | set(y_pred)))
+        # Get model class labels
+        model_class_labels = ModelCore(self.model_name).class_labels()
-        # Compute the confusion matrix (sklearn confusion_matrix)
-        conf_mtx = confusion_matrix(y_true, y_pred, labels=class_labels)
+        # Use model labels if available, otherwise infer from data
+        if model_class_labels:
+            self.log.important("Using model class labels for confusion matrix ordering...")
+            labels = model_class_labels
+        else:
+            labels = sorted(list(set(y_true) | set(y_pred)))
-        # Create a DataFrame
-        conf_mtx_df = pd.DataFrame(conf_mtx, index=class_labels, columns=class_labels)
+        # Compute confusion matrix and create DataFrame
+        conf_mtx = confusion_matrix(y_true, y_pred, labels=labels)
+        conf_mtx_df = pd.DataFrame(conf_mtx, index=labels, columns=labels)
         conf_mtx_df.index.name = "labels"
-        # Check if our model has class labels. If so make the index and columns ordered
-        model_class_labels = ModelCore(self.model_name).class_labels()
-        if model_class_labels:
-            self.log.important("Reordering the confusion matrix based on model class labels...")
-            conf_mtx_df.index = pd.Categorical(conf_mtx_df.index, categories=model_class_labels, ordered=True)
-            conf_mtx_df.columns = pd.Categorical(conf_mtx_df.columns, categories=model_class_labels, ordered=True)
-            conf_mtx_df = conf_mtx_df.sort_index().sort_index(axis=1)
         return conf_mtx_df
     def endpoint_config_name(self) -> str:
@@ -1086,13 +1083,20 @@ if __name__ == "__main__":
     df = fs.pull_dataframe()[:100]
     cap_df = df.copy()
     cap_df.columns = [col.upper() for col in cap_df.columns]
-    my_endpoint._predict(cap_df)
+    my_endpoint.inference(cap_df)
     # Boolean Type Test
     df["bool_column"] = [random.choice([True, False]) for _ in range(len(df))]
-    result_df = my_endpoint._predict(df)
+    result_df = my_endpoint.inference(df)
     assert result_df["bool_column"].dtype == bool
+    # Missing Feature Test
+    missing_df = df.drop(columns=["length"])
+    try:
+        my_endpoint.inference(missing_df)
+    except ValueError as e:
+        print(f"Expected error for missing feature: {e}")
     # Run Auto Inference on the Endpoint (uses the FeatureSet)
     print("Running Auto Inference...")
     my_endpoint.auto_inference()
@@ -1107,6 +1111,9 @@ if __name__ == "__main__":
     my_eval_df = fs_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df, capture_name="holdout_xyz")
+    # Run predictions using the fast_inference method
+    fast_results = my_endpoint.fast_inference(my_eval_df)
     # Run Inference and metrics for a Classification Endpoint
     class_endpoint = EndpointCore("wine-classification")
     auto_predictions = class_endpoint.auto_inference()
@@ -1115,9 +1122,6 @@ if __name__ == "__main__":
     target = "wine_class"
     print(class_endpoint.generate_confusion_matrix(target, auto_predictions))
-    # Run predictions using the fast_inference method
-    fast_results = my_endpoint.fast_inference(my_eval_df)
     # Test the class method delete (commented out for now)
     # from workbench.api import Model
     # model = Model("abalone-regression")

workbench/utils/workbench_logging.py CHANGED Viewed

@@ -181,9 +181,6 @@ def logging_setup(color_logs=True):
         log.debug("Debugging enabled via WORKBENCH_DEBUG environment variable.")
     else:
         log.setLevel(logging.INFO)
-        # Note: Not using the ThrottlingFilter for now
-        # throttle_filter = ThrottlingFilter(rate_seconds=5)
-        # handler.addFilter(throttle_filter)
     # Suppress specific logger
     logging.getLogger("sagemaker.config").setLevel(logging.WARNING)

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.163
+Version: 0.8.164
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License-Expression: MIT

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/RECORD RENAMED Viewed

@@ -31,7 +31,7 @@ workbench/api/__init__.py,sha256=kvrP70ypDOMdPGj_Eeftdh8J0lu_1qQVne6GXMkD4_E,102
 workbench/api/compound.py,sha256=BHd3Qu4Ra45FEuwiowhFfGMI_HKRRB10XMmoS6ljKrM,2541
 workbench/api/data_source.py,sha256=Ngz36YZWxFfpJbmURhM1LQPYjh5kdpZNGo6_fCRePbA,8321
 workbench/api/df_store.py,sha256=Wybb3zO-jPpAi2Ns8Ks1-lagvXAaBlRpBZHhnnl3Lms,6131
-workbench/api/endpoint.py,sha256=ejDnfBBgNYMZB-bOA5nX7C6CtBlAjmtrF8M_zpri9Io,3451
+workbench/api/endpoint.py,sha256=RWGqxsCW_pMiENMb_XZlm2ZCldMS4suEBM3F5gT3hYI,3814
 workbench/api/feature_set.py,sha256=wzNxNjN0K2FaIC7QUIogMnoHqw2vo0iAHYlGk6fWLCw,6649
 workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
 workbench/api/meta.py,sha256=fCOtZMfAHWaerzcsTeFnimXfgV8STe9JDiB7QBogktc,8456
@@ -53,7 +53,7 @@ workbench/core/artifacts/athena_source.py,sha256=RNmCe7s6uH4gVHpcdJcL84aSbF5Q1ah
 workbench/core/artifacts/cached_artifact_mixin.py,sha256=ngqFLZ4cQx_TFouXZgXZQsv_7W6XCvxVGXXSfzzaft8,3775
 workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
-workbench/core/artifacts/endpoint_core.py,sha256=L6uWOxHKItjbpRS2rFrAbxAqDyZIv2CO9dnZpohKrUI,48768
+workbench/core/artifacts/endpoint_core.py,sha256=6uDOl-VKrTbLMlHZEYFY80XwrCP5H0W36JoHySjhl7M,48163
 workbench/core/artifacts/feature_set_core.py,sha256=055VdSYR09HP4ygAuYvIYtHQ7Ec4XxsZygpgEl5H5jQ,29136
 workbench/core/artifacts/model_core.py,sha256=U0dSkpZMrsIgbUglVkPwAgN0gji7Oa7glOjqMQJDAzE,50927
 workbench/core/artifacts/monitor_core.py,sha256=BP6UuCyBI4zB2wwcIXvUw6RC0EktTcQd5Rv0x73qzio,37670
@@ -239,7 +239,7 @@ workbench/utils/trace_calls.py,sha256=tY4DOVMGXBh-mbUWzo1l-X9XjD0ux_qR9I1ypkjWNI
 workbench/utils/type_abbrev.py,sha256=3ai7ZbE8BgvdotOSb48w_BmgrEGVYvLoyzoNYH8ZuOs,1470
 workbench/utils/workbench_cache.py,sha256=IQchxB81iR4eVggHBxUJdXxUCRkqWz1jKe5gxN3z6yc,5657
 workbench/utils/workbench_event_bridge.py,sha256=z1GmXOB-Qs7VOgC6Hjnp2DI9nSEWepaSXejACxTIR7o,4150
-workbench/utils/workbench_logging.py,sha256=aOUjMZeKqrK03z5mwuVAAwwjIjVxyTA7g-brr85oxY8,10424
+workbench/utils/workbench_logging.py,sha256=WCuMWhQwibrvcGAyj96h2wowh6dH7zNlDJ7sWUzdCeI,10263
 workbench/utils/workbench_sqs.py,sha256=WFQTqOxoEdOzPEMmTVZcdPzylmkynZ5aKtvRrOAO06w,2127
 workbench/utils/xgboost_model_utils.py,sha256=AEBSyIXYFk6vI3u89w7J4VdI1dgNJOgQe6XZv4pUhOM,15501
 workbench/web_interface/components/component_interface.py,sha256=QCPWqiZLkVsAEzQFEQxFelk7H0UF5uI2dVvJNf0lRV4,7980
@@ -275,9 +275,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.163.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
-workbench-0.8.163.dist-info/METADATA,sha256=TwnUicLddrHeMkx_gDGiUR6uQD7TR6mRjNG0XY3kh1E,9209
-workbench-0.8.163.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-workbench-0.8.163.dist-info/entry_points.txt,sha256=oZykkheWiiIBjRE8cS5SdcxwmZKSFaQEGwMBjNh-eNM,238
-workbench-0.8.163.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
-workbench-0.8.163.dist-info/RECORD,,
+workbench-0.8.164.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
+workbench-0.8.164.dist-info/METADATA,sha256=qZKnCu_6ahD4lz6rmMk2VW4RyI--YnZafIZaUGMiOHI,9209
+workbench-0.8.164.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+workbench-0.8.164.dist-info/entry_points.txt,sha256=oZykkheWiiIBjRE8cS5SdcxwmZKSFaQEGwMBjNh-eNM,238
+workbench-0.8.164.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.164.dist-info/RECORD,,

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/WHEEL RENAMED Viewed

File without changes

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{workbench-0.8.163.dist-info → workbench-0.8.164.dist-info}/top_level.txt RENAMED Viewed

File without changes

workbench 0.8.163__py3-none-any.whl → 0.8.164__py3-none-any.whl

Potentially problematic release.

workbench 0.8.163__py3-none-any.whl → 0.8.164__py3-none-any.whl