workbench 0.8.158__py3-none-any.whl → 0.8.159__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/api/feature_set.py +12 -4
- workbench/api/meta.py +1 -1
- workbench/cached/cached_feature_set.py +1 -0
- workbench/cached/cached_meta.py +10 -12
- workbench/core/artifacts/cached_artifact_mixin.py +6 -3
- workbench/core/artifacts/model_core.py +19 -7
- workbench/core/cloud_platform/aws/aws_meta.py +66 -45
- workbench/core/cloud_platform/cloud_meta.py +5 -2
- workbench/core/transforms/features_to_model/features_to_model.py +9 -5
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +6 -0
- workbench/model_scripts/{custom_models/nn_models → pytorch_model}/generated_model_script.py +170 -156
- workbench/model_scripts/{custom_models/nn_models → pytorch_model}/pytorch.template +153 -147
- workbench/model_scripts/pytorch_model/requirements.txt +2 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +307 -0
- workbench/model_scripts/script_generation.py +6 -2
- workbench/model_scripts/xgb_model/generated_model_script.py +6 -6
- workbench/repl/workbench_shell.py +4 -9
- workbench/utils/json_utils.py +27 -8
- workbench/utils/pandas_utils.py +12 -13
- workbench/utils/redis_cache.py +28 -13
- workbench/utils/workbench_cache.py +20 -14
- workbench/web_interface/page_views/endpoints_page_view.py +1 -1
- workbench/web_interface/page_views/main_page.py +1 -1
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/METADATA +5 -8
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/RECORD +29 -29
- workbench/model_scripts/custom_models/nn_models/Readme.md +0 -9
- workbench/model_scripts/custom_models/nn_models/requirements.txt +0 -4
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/WHEEL +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/top_level.txt +0 -0
workbench/api/feature_set.py
CHANGED
@@ -87,8 +87,9 @@ class FeatureSet(FeatureSetCore):
         model_import_str: str = None,
         custom_script: Union[str, Path] = None,
         custom_args: dict = None,
+        training_image: str = "xgb_training",
+        inference_image: str = "xgb_inference",
         inference_arch: str = "x86_64",
-        inference_image: str = "inference",
         **kwargs,
     ) -> Union[Model, None]:
         """Create a Model from the FeatureSet
@@ -101,11 +102,12 @@ class FeatureSet(FeatureSetCore):
             description (str, optional): Set the description for the model. If not give a description is generated.
             feature_list (list, optional): Set the feature list for the model. If not given a feature list is generated.
             target_column (str, optional): The target column for the model (use None for unsupervised model)
-            model_class (str, optional):
+            model_class (str, optional): Model class to use (e.g. "KMeans", "PyTorch", default: None)
             model_import_str (str, optional): The import for the model (e.g. "from sklearn.cluster import KMeans")
             custom_script (str, optional): The custom script to use for the model (default: None)
+            training_image (str, optional): The training image to use (default: "xgb_training")
+            inference_image (str, optional): The inference image to use (default: "xgb_inference")
             inference_arch (str, optional): The architecture to use for inference (default: "x86_64")
-            inference_image (str, optional): The inference image to use (default: "inference")

         Returns:
             Model: The Model created from the FeatureSet (or None if the Model could not be created)
@@ -125,6 +127,11 @@ class FeatureSet(FeatureSetCore):
         # Create the Model Tags
         tags = [name] if tags is None else tags

+        # If the model_class is PyTorch, ensure we set the training and inference images
+        if model_class and model_class.lower() == "pytorch":
+            training_image = "pytorch_training"
+            inference_image = "pytorch_inference"
+
         # Transform the FeatureSet into a Model
         features_to_model = FeaturesToModel(
             feature_name=self.name,
@@ -134,8 +141,9 @@ class FeatureSet(FeatureSetCore):
             model_import_str=model_import_str,
             custom_script=custom_script,
             custom_args=custom_args,
-
+            training_image=training_image,
             inference_image=inference_image,
+            inference_arch=inference_arch,
         )
         features_to_model.set_output_tags(tags)
         features_to_model.transform(
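
Taken together, a caller now selects the PyTorch container pair simply by naming the model class. A minimal usage sketch, assuming this method is the public to_model API; the FeatureSet, model name, and target column below are illustrative:

    from workbench.api.feature_set import FeatureSet

    fs = FeatureSet("abalone_features")

    # model_class="PyTorch" flips the new defaults ("xgb_training"/"xgb_inference")
    # over to "pytorch_training"/"pytorch_inference" before FeaturesToModel runs
    model = fs.to_model(
        name="abalone-regression-pt",           # illustrative model name
        model_class="PyTorch",
        target_column="class_number_of_rings",  # illustrative target column
    )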
workbench/cached/cached_feature_set.py
CHANGED
@@ -79,6 +79,7 @@ if __name__ == "__main__":

     # Retrieve an existing FeatureSet
     my_features = CachedFeatureSet("abalone_features")
+    pprint(my_features.smart_sample())
     pprint(my_features.summary())
     pprint(my_features.details())
     pprint(my_features.health_check())
workbench/cached/cached_meta.py
CHANGED
@@ -13,8 +13,6 @@ from workbench.utils.workbench_cache import WorkbenchCache


 # Decorator to cache method results from the Meta class
-# Note: This has to be outside the class definition to work properly in Python 3.9
-# When we deprecated support for 3.9, move this back into the class definition
 def cache_result(method):
     """Decorator to cache method results in meta_cache"""

@@ -24,11 +22,8 @@ def cache_result(method):
         cache_key = CachedMeta._flatten_redis_key(method, *args, **kwargs)

         # Check for fresh data, spawn thread to refresh if stale
-        if
-        self.log.
-        self.fresh_cache.set(cache_key, True)  # Mark as refreshed
-
-        # Spawn a thread to refresh data without blocking
+        if self.fresh_cache.atomic_set(cache_key, True):
+            self.log.important(f"Async: Metadata for {cache_key} refresh thread started...")
             self.thread_pool.submit(self._refresh_data_in_background, cache_key, method, *args, **kwargs)

         # Return data (fresh or stale) if available
@@ -62,7 +57,7 @@ class CachedMeta(CloudMeta):
         meta.data_sources()
         meta.feature_sets(details=True/False)
         meta.models(details=True/False)
-        meta.endpoints()
+        meta.endpoints(details=True/False)
         meta.views()

         # These are 'describe' methods
@@ -91,7 +86,7 @@ class CachedMeta(CloudMeta):

         # Create both our Meta Cache and Fresh Cache (tracks if data is stale)
         self.meta_cache = WorkbenchCache(prefix="meta")
-        self.fresh_cache = WorkbenchCache(prefix="meta_fresh", expire=
+        self.fresh_cache = WorkbenchCache(prefix="meta_fresh", expire=300)  # 5-minute expiration

         # Create a ThreadPoolExecutor for refreshing stale data
         self.thread_pool = ThreadPoolExecutor(max_workers=5)
@@ -193,13 +188,16 @@ class CachedMeta(CloudMeta):
         return super().models(details=details)

     @cache_result
-    def endpoints(self) -> pd.DataFrame:
+    def endpoints(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Endpoints deployed in the Cloud Platform

+        Args:
+            details (bool, optional): Include detailed information. Defaults to False.
+
         Returns:
             pd.DataFrame: A summary of the Endpoints in the Cloud Platform
         """
-        return super().endpoints()
+        return super().endpoints(details=details)

     @cache_result
     def glue_job(self, job_name: str) -> Union[dict, None]:
@@ -266,7 +264,7 @@ class CachedMeta(CloudMeta):
         """Background task to refresh AWS metadata."""
         result = method(self, *args, **kwargs)
         self.meta_cache.set(cache_key, result)
-        self.log.
+        self.log.important(f"Updated Metadata for {cache_key}")

     @staticmethod
     def _flatten_redis_key(method, *args, **kwargs):

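The refactor collapses the old check-then-set sequence into a single atomic_set call, so only one caller wins the right to refresh a stale key. A minimal sketch of the pattern, assuming atomic_set behaves like Redis SET NX with an expiry (returns True only for the first caller within the window); the FreshCache class below is a toy stand-in, not the real WorkbenchCache:

    import threading
    import time

    class FreshCache:
        """Toy stand-in for WorkbenchCache(prefix=..., expire=N): markers lapse after `expire` seconds."""
        def __init__(self, expire: float):
            self.expire = expire
            self._store = {}  # key -> expiry timestamp
            self._lock = threading.Lock()

        def atomic_set(self, key, value) -> bool:
            """Set key only if absent/expired; return True iff we set it (SETNX-style)."""
            with self._lock:
                now = time.monotonic()
                if self._store.get(key, 0) > now:
                    return False  # still fresh: a refresh is already scheduled
                self._store[key] = now + self.expire
                return True

    fresh = FreshCache(expire=300)  # 5-minute window, matching the new meta_fresh cache
    if fresh.atomic_set("meta:endpoints", True):
        print("first caller: spawn background refresh")
    else:
        print("fresh: serve cached data, no extra refresh thread")
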
workbench/core/artifacts/cached_artifact_mixin.py
CHANGED
@@ -13,7 +13,7 @@ class CachedArtifactMixin:
     # Class-level caches, thread pool, and shutdown flag
     log = logging.getLogger("workbench")
     artifact_cache = WorkbenchCache(prefix="artifact_cache")
-    fresh_cache = WorkbenchCache(prefix="artifact_fresh_cache", expire=
+    fresh_cache = WorkbenchCache(prefix="artifact_fresh_cache", expire=120)
     thread_pool = ThreadPoolExecutor(max_workers=5)

     @staticmethod
@@ -45,8 +45,8 @@ class CachedArtifactMixin:
                 cls.artifact_cache.set(cache_key, result)
                 return result

-            # Stale cache: Refresh in the background
-            if
+            # Stale cache: Refresh in the background
+            if cache_fresh is None:
                 self.log.debug(f"Async: Refresh thread started: {cache_key}...")
                 cls.fresh_cache.set(cache_key, True)
                 cls.thread_pool.submit(cls._refresh_data_in_background, self, cache_key, method, *args, **kwargs)
@@ -88,4 +88,7 @@ if __name__ == "__main__":
     my_model = CachedModel("abalone-regression")
     pprint(my_model.summary())
     pprint(my_model.details())
+    # Second call to demonstrate caching
+    pprint(my_model.summary())
+    pprint(my_model.details())
     CachedArtifactMixin._shutdown()

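The expire values drive refresh cadence: artifact freshness markers now lapse after 120 seconds versus 300 for metadata, so artifacts are re-fetched more aggressively. A rough usage sketch of what the new __main__ lines demonstrate; the CachedModel import path is assumed, not confirmed by the diff:

    from pprint import pprint

    from workbench.cached.cached_model import CachedModel  # import path assumed
    from workbench.core.artifacts.cached_artifact_mixin import CachedArtifactMixin

    my_model = CachedModel("abalone-regression")
    pprint(my_model.summary())  # first call: populates artifact_cache (may hit AWS)
    pprint(my_model.summary())  # within ~120s: served from cache; a background refresh
                                # only starts once the fresh-cache marker expires
    CachedArtifactMixin._shutdown()  # flush the refresh thread pool before exit
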
workbench/core/artifacts/model_core.py
CHANGED
@@ -42,24 +42,36 @@ class ModelImages:

     image_uris = {
         # US East 1 images
-        ("us-east-1", "
+        ("us-east-1", "xgb_training", "0.1", "x86_64"): (
             "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-training:0.1"
         ),
-        ("us-east-1", "
+        ("us-east-1", "xgb_inference", "0.1", "x86_64"): (
             "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1"
         ),
+        ("us-east-1", "pytorch_training", "0.1", "x86_64"): (
+            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
+        ),
+        ("us-east-1", "pytorch_inference", "0.1", "x86_64"): (
+            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
+        ),
         # US West 2 images
-        ("us-west-2", "
+        ("us-west-2", "xgb_training", "0.1", "x86_64"): (
             "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-training:0.1"
         ),
-        ("us-west-2", "
+        ("us-west-2", "xgb_inference", "0.1", "x86_64"): (
             "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1"
         ),
+        ("us-west-2", "pytorch_training", "0.1", "x86_64"): (
+            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
+        ),
+        ("us-west-2", "pytorch_inference", "0.1", "x86_64"): (
+            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
+        ),
         # ARM64 images
-        ("us-east-1", "
+        ("us-east-1", "xgb_inference", "0.1", "arm64"): (
             "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1-arm64"
         ),
-        ("us-west-2", "
+        ("us-west-2", "xgb_inference", "0.1", "arm64"): (
             "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1-arm64"
         ),
         # Meta Endpoint inference images
@@ -72,7 +84,7 @@ class ModelImages:
     }

     @classmethod
-    def get_image_uri(cls, region, image_type
+    def get_image_uri(cls, region, image_type, version="0.1", architecture="x86_64"):
         key = (region, image_type, version, architecture)
         if key in cls.image_uris:
             return cls.image_uris[key]

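The registry is a plain dict keyed by (region, image_type, version, architecture), so adding the PyTorch containers is just four new entries per region and the lookup logic is untouched. A stripped-down sketch of the lookup, using one real URI from the diff; the fallback error handling is an assumption, the real method may differ:

    class ModelImages:
        image_uris = {
            ("us-east-1", "pytorch_training", "0.1", "x86_64"): (
                "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
            ),
            # ... remaining (region, image_type, version, arch) entries elided
        }

        @classmethod
        def get_image_uri(cls, region, image_type, version="0.1", architecture="x86_64"):
            key = (region, image_type, version, architecture)
            if key in cls.image_uris:
                return cls.image_uris[key]
            raise ValueError(f"No image registered for {key}")  # fallback behavior assumed

    print(ModelImages.get_image_uri("us-east-1", "pytorch_training"))
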
workbench/core/cloud_platform/aws/aws_meta.py
CHANGED
@@ -179,7 +179,7 @@ class AWSMeta:
         feature_set_details.update(self.sm_client.describe_feature_group(FeatureGroupName=name))

         # Retrieve Workbench metadata from tags
-        aws_tags = self.get_aws_tags(fg["FeatureGroupArn"])
+        aws_tags = self.get_aws_tags(fg["FeatureGroupArn"]) if details else {}
         summary = {
             "Feature Group": name,
             "Health": "",
@@ -258,70 +258,60 @@ class AWSMeta:
         df = pd.DataFrame(model_summary).convert_dtypes()
         return df.sort_values(by="Created", ascending=False)

-    def endpoints(self,
+    def endpoints(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Endpoints in AWS.

         Args:
-
+            details (bool, optional): Get additional details (Defaults to False).

         Returns:
             pd.DataFrame: A summary of the Endpoints in AWS.
         """
         from workbench.utils.endpoint_utils import is_monitored  # noqa: E402

-        #
-
-        paginator = sagemaker_client.get_paginator("list_endpoints")
+        # Use our SageMaker client to list all endpoints
+        paginator = self.sm_client.get_paginator("list_endpoints")
         data_summary = []

         # Use the paginator to retrieve all endpoints
         for page in paginator.paginate():
             for endpoint in page["Endpoints"]:
                 endpoint_name = endpoint["EndpointName"]
-                endpoint_info = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)

-                #
-
-
-
-
-
-
-
-
-
-
-
-                if
-
-                    mem_size = production_variant["ServerlessConfig"]["MemorySizeInMB"]
-                    concurrency = production_variant["ServerlessConfig"]["MaxConcurrency"]
-                    instance_type = f"Serverless ({mem_size // 1024}GB/{concurrency})"
-                except sagemaker_client.exceptions.ClientError:
-                    # If the endpoint config is not found, change the config name to reflect this
-                    endpoint_config_name = f"{endpoint_config_name} (Not Found)"
-                    production_variant = {}
-                    instance_type = "Unknown"
-
-                # Check if the endpoint has monitoring enabled
-                endpoint_monitored = is_monitored(endpoint_name, sagemaker_client)
+                # Grab various endpoint details
+                endpoint_details = {"config": {"instance": "-", "variant": "-"}, "monitored": "-"}
+                aws_tags = {}
+                if details:
+                    endpoint_details = self.sm_client.describe_endpoint(EndpointName=endpoint_name)
+
+                    # Retrieve AWS Tags for this Endpoint
+                    aws_tags = self.get_aws_tags(endpoint_details["EndpointArn"])
+
+                    # Getting the endpoint configuration
+                    config_info = self._endpoint_config_info(endpoint_details["EndpointConfigName"])
+                    endpoint_details["config"] = config_info
+
+                    # Check if the endpoint has monitoring enabled
+                    endpoint_details["monitored"] = is_monitored(endpoint_name, self.sm_client)

                 # Compile endpoint summary
+                created = (
+                    datetime_string(endpoint_details["CreationTime"]) if "CreationTime" in endpoint_details else "-"
+                )
                 summary = {
                     "Name": endpoint_name,
-                    "Health":
+                    "Health": aws_tags.get("workbench_health_tags", ""),
                     "Owner": aws_tags.get("workbench_owner", "-"),
-                    "Instance":
-                    "Created":
+                    "Instance": endpoint_details["config"]["instance"],
+                    "Created": created,
                     "Input": aws_tags.get("workbench_input", "-"),
-                    "Status":
-                    "Config":
-                    "Variant":
-                    "Capture": str(
-                    "Samp(%)": str(
+                    "Status": endpoint_details.get("EndpointStatus", "-"),
+                    "Config": endpoint_details.get("EndpointConfigName", "-"),
+                    "Variant": endpoint_details["config"]["variant"],
+                    "Capture": str(endpoint_details.get("DataCaptureConfig", {}).get("EnableCapture", "False")),
+                    "Samp(%)": str(endpoint_details.get("DataCaptureConfig", {}).get("CurrentSamplingPercentage", "-")),
                 }
                 data_summary.append(summary)

@@ -329,6 +319,34 @@ class AWSMeta:
         df = pd.DataFrame(data_summary).convert_dtypes()
         return df.sort_values(by="Created", ascending=False)

+    def _endpoint_config_info(self, endpoint_config_name: str) -> dict:
+        """Internal: Get the Endpoint Configuration information for the given endpoint config name.
+
+        Args:
+            endpoint_config_name (str): The name of the endpoint configuration.
+
+        Returns:
+            dict: The endpoint configuration details.
+        """
+
+        # Retrieve the endpoint configuration
+        try:
+            endpoint_config = self.sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
+            production_variant = endpoint_config["ProductionVariants"][0]
+
+            # Determine instance type or serverless configuration
+            instance_type = production_variant.get("InstanceType")
+            if instance_type is None:
+                # If no instance type, it's a serverless configuration
+                mem_size = production_variant["ServerlessConfig"]["MemorySizeInMB"]
+                concurrency = production_variant["ServerlessConfig"]["MaxConcurrency"]
+                instance_type = f"Serverless ({mem_size // 1024}GB/{concurrency})"
+
+            return {"instance": instance_type, "variant": production_variant.get("VariantName", "-")}
+        except self.sm_client.exceptions.ClientError as e:
+            self.log.error(f"Error retrieving endpoint config {endpoint_config_name}: {e}")
+            return {"instance": "-", "variant": "-"}
+
     def pipelines(self) -> pd.DataFrame:
         """List all the Pipelines in the S3 Bucket

@@ -702,7 +720,6 @@ class AWSMeta:

 if __name__ == "__main__":
     """Exercise the Workbench AWSMeta Class"""
-    import time
     from pprint import pprint

     # Pandas Display Options
@@ -712,6 +729,7 @@ if __name__ == "__main__":
     # Create the class
     meta = AWSMeta()

+    """
     # Test the __repr__ method
     print(meta)

@@ -759,11 +777,15 @@ if __name__ == "__main__":
     start_time = time.time()
     pprint(meta.models(details=True))
     print(f"Elapsed Time Model (with details): {time.time() - start_time:.2f}")
-
+    """
     # Get the Endpoints
     print("\n\n*** Endpoints ***")
     pprint(meta.endpoints())

+    # Get the Endpoints with Details
+    print("\n\n*** Endpoints with Details ***")
+    pprint(meta.endpoints(details=True))
+
     # List Pipelines
     print("\n\n*** Workbench Pipelines ***")
     pprint(meta.pipelines())
@@ -785,7 +807,6 @@ if __name__ == "__main__":
     pprint(meta.model("abalone-regression"))
     print("\n\n*** Endpoint Details ***")
     pprint(meta.endpoint("abalone-regression"))
-    pprint(meta.endpoint("test-timing-realtime"))

     # Test out a non-existent model
     print("\n\n*** Model Doesn't Exist ***")

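The extracted _endpoint_config_info makes the serverless-vs-instance branching explicit: a ProductionVariant with no InstanceType must carry a ServerlessConfig instead. A standalone sketch of the same parsing against a raw boto3 response; the client region and config name are illustrative:

    import boto3

    sm_client = boto3.client("sagemaker", region_name="us-east-1")

    def config_instance_label(endpoint_config_name: str) -> str:
        """Return a human-readable instance label for an endpoint config (mirrors the diff's logic)."""
        config = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
        variant = config["ProductionVariants"][0]
        instance_type = variant.get("InstanceType")
        if instance_type is None:
            # Serverless endpoints have no InstanceType; report memory (GB) and max concurrency
            serverless = variant["ServerlessConfig"]
            return f"Serverless ({serverless['MemorySizeInMB'] // 1024}GB/{serverless['MaxConcurrency']})"
        return instance_type

    print(config_instance_label("abalone-regression-config"))  # hypothetical config name
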
workbench/core/cloud_platform/cloud_meta.py
CHANGED
@@ -121,13 +121,16 @@ class CloudMeta(AWSMeta):
         """
         return super().models(details=details)

-    def endpoints(self) -> pd.DataFrame:
+    def endpoints(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Endpoints deployed in the Cloud Platform

+        Args:
+            details (bool, optional): Include detailed information. Defaults to False.
+
         Returns:
             pd.DataFrame: A summary of the Endpoints in the Cloud Platform
         """
-        return super().endpoints()
+        return super().endpoints(details=details)

     def pipelines(self) -> pd.DataFrame:
         """Get a summary of the Pipelines deployed in the Cloud Platform

workbench/core/transforms/features_to_model/features_to_model.py
CHANGED
@@ -37,8 +37,9 @@ class FeaturesToModel(Transform):
         model_import_str=None,
         custom_script=None,
         custom_args=None,
+        training_image="xgb_training",
+        inference_image="xgb_inference",
         inference_arch="x86_64",
-        inference_image="inference",
     ):
         """FeaturesToModel Initialization
         Args:
@@ -49,8 +50,9 @@ class FeaturesToModel(Transform):
             model_import_str (str, optional): The import string for the model (default None)
             custom_script (str, optional): Custom script to use for the model (default None)
             custom_args (dict, optional): Custom arguments to pass to custom model scripts (default None)
+            training_image (str, optional): Training image (default "xgb_training")
+            inference_image (str, optional): Inference image (default "xgb_inference")
             inference_arch (str, optional): Inference architecture (default "x86_64")
-            inference_image (str, optional): Inference image (default "inference")
         """

         # Make sure the model_name is a valid name
@@ -73,8 +75,9 @@ class FeaturesToModel(Transform):
         self.model_feature_list = None
         self.target_column = None
         self.class_labels = None
-        self.
+        self.training_image = training_image
         self.inference_image = inference_image
+        self.inference_arch = inference_arch

     def transform_impl(
         self, target_column: str, description: str = None, feature_list: list = None, train_all_data=False, **kwargs
@@ -229,7 +232,7 @@ class FeaturesToModel(Transform):
         source_dir = str(Path(script_path).parent)

         # Create a Sagemaker Model with our script
-        image = ModelImages.get_image_uri(self.sm_session.boto_region_name,
+        image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image, "0.1")
         self.estimator = Estimator(
             entry_point=entry_point,
             source_dir=source_dir,
@@ -246,6 +249,7 @@ class FeaturesToModel(Transform):
         training_job_name = f"{self.output_name}-{training_date_time_utc}"

         # Train the estimator
+        self.log.important(f"Training the Model {self.output_name} with Training Image {image}...")
         self.estimator.fit({"train": s3_training_path}, job_name=training_job_name)

         # Now delete the training data
@@ -297,7 +301,7 @@ class FeaturesToModel(Transform):
         image = ModelImages.get_image_uri(
             self.sm_session.boto_region_name, self.inference_image, "0.1", self.inference_arch
         )
-        self.log.important(f"Registering model {self.output_name} with
+        self.log.important(f"Registering model {self.output_name} with Inference Image {image}...")
         model = self.estimator.create_model(role=self.workbench_role_arn)
         if aws_region:
             self.log.important(f"Setting AWS Region: {aws_region} for model {self.output_name}...")

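With the split, the transform resolves two independent registry keys: the training image feeds the SageMaker Estimator, and the inference image is attached later at model registration. A rough sketch of the pairing, assuming the ModelImages import path plus placeholder role, paths, and instance settings:

    from sagemaker.estimator import Estimator
    from workbench.core.artifacts.model_core import ModelImages  # import path assumed

    region = "us-east-1"
    training_image = ModelImages.get_image_uri(region, "pytorch_training", "0.1")
    inference_image = ModelImages.get_image_uri(region, "pytorch_inference", "0.1", "x86_64")

    estimator = Estimator(
        entry_point="generated_model_script.py",   # illustrative entry point
        source_dir="model_scripts/pytorch_model",  # illustrative source dir
        image_uri=training_image,                  # training container
        role="arn:aws:iam::123456789012:role/placeholder-role",
        instance_count=1,
        instance_type="ml.m5.large",
    )
    # After estimator.fit(...), estimator.create_model(...) registers the model;
    # the separate inference_image is what the registered container runs at serve time.
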
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py
CHANGED
@@ -78,6 +78,12 @@ class ModelToEndpoint(Transform):
             sagemaker_session=self.sm_session,
         )

+        # Log the image that will be used for deployment
+        inference_image = self.sm_client.describe_model_package(ModelPackageName=model_package_arn)[
+            "InferenceSpecification"
+        ]["Containers"][0]["Image"]
+        self.log.important(f"Deploying Model Package: {self.input_name} with Inference Image: {inference_image}")
+
         # Get the metadata/tags to push into AWS
         aws_tags = self.get_aws_tags()

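The same lookup works standalone for verifying which container a registered model package will deploy with. A small sketch against boto3; the package ARN below is a placeholder:

    import boto3

    sm_client = boto3.client("sagemaker")
    package_arn = "arn:aws:sagemaker:us-east-1:123456789012:model-package/abalone-regression/1"  # placeholder

    # InferenceSpecification.Containers[0].Image is the ECR URI SageMaker pulls at deploy time
    package = sm_client.describe_model_package(ModelPackageName=package_arn)
    print(package["InferenceSpecification"]["Containers"][0]["Image"])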