PyPI - workbench - Versions diffs - 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl - Mend

workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of workbench might be problematic. Click here for more details.

Files changed (113)

workbench/algorithms/dataframe/__init__.py +1 -2
workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
workbench/algorithms/dataframe/proximity.py +261 -235
workbench/algorithms/graph/light/proximity_graph.py +10 -8
workbench/api/__init__.py +2 -1
workbench/api/compound.py +1 -1
workbench/api/endpoint.py +11 -0
workbench/api/feature_set.py +11 -8
workbench/api/meta.py +5 -2
workbench/api/model.py +16 -15
workbench/api/monitor.py +1 -16
workbench/core/artifacts/__init__.py +11 -2
workbench/core/artifacts/artifact.py +11 -3
workbench/core/artifacts/data_capture_core.py +355 -0
workbench/core/artifacts/endpoint_core.py +256 -118
workbench/core/artifacts/feature_set_core.py +265 -16
workbench/core/artifacts/model_core.py +107 -60
workbench/core/artifacts/monitor_core.py +33 -248
workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
workbench/core/cloud_platform/aws/aws_meta.py +12 -5
workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
workbench/core/cloud_platform/aws/aws_session.py +4 -4
workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
workbench/core/transforms/features_to_model/features_to_model.py +42 -32
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
workbench/core/views/training_view.py +113 -42
workbench/core/views/view.py +53 -3
workbench/core/views/view_utils.py +4 -4
workbench/model_scripts/chemprop/chemprop.template +852 -0
workbench/model_scripts/chemprop/generated_model_script.py +852 -0
workbench/model_scripts/chemprop/requirements.txt +11 -0
workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
workbench/model_scripts/pytorch_model/generated_model_script.py +373 -190
workbench/model_scripts/pytorch_model/pytorch.template +370 -187
workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
workbench/model_scripts/script_generation.py +17 -9
workbench/model_scripts/uq_models/generated_model_script.py +605 -0
workbench/model_scripts/uq_models/mapie.template +605 -0
workbench/model_scripts/uq_models/requirements.txt +1 -0
workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
workbench/model_scripts/xgb_model/xgb_model.template +44 -46
workbench/repl/workbench_shell.py +28 -14
workbench/scripts/endpoint_test.py +162 -0
workbench/scripts/lambda_test.py +73 -0
workbench/scripts/ml_pipeline_batch.py +137 -0
workbench/scripts/ml_pipeline_sqs.py +186 -0
workbench/scripts/monitor_cloud_watch.py +20 -100
workbench/utils/aws_utils.py +4 -3
workbench/utils/chem_utils/__init__.py +0 -0
workbench/utils/chem_utils/fingerprints.py +134 -0
workbench/utils/chem_utils/misc.py +194 -0
workbench/utils/chem_utils/mol_descriptors.py +483 -0
workbench/utils/chem_utils/mol_standardize.py +450 -0
workbench/utils/chem_utils/mol_tagging.py +348 -0
workbench/utils/chem_utils/projections.py +209 -0
workbench/utils/chem_utils/salts.py +256 -0
workbench/utils/chem_utils/sdf.py +292 -0
workbench/utils/chem_utils/toxicity.py +250 -0
workbench/utils/chem_utils/vis.py +253 -0
workbench/utils/chemprop_utils.py +760 -0
workbench/utils/cloudwatch_handler.py +1 -1
workbench/utils/cloudwatch_utils.py +137 -0
workbench/utils/config_manager.py +3 -7
workbench/utils/endpoint_utils.py +5 -7
workbench/utils/license_manager.py +2 -6
workbench/utils/model_utils.py +95 -34
workbench/utils/monitor_utils.py +44 -62
workbench/utils/pandas_utils.py +3 -3
workbench/utils/pytorch_utils.py +526 -0
workbench/utils/shap_utils.py +10 -2
workbench/utils/workbench_logging.py +0 -3
workbench/utils/workbench_sqs.py +1 -1
workbench/utils/xgboost_model_utils.py +371 -156
workbench/web_interface/components/model_plot.py +7 -1
workbench/web_interface/components/plugin_unit_test.py +5 -2
workbench/web_interface/components/plugins/dashboard_status.py +3 -1
workbench/web_interface/components/plugins/generated_compounds.py +1 -1
workbench/web_interface/components/plugins/model_details.py +9 -7
workbench/web_interface/components/plugins/scatter_plot.py +3 -3
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/RECORD +101 -85
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
workbench/model_scripts/quant_regression/quant_regression.template +0 -279
workbench/model_scripts/quant_regression/requirements.txt +0 -1
workbench/utils/chem_utils.py +0 -1556
workbench/utils/execution_environment.py +0 -211
workbench/utils/fast_inference.py +0 -167
workbench/utils/resource_utils.py +0 -39
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
{workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0

workbench/core/artifacts/feature_set_core.py CHANGED Viewed

@@ -16,8 +16,9 @@ from sagemaker.feature_store.feature_store import FeatureStore
 from workbench.core.artifacts.artifact import Artifact
 from workbench.core.artifacts.data_source_factory import DataSourceFactory
 from workbench.core.artifacts.athena_source import AthenaSource
+from workbench.utils.deprecated_utils import deprecated
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional, List, Dict, Union
 from workbench.utils.aws_utils import aws_throttle
@@ -194,24 +195,24 @@ class FeatureSetCore(Artifact):
         return View(self, view_name)
-    def set_display_columns(self, diplay_columns: list[str]):
+    def set_display_columns(self, display_columns: list[str]):
         """Set the display columns for this Data Source
         Args:
-            diplay_columns (list[str]): The display columns for this Data Source
+            display_columns (list[str]): The display columns for this Data Source
         """
         # Check mismatch of display columns to computation columns
         c_view = self.view("computation")
         computation_columns = c_view.columns
-        mismatch_columns = [col for col in diplay_columns if col not in computation_columns]
+        mismatch_columns = [col for col in display_columns if col not in computation_columns]
         if mismatch_columns:
             self.log.monitor(f"Display View/Computation mismatch: {mismatch_columns}")
-        self.log.important(f"Setting Display Columns...{diplay_columns}")
+        self.log.important(f"Setting Display Columns...{display_columns}")
         from workbench.core.views import DisplayView
         # Create a NEW display view
-        DisplayView.create(self, source_table=c_view.table, column_list=diplay_columns)
+        DisplayView.create(self, source_table=c_view.table, column_list=display_columns)
     def set_computation_columns(self, computation_columns: list[str], reset_display: bool = True):
         """Set the computation columns for this FeatureSet
@@ -509,6 +510,184 @@ class FeatureSetCore(Artifact):
         ].tolist()
         return hold_out_ids
+    def set_sample_weights(
+        self,
+        weight_dict: Dict[Union[str, int], float],
+        default_weight: float = 1.0,
+        exclude_zero_weights: bool = True,
+    ):
+        """Configure training view with sample weights for each ID.
+        Args:
+            weight_dict: Mapping of ID to sample weight
+                - weight > 1.0: oversample/emphasize
+                - weight = 1.0: normal (default)
+                - 0 < weight < 1.0: downweight/de-emphasize
+                - weight = 0.0: exclude from training
+            default_weight: Weight for IDs not in weight_dict (default: 1.0)
+            exclude_zero_weights: If True, filter out rows with sample_weight=0 (default: True)
+        Example:
+            weights = {
+                'compound_42': 3.0,  # oversample 3x
+                'compound_99': 0.1,  # noisy, downweight
+                'compound_123': 0.0, # exclude from training
+            }
+            model.set_sample_weights(weights)  # zeros automatically excluded
+            model.set_sample_weights(weights, exclude_zero_weights=False)  # keep zeros
+        """
+        from workbench.core.views import TrainingView
+        if not weight_dict:
+            self.log.important("Empty weight_dict, creating standard training view")
+            TrainingView.create(self, id_column=self.id_column)
+            return
+        self.log.important(f"Setting sample weights for {len(weight_dict)} IDs")
+        # Helper to format IDs for SQL
+        def format_id(id_val):
+            return repr(id_val)
+        # Build CASE statement for sample_weight
+        case_conditions = [
+            f"WHEN {self.id_column} = {format_id(id_val)} THEN {weight}" for id_val, weight in weight_dict.items()
+        ]
+        case_statement = "\n        ".join(case_conditions)
+        # Build inner query with sample weights
+        inner_sql = f"""SELECT
+            *,
+            CASE
+                {case_statement}
+                ELSE {default_weight}
+            END AS sample_weight
+        FROM {self.table}"""
+        # Optionally filter out zero weights
+        if exclude_zero_weights:
+            zero_count = sum(1 for weight in weight_dict.values() if weight == 0.0)
+            custom_sql = f"SELECT * FROM ({inner_sql}) WHERE sample_weight > 0"
+            self.log.important(f"Filtering out {zero_count} rows with sample_weight = 0")
+        else:
+            custom_sql = inner_sql
+        TrainingView.create_with_sql(self, sql_query=custom_sql, id_column=self.id_column)
+    @deprecated(version=0.9)
+    def set_training_filter(self, filter_expression: Optional[str] = None):
+        """Set a filter expression for the training view for this FeatureSet
+        Args:
+            filter_expression (Optional[str]): A SQL filter expression (e.g., "age > 25 AND status = 'active'")
+                If None or empty string, will reset to training view with no filter
+                (default: None)
+        """
+        from workbench.core.views import TrainingView
+        # Grab the existing holdout ids
+        holdout_ids = self.get_training_holdouts()
+        # Create a NEW training view
+        self.log.important(f"Setting Training Filter: {filter_expression}")
+        TrainingView.create(
+            self, id_column=self.id_column, holdout_ids=holdout_ids, filter_expression=filter_expression
+        )
+    @deprecated(version="0.9")
+    def exclude_ids_from_training(self, ids: List[Union[str, int]], column_name: Optional[str] = None):
+        """Exclude a list of IDs from the training view
+        Args:
+            ids (List[Union[str, int]],): List of IDs to exclude from training
+            column_name (Optional[str]): Column name to filter on.
+                If None, uses self.id_column (default: None)
+        """
+        # Use the default id_column if not specified
+        column = column_name or self.id_column
+        # Handle empty list case
+        if not ids:
+            self.log.warning("No IDs provided to exclude")
+            return
+        # Build the filter expression with proper SQL quoting
+        quoted_ids = ", ".join([repr(id) for id in ids])
+        filter_expression = f"{column} NOT IN ({quoted_ids})"
+        # Apply the filter
+        self.set_training_filter(filter_expression)
+    @deprecated(version="0.9")
+    def set_training_sampling(
+        self,
+        exclude_ids: Optional[List[Union[str, int]]] = None,
+        replicate_ids: Optional[List[Union[str, int]]] = None,
+        replication_factor: int = 2,
+    ):
+        """Configure training view with ID exclusions and replications (oversampling).
+        Args:
+            exclude_ids: List of IDs to exclude from training view
+            replicate_ids: List of IDs to replicate in training view for oversampling
+            replication_factor: Number of times to replicate each ID (default: 2)
+        Note:
+            If an ID appears in both lists, exclusion takes precedence.
+        """
+        from workbench.core.views import TrainingView
+        # Normalize to empty lists if None
+        exclude_ids = exclude_ids or []
+        replicate_ids = replicate_ids or []
+        # Remove any replicate_ids that are also in exclude_ids (exclusion wins)
+        replicate_ids = [rid for rid in replicate_ids if rid not in exclude_ids]
+        # If no sampling needed, just create normal view
+        if not exclude_ids and not replicate_ids:
+            self.log.important("No sampling specified, creating standard training view")
+            TrainingView.create(self, id_column=self.id_column)
+            return
+        # Build the custom SQL query
+        self.log.important(
+            f"Excluding {len(exclude_ids)} IDs, Replicating {len(replicate_ids)} IDs "
+            f"(factor: {replication_factor}x)"
+        )
+        # Helper to format IDs for SQL
+        def format_ids(ids):
+            return ", ".join([repr(id) for id in ids])
+        # Start with base query
+        base_query = f"SELECT * FROM {self.table}"
+        # Add exclusions if needed
+        if exclude_ids:
+            base_query += f"\nWHERE {self.id_column} NOT IN ({format_ids(exclude_ids)})"
+        # Build full query with replication
+        if replicate_ids:
+            # Generate VALUES clause for CROSS JOIN: (1), (2), ..., (N-1)
+            # We want N-1 additional copies since the original row is already in base_query
+            values_clause = ", ".join([f"({i})" for i in range(1, replication_factor)])
+            custom_sql = f"""{base_query}
+            UNION ALL
+            SELECT t.*
+            FROM {self.table} t
+            CROSS JOIN (VALUES {values_clause}) AS n(num)
+            WHERE t.{self.id_column} IN ({format_ids(replicate_ids)})"""
+        else:
+            # Only exclusions, no UNION needed
+            custom_sql = base_query
+        # Create the training view with our custom SQL
+        TrainingView.create_with_sql(self, sql_query=custom_sql, id_column=self.id_column)
     @classmethod
     def delete_views(cls, table: str, database: str):
         """Delete any views associated with this FeatureSet
@@ -667,7 +846,7 @@ if __name__ == "__main__":
     pd.set_option("display.width", 1000)
     # Grab a FeatureSet object and pull some information from it
-    my_features = LocalFeatureSetCore("test_features")
+    my_features = LocalFeatureSetCore("abalone_features")
     if not my_features.exists():
         print("FeatureSet not found!")
         sys.exit(1)
@@ -707,7 +886,7 @@ if __name__ == "__main__":
     # Test getting the holdout ids
     print("Getting the hold out ids...")
-    holdout_ids = my_features.get_training_holdouts("id")
+    holdout_ids = my_features.get_training_holdouts()
     print(f"Holdout IDs: {holdout_ids}")
     # Get a sample of the data
@@ -727,20 +906,90 @@ if __name__ == "__main__":
     # Set the holdout ids for the training view
     print("Setting hold out ids...")
     table = my_features.view("training").table
-    df = my_features.query(f'SELECT id, name FROM "{table}"')
-    my_holdout_ids = [id for id in df["id"] if id < 20]
-    my_features.set_training_holdouts("id", my_holdout_ids)
-    # Test the hold out set functionality with strings
-    print("Setting hold out ids (strings)...")
-    my_holdout_ids = [name for name in df["name"] if int(name.split(" ")[1]) > 80]
-    my_features.set_training_holdouts("name", my_holdout_ids)
+    df = my_features.query(f'SELECT auto_id, length FROM "{table}"')
+    my_holdout_ids = [id for id in df["auto_id"] if id < 20]
+    my_features.set_training_holdouts(my_holdout_ids)
     # Get the training data
     print("Getting the training data...")
     training_data = my_features.get_training_data()
+    print(f"Training Data: {training_data.shape}")
+    # Test the filter expression functionality
+    print("Setting a filter expression...")
+    my_features.set_training_filter("auto_id < 50 AND length > 65.0")
+    training_data = my_features.get_training_data()
+    print(f"Training Data: {training_data.shape}")
+    print(training_data)
+    # Remove training filter
+    print("Removing the filter expression...")
+    my_features.set_training_filter(None)
+    training_data = my_features.get_training_data()
+    print(f"Training Data: {training_data.shape}")
+    print(training_data)
+    # Test excluding ids from training
+    print("Excluding ids from training...")
+    my_features.exclude_ids_from_training([1, 2, 3, 4, 5])
+    training_data = my_features.get_training_data()
+    print(f"Training Data: {training_data.shape}")
+    print(training_data)
     # Now delete the AWS artifacts associated with this Feature Set
     # print("Deleting Workbench Feature Set...")
     # my_features.delete()
     # print("Done")
+    # Test set_training_sampling with exclusions and replications
+    print("\n--- Testing set_training_sampling ---")
+    my_features.set_training_filter(None)  # Reset any existing filters
+    original_count = num_rows
+    # Get valid IDs from the table
+    all_data = my_features.query(f'SELECT auto_id, length FROM "{table}"')
+    valid_ids = sorted(all_data["auto_id"].tolist())
+    print(f"Valid IDs range from {valid_ids[0]} to {valid_ids[-1]}")
+    exclude_list = valid_ids[0:3]  # First 3 IDs
+    replicate_list = valid_ids[10:13]  # IDs at positions 10, 11, 12
+    print(f"Original row count: {original_count}")
+    print(f"Excluding IDs: {exclude_list}")
+    print(f"Replicating IDs: {replicate_list}")
+    # Test with default replication factor (2x)
+    print("\n--- Testing with replication_factor=2 (default) ---")
+    my_features.set_training_sampling(exclude_ids=exclude_list, replicate_ids=replicate_list)
+    training_data = my_features.get_training_data()
+    print(f"Training Data after sampling: {training_data.shape}")
+    # Verify exclusions
+    for exc_id in exclude_list:
+        count = len(training_data[training_data["auto_id"] == exc_id])
+        print(f"Excluded ID {exc_id} appears {count} times (should be 0)")
+    # Verify replications
+    for rep_id in replicate_list:
+        count = len(training_data[training_data["auto_id"] == rep_id])
+        print(f"Replicated ID {rep_id} appears {count} times (should be 2)")
+    # Test with replication factor of 5
+    print("\n--- Testing with replication_factor=5 ---")
+    replicate_list_5x = [20, 21]
+    my_features.set_training_sampling(exclude_ids=exclude_list, replicate_ids=replicate_list_5x, replication_factor=5)
+    training_data = my_features.get_training_data()
+    print(f"Training Data after sampling: {training_data.shape}")
+    # Verify 5x replication
+    for rep_id in replicate_list_5x:
+        count = len(training_data[training_data["auto_id"] == rep_id])
+        print(f"Replicated ID {rep_id} appears {count} times (should be 5)")
+    # Test with large replication list (simulate 100 IDs)
+    print("\n--- Testing with large ID list (100 IDs) ---")
+    large_replicate_list = list(range(30, 130))  # 100 IDs
+    my_features.set_training_sampling(replicate_ids=large_replicate_list, replication_factor=3)
+    training_data = my_features.get_training_data()
+    print(f"Training Data after sampling: {training_data.shape}")
+    print(f"Expected extra rows: {len(large_replicate_list) * 3}")

workbench/core/artifacts/model_core.py CHANGED Viewed

@@ -21,6 +21,7 @@ from workbench.utils.aws_utils import newest_path, pull_s3_data
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.shap_utils import shap_values_data, shap_feature_importance
 from workbench.utils.deprecated_utils import deprecated
+from workbench.utils.model_utils import proximity_model
 class ModelType(Enum):
@@ -29,69 +30,62 @@ class ModelType(Enum):
     CLASSIFIER = "classifier"
     REGRESSOR = "regressor"
     CLUSTERER = "clusterer"
-    TRANSFORMER = "transformer"
     PROXIMITY = "proximity"
     PROJECTION = "projection"
     UQ_REGRESSOR = "uq_regressor"
     ENSEMBLE_REGRESSOR = "ensemble_regressor"
+    TRANSFORMER = "transformer"
+    UNKNOWN = "unknown"
+class ModelFramework(Enum):
+    """Enumerated Types for Workbench Model Frameworks"""
+    SKLEARN = "sklearn"
+    XGBOOST = "xgboost"
+    LIGHTGBM = "lightgbm"
+    PYTORCH_TABULAR = "pytorch_tabular"
+    CHEMPROP = "chemprop"
+    TRANSFORMER = "transformer"
     UNKNOWN = "unknown"
 class ModelImages:
     """Class for retrieving workbench inference images"""
-    image_uris = {
-        # US East 1 images
-        ("us-east-1", "xgb_training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-training:0.1"
-        ),
-        ("us-east-1", "xgb_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1"
-        ),
-        ("us-east-1", "pytorch_training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
-        ),
-        ("us-east-1", "pytorch_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
-        ),
-        # US West 2 images
-        ("us-west-2", "xgb_training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-training:0.1"
-        ),
-        ("us-west-2", "xgb_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1"
-        ),
-        ("us-west-2", "pytorch_training", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-training:0.1"
-        ),
-        ("us-west-2", "pytorch_inference", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-pytorch-inference:0.1"
-        ),
-        # ARM64 images
-        ("us-east-1", "xgb_inference", "0.1", "arm64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1-arm64"
-        ),
-        ("us-west-2", "xgb_inference", "0.1", "arm64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-sklearn-xgb-inference:0.1-arm64"
-        ),
-        # Meta Endpoint inference images
-        ("us-east-1", "meta-endpoint", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-east-1.amazonaws.com/aws-ml-images/py312-meta-endpoint:0.1"
-        ),
-        ("us-west-2", "meta-endpoint", "0.1", "x86_64"): (
-            "507740646243.dkr.ecr.us-west-2.amazonaws.com/aws-ml-images/py312-meta-endpoint:0.1"
-        ),
+    # Account ID
+    ACCOUNT_ID = "507740646243"
+    # Image name mappings
+    IMAGE_NAMES = {
+        "training": "py312-general-ml-training",
+        "inference": "py312-general-ml-inference",
+        "pytorch_training": "py312-pytorch-training",
+        "pytorch_inference": "py312-pytorch-inference",
+        "meta-endpoint": "py312-meta-endpoint",
     }
     @classmethod
-    def get_image_uri(cls, region, image_type, version="0.1", architecture="x86_64"):
-        key = (region, image_type, version, architecture)
-        if key in cls.image_uris:
-            return cls.image_uris[key]
-        else:
-            raise ValueError(
-                f"No matching image found for region: {region}, image_type: {image_type}, version: {version}"
-            )
+    def get_image_uri(cls, region, image_type, version="latest", architecture="x86_64"):
+        """
+        Dynamically construct ECR image URI.
+        Args:
+            region: AWS region (e.g., 'us-east-1', 'us-west-2')
+            image_type: Type of image (e.g., 'training', 'inference', 'pytorch_training')
+            version: Image version (e.g., '0.1', '0.2' defaults to 'latest')
+            architecture: CPU architecture (default: 'x86_64', currently unused but kept for compatibility)
+        Returns:
+            ECR image URI string
+        """
+        if image_type not in cls.IMAGE_NAMES:
+            raise ValueError(f"Unknown image_type: {image_type}. Valid types: {list(cls.IMAGE_NAMES.keys())}")
+        image_name = cls.IMAGE_NAMES[image_type]
+        uri = f"{cls.ACCOUNT_ID}.dkr.ecr.{region}.amazonaws.com/aws-ml-images/{image_name}:{version}"
+        return uri
 class ModelCore(Artifact):
@@ -105,11 +99,10 @@ class ModelCore(Artifact):
         ```
     """
-    def __init__(self, model_name: str, model_type: ModelType = None, **kwargs):
+    def __init__(self, model_name: str, **kwargs):
         """ModelCore Initialization
         Args:
             model_name (str): Name of Model in Workbench.
-            model_type (ModelType, optional): Set this for newly created Models. Defaults to None.
             **kwargs: Additional keyword arguments
         """
@@ -143,10 +136,8 @@ class ModelCore(Artifact):
                 self.latest_model = self.model_meta["ModelPackageList"][0]
                 self.description = self.latest_model.get("ModelPackageDescription", "-")
                 self.training_job_name = self._extract_training_job_name()
-                if model_type:
-                    self._set_model_type(model_type)
-                else:
-                    self.model_type = self._get_model_type()
+                self.model_type = self._get_model_type()
+                self.model_framework = self._get_model_framework()
             except (IndexError, KeyError):
                 self.log.critical(f"Model {self.model_name} appears to be malformed. Delete and recreate it!")
                 return
@@ -597,6 +588,24 @@ class ModelCore(Artifact):
         # Return the details
         return details
+    # Training View for this model
+    def training_view(self):
+        """Get the training view for this model"""
+        from workbench.core.artifacts.feature_set_core import FeatureSetCore
+        from workbench.core.views import View
+        # Grab our FeatureSet
+        fs = FeatureSetCore(self.get_input())
+        # See if we have a training view for this model
+        my_model_training_view = f"{self.name.replace('-', '_')}_training".lower()
+        view = View(fs, my_model_training_view, auto_create_view=False)
+        if view.exists():
+            return view
+        else:
+            self.log.important(f"No specific training view {my_model_training_view}, returning default training view")
+            return fs.view("training")
     # Pipeline for this model
     def get_pipeline(self) -> str:
         """Get the pipeline for this model"""
@@ -879,10 +888,24 @@ class ModelCore(Artifact):
         except (KeyError, IndexError, TypeError):
             return None
+    def publish_prox_model(self, prox_model_name: str = None, track_columns: list = None):
+        """Create and publish a Proximity Model for this Model
+        Args:
+            prox_model_name (str, optional): Name of the Proximity Model (if not specified, a name will be generated)
+            track_columns (list, optional): List of columns to track in the Proximity Model.
+        Returns:
+            Model: The published Proximity Model
+        """
+        if prox_model_name is None:
+            prox_model_name = self.model_name + "-prox"
+        return proximity_model(self, prox_model_name, track_columns=track_columns)
     def delete(self):
         """Delete the Model Packages and the Model Group"""
         if not self.exists():
-            self.log.warning(f"Trying to delete an Model that doesn't exist: {self.name}")
+            self.log.warning(f"Trying to delete a Model that doesn't exist: {self.name}")
         # Call the Class Method to delete the Model Group
         ModelCore.managed_delete(model_group_name=self.name)
@@ -958,6 +981,27 @@ class ModelCore(Artifact):
             self.log.warning(f"Could not determine model type for {self.model_name}!")
             return ModelType.UNKNOWN
+    def _set_model_framework(self, model_framework: ModelFramework):
+        """Internal: Set the Model Framework for this Model"""
+        self.model_framework = model_framework
+        self.upsert_workbench_meta({"workbench_model_framework": self.model_framework.value})
+        self.remove_health_tag("model_framework_unknown")
+    def _get_model_framework(self) -> ModelFramework:
+        """Internal: Query the Workbench Metadata to get the model framework
+        Returns:
+            ModelFramework: The ModelFramework of this Model
+        Notes:
+            This is an internal method that should not be called directly
+            Use the model_framework attribute instead
+        """
+        model_framework = self.workbench_meta().get("workbench_model_framework")
+        try:
+            return ModelFramework(model_framework)
+        except ValueError:
+            self.log.warning(f"Could not determine model framework for {self.model_name}!")
+            return ModelFramework.UNKNOWN
     def _load_training_metrics(self):
         """Internal: Retrieve the training metrics and Confusion Matrix for this model
                      and load the data into the Workbench Metadata
@@ -1149,13 +1193,11 @@ if __name__ == "__main__":
     # Grab a ModelCore object and pull some information from it
     my_model = ModelCore("abalone-regression")
-    # Call the various methods
     # Let's do a check/validation of the Model
     print(f"Model Check: {my_model.exists()}")
     # Make sure the model is 'ready'
-    # my_model.onboard()
+    my_model.onboard()
     # Get the ARN of the Model Group
     print(f"Model Group ARN: {my_model.group_arn()}")
@@ -1221,5 +1263,10 @@ if __name__ == "__main__":
     # Delete the Model
     # ModelCore.managed_delete("wine-classification")
+    # Check the training view logic
+    model = ModelCore("wine-class-test-251112-BW")
+    training_view = model.training_view()
+    print(f"Training View Name: {training_view.name}")
     # Check for a model that doesn't exist
     my_model = ModelCore("empty-model-group")

workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl

Potentially problematic release.

workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl