workbench-0.8.213-py3-none-any.whl → workbench-0.8.219-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +257 -80
- workbench/algorithms/dataframe/projection_2d.py +38 -21
- workbench/algorithms/dataframe/proximity.py +75 -150
- workbench/algorithms/graph/light/proximity_graph.py +5 -5
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +2 -2
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +3 -0
- workbench/api/endpoint.py +10 -5
- workbench/api/feature_set.py +76 -6
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +43 -4
- workbench/core/artifacts/endpoint_core.py +65 -117
- workbench/core/artifacts/feature_set_core.py +3 -3
- workbench/core/artifacts/model_core.py +6 -4
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +30 -10
- workbench/model_script_utils/model_script_utils.py +15 -11
- workbench/model_script_utils/pytorch_utils.py +11 -1
- workbench/model_scripts/chemprop/chemprop.template +147 -71
- workbench/model_scripts/chemprop/generated_model_script.py +151 -75
- workbench/model_scripts/chemprop/model_script_utils.py +15 -11
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +45 -27
- workbench/model_scripts/pytorch_model/model_script_utils.py +15 -11
- workbench/model_scripts/pytorch_model/pytorch.template +42 -24
- workbench/model_scripts/pytorch_model/pytorch_utils.py +11 -1
- workbench/model_scripts/script_generation.py +4 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +167 -156
- workbench/model_scripts/xgb_model/model_script_utils.py +15 -11
- workbench/model_scripts/xgb_model/xgb_model.template +163 -152
- workbench/repl/workbench_shell.py +0 -5
- workbench/scripts/endpoint_test.py +2 -2
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/utils/chem_utils/fingerprints.py +87 -46
- workbench/utils/chemprop_utils.py +23 -5
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +94 -10
- workbench/utils/model_utils.py +91 -9
- workbench/utils/pytorch_utils.py +1 -1
- workbench/utils/shap_utils.py +1 -55
- workbench/web_interface/components/plugins/scatter_plot.py +4 -8
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/METADATA +2 -1
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/RECORD +54 -50
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/entry_points.txt +1 -0
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/WHEEL +0 -0
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.213.dist-info → workbench-0.8.219.dist-info}/top_level.txt +0 -0
workbench/algorithms/models/cleanlab_model.py ADDED
@@ -0,0 +1,382 @@
+"""Cleanlab-based label quality detection for regression and classification.
+
+Note: Users must install cleanlab separately: pip install cleanlab
+"""
+
+import logging
+from typing import List, Optional
+
+import datasets
+import pandas as pd
+from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
+from sklearn.preprocessing import LabelEncoder
+
+from workbench.core.artifacts.model_core import ModelType
+
+# Check datasets version - Datalab has a bug with datasets>=4.0.0
+# See: https://github.com/cleanlab/cleanlab/issues/1253
+_datasets_major = int(datasets.__version__.split(".")[0])
+if _datasets_major >= 4:
+    raise ImportError(
+        "cleanlab's Datalab requires datasets<4.0.0 due to a known bug.\n"
+        "See: https://github.com/cleanlab/cleanlab/issues/1253\n"
+        "Fix: pip install 'datasets<4.0.0'"
+    )
+
+# Check for cleanlab package
+try:
+    from cleanlab.regression.learn import CleanLearning as CleanLearningRegressor
+    from cleanlab.classification import CleanLearning as CleanLearningClassifier
+    from cleanlab import Datalab
+
+    CLEANLAB_AVAILABLE = True
+except ImportError:
+    CLEANLAB_AVAILABLE = False
+    CleanLearningRegressor = None
+    CleanLearningClassifier = None
+    Datalab = None
+
+# Regressor types for convenience
+REGRESSOR_TYPES = [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]
+
+# Set up logging
+log = logging.getLogger("workbench")
+
+
+class CleanlabModels:
+    """Factory class for cleanlab models with shared data preparation.
+
+    This class handles data preparation once and provides lazy-loaded access
+    to both CleanLearning and Datalab models. Each model is only created
+    when first requested, and the prepared data is shared between them.
+
+    Attributes:
+        id_column: Name of the ID column in the data.
+        features: List of feature column names.
+        target: Name of the target column.
+        model_type: ModelType (REGRESSOR, CLASSIFIER, etc.).
+
+    Example:
+        ```python
+        cleanlab = CleanlabModels(df, "id", features, "target", ModelType.REGRESSOR)
+
+        # Get CleanLearning model for label issues and uncertainty
+        cl = cleanlab.clean_learning()
+        issues = cl.get_label_issues()
+
+        # Get Datalab for comprehensive data quality report
+        lab = cleanlab.datalab()
+        lab.report()
+        ```
+    """
+
+    def __init__(
+        self,
+        df: pd.DataFrame,
+        id_column: str,
+        features: List[str],
+        target: str,
+        model_type: ModelType = ModelType.REGRESSOR,
+    ):
+        """Initialize CleanlabModels with data preparation.
+
+        Args:
+            df: DataFrame containing data for analysis.
+            id_column: Name of the column used as the identifier.
+            features: List of feature column names.
+            target: Name of the target column.
+            model_type: ModelType (REGRESSOR, CLASSIFIER, etc.).
+        """
+        if not CLEANLAB_AVAILABLE:
+            raise ImportError("cleanlab is not installed. Install with: pip install 'cleanlab[datalab]'")
+
+        self.id_column = id_column
+        self.target = target
+        self.model_type = model_type
+
+        # Filter to numeric features only
+        numeric_cols = df.select_dtypes(include=["number"]).columns
+        non_numeric = [f for f in features if f not in numeric_cols]
+        if non_numeric:
+            log.warning(f"Excluding non-numeric features: {non_numeric}")
+            features = [f for f in features if f in numeric_cols]
+        self.features = features
+
+        # Prepare clean data (shared by both models)
+        self._clean_df = df.dropna(subset=features + [target])[[id_column] + features + [target]].copy()
+        self._clean_df = self._clean_df.reset_index(drop=True)
+        self._X = self._clean_df[features].values
+        self._y = self._clean_df[target].values
+
+        # For classification, encode labels
+        self._label_encoder: Optional[LabelEncoder] = None
+        self._y_encoded = self._y
+        if model_type == ModelType.CLASSIFIER:
+            self._label_encoder = LabelEncoder()
+            self._y_encoded = self._label_encoder.fit_transform(self._y)
+
+        # Lazy-loaded models
+        self._clean_learning = None
+        self._datalab = None
+
+    def clean_learning(self):
+        """Get the CleanLearning model (fitted, with label issues computed).
+
+        Returns the cleanlab CleanLearning model with enhanced get_label_issues()
+        that includes the ID column, sorts by label quality, and decodes labels.
+
+        Returns:
+            CleanLearning: Fitted cleanlab model with methods like:
+                - get_label_issues(): DataFrame with id_column, sorted by label_quality
+                - predict(X): Make predictions
+                - For regression: get_epistemic_uncertainty(), get_aleatoric_uncertainty()
+        """
+        if self._clean_learning is not None:
+            return self._clean_learning
+
+        if self.model_type == ModelType.CLASSIFIER:
+            log.info("Building CleanLearning model (classification)...")
+            cl_model = CleanLearningClassifier(
+                HistGradientBoostingClassifier(),
+                find_label_issues_kwargs={"n_jobs": 1},
+            )
+            cl_model.fit(self._X, self._y_encoded)
+        else:
+            log.info("Building CleanLearning model (regression)...")
+            cl_model = CleanLearningRegressor(HistGradientBoostingRegressor())
+            cl_model.fit(self._X, self._y)
+
+        # Enhance get_label_issues to include id column, sort, and decode labels
+        original_get_label_issues = cl_model.get_label_issues
+        id_column = self.id_column
+        clean_df = self._clean_df
+        model_type = self.model_type
+        label_encoder = self._label_encoder
+
+        def get_label_issues_enhanced():
+            issues = original_get_label_issues().copy()
+            issues.insert(0, id_column, clean_df[id_column].values)
+            if model_type == ModelType.CLASSIFIER and label_encoder is not None:
+                for col in ["given_label", "predicted_label"]:
+                    if col in issues.columns:
+                        issues[col] = label_encoder.inverse_transform(issues[col])
+            return issues.sort_values("label_quality").reset_index(drop=True)
+
+        cl_model.get_label_issues = get_label_issues_enhanced
+
+        # For regression, enhance uncertainty methods to use stored data and return DataFrames
+        if model_type != ModelType.CLASSIFIER:
+            X = self._X
+            y = self._y
+            original_get_aleatoric = cl_model.get_aleatoric_uncertainty
+            original_get_epistemic = cl_model.get_epistemic_uncertainty
+
+            def get_aleatoric_uncertainty_enhanced():
+                residual = cl_model.predict(X) - y
+                return original_get_aleatoric(X, residual)
+
+            def get_epistemic_uncertainty_enhanced():
+                values = original_get_epistemic(X, y)
+                return (
+                    pd.DataFrame(
+                        {
+                            id_column: clean_df[id_column].values,
+                            "epistemic_uncertainty": values,
+                        }
+                    )
+                    .sort_values("epistemic_uncertainty", ascending=False)
+                    .reset_index(drop=True)
+                )
+
+            cl_model.get_aleatoric_uncertainty = get_aleatoric_uncertainty_enhanced
+            cl_model.get_epistemic_uncertainty = get_epistemic_uncertainty_enhanced
+
+        n_issues = original_get_label_issues()["is_label_issue"].sum()
+        log.info(f"CleanLearning: {n_issues} potential label issues out of {len(self._clean_df)} samples")
+
+        self._clean_learning = cl_model
+        return cl_model
+
+    def datalab(self):
+        """Get the Datalab instance (with find_issues already called).
+
+        Returns the native cleanlab Datalab for comprehensive data quality
+        analysis. Issues have already been detected.
+
+        Note: For classification, this will build the CleanLearning model first
+        (if not already built) to reuse its classifier for pred_probs.
+
+        Returns:
+            Datalab: Cleanlab Datalab instance with methods like:
+                - report(): Print comprehensive data quality report
+                - get_issues(): DataFrame with all detected issues
+                - get_issue_summary(): Summary statistics
+        """
+        if self._datalab is not None:
+            return self._datalab
+
+        log.info("Building Datalab model...")
+
+        # Create DataFrame with only numeric columns (features + target) for Datalab
+        datalab_df = self._clean_df[self.features + [self.target]]
+
+        # Create Datalab instance
+        if self.model_type == ModelType.CLASSIFIER:
+            lab = Datalab(data=datalab_df, label_name=self.target)
+            # Build CleanLearning first to reuse its classifier for pred_probs
+            cl = self.clean_learning()
+            pred_probs = cl.clf.predict_proba(self._X)
+            lab.find_issues(features=self._X, pred_probs=pred_probs)
+        else:
+            lab = Datalab(data=datalab_df, label_name=self.target, task="regression")
+            lab.find_issues(features=self._X)
+
+        self._datalab = lab
+        return lab
+
+
+# Keep the old function for backwards compatibility
+def create_cleanlab_model(
+    df: pd.DataFrame,
+    id_column: str,
+    features: List[str],
+    target: str,
+    model_type: ModelType = ModelType.REGRESSOR,
+):
+    """Create a CleanlabModels instance for label quality detection.
+
+    Args:
+        df: DataFrame containing data for label quality detection.
+        id_column: Name of the column used as the identifier.
+        features: List of feature column names.
+        target: Name of the target column.
+        model_type: ModelType (REGRESSOR, CLASSIFIER, etc.).
+
+    Returns:
+        CleanlabModels: Factory providing access to CleanLearning and Datalab models.
+
+    Example:
+        ```python
+        cleanlab = create_cleanlab_model(df, "id", features, "target")
+
+        # Get CleanLearning model and label issues
+        cl = cleanlab.clean_learning()
+        issues = cl.get_label_issues()  # Includes ID column, sorted by quality
+
+        # Get Datalab for comprehensive data quality report
+        lab = cleanlab.datalab()
+        lab.report()
+        ```
+
+    References:
+        cleanlab: https://github.com/cleanlab/cleanlab
+    """
+    return CleanlabModels(df, id_column, features, target, model_type)
+
+
+if __name__ == "__main__":
+    from workbench.api import FeatureSet, Model
+    import numpy as np
+
+    pd.set_option("display.max_columns", None)
+    pd.set_option("display.width", 1000)
+
+    # Create a sample DataFrame with some noisy points
+    np.random.seed(42)
+    n_samples = 100
+
+    # Generate clean data: y = 2*x1 + 3*x2 + noise
+    x1 = np.random.randn(n_samples)
+    x2 = np.random.randn(n_samples)
+    y_clean = 2 * x1 + 3 * x2 + np.random.randn(n_samples) * 0.1
+
+    # Add some noisy points (last 10 samples)
+    y_noisy = y_clean.copy()
+    y_noisy[-10:] += np.random.randn(10) * 20  # Large noise
+
+    data = {
+        "ID": [f"sample_{i}" for i in range(n_samples)],
+        "Feature1": x1,
+        "Feature2": x2,
+        "target": y_noisy,
+    }
+    df = pd.DataFrame(data)
+
+    print("=" * 80)
+    print("Testing CleanlabModels with synthetic data...")
+    print("=" * 80)
+
+    # Create CleanlabModels instance
+    cleanlab_models = create_cleanlab_model(
+        df,
+        id_column="ID",
+        features=["Feature1", "Feature2"],
+        target="target",
+    )
+
+    # Get CleanLearning model and test get_label_issues
+    cl = cleanlab_models.clean_learning()
+    print(f"CleanLearning type: {type(cl)}")
+
+    label_issues = cl.get_label_issues()
+    print("\nLabel issues (worst first, with ID column):")
+    print(label_issues.head(10))
+
+    # Check if our artificially noisy samples are detected
+    noisy_ids = [f"sample_{i}" for i in range(90, 100)]
+    worst_10 = label_issues.head(10)
+    detected = worst_10[worst_10["ID"].isin(noisy_ids)]
+    print(f"\nOf 10 noisy samples, {len(detected)} appear in worst 10")
+
+    # Test Datalab
+    print("\n" + "=" * 80)
+    print("Testing Datalab...")
+    print("=" * 80)
+    lab = cleanlab_models.datalab()
+    print(f"Datalab type: {type(lab)}")
+    print(f"Datalab issues shape: {lab.get_issues().shape}")
+    lab.report(num_examples=3)
+
+    # Test with real AQSol regression data
+    print("\n" + "=" * 80)
+    print("Testing with AQSol regression data...")
+    print("=" * 80)
+    fs = FeatureSet("aqsol_features")
+    df = fs.pull_dataframe()
+    model = Model("aqsol-regression")
+    features = model.features()
+    target = model.target()
+
+    cleanlab_models = create_cleanlab_model(
+        df,
+        id_column=fs.id_column,
+        features=features,
+        target=target,
+    )
+
+    # Get CleanLearning and label issues
+    cl = cleanlab_models.clean_learning()
+    label_issues = cl.get_label_issues()
+    print("\nLabel issues summary:")
+    print(f"Total samples: {len(label_issues)}")
+    print(f"Flagged as issues: {label_issues['is_label_issue'].sum()}")
+
+    print("\nWorst label quality samples:")
+    print(label_issues.head(10))
+
+    print("\nLabel quality distribution:")
+    print(label_issues["label_quality"].describe())
+
+    # Test uncertainty estimates (regression only)
+    print("\nTesting uncertainty estimates...")
+    aleatoric = cl.get_aleatoric_uncertainty()  # enhanced method; uses the stored X/y internally
+    print(f"Aleatoric: Data noise (irreducible) = {aleatoric}")
+    epistemic = cl.get_epistemic_uncertainty()  # enhanced method; returns a sorted DataFrame
+    print(f"Epistemic: Model uncertainty (reducible) = {epistemic[:10]} ...")
+
+    # Test Datalab report
+    print("\n" + "=" * 80)
+    print("Testing Datalab report (regression)...")
+    print("=" * 80)
+    lab = cleanlab_models.datalab()
+    lab.report(num_examples=3)
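The `__main__` block above only exercises the regression path. For the classifier path, usage would look roughly like this (a minimal sketch with synthetic data; the column names and data are illustrative, not from the package):

```python
import numpy as np
import pandas as pd

from workbench.algorithms.models.cleanlab_model import CleanlabModels
from workbench.core.artifacts.model_core import ModelType

# Two well-separated classes, with the last 10 labels deliberately flipped
rng = np.random.default_rng(0)
n = 200
x1, x2 = rng.normal(size=n), rng.normal(size=n)
labels = np.where(x1 + x2 > 0, "high", "low")
labels[-10:] = np.where(labels[-10:] == "high", "low", "high")

df = pd.DataFrame({"id": [f"s_{i}" for i in range(n)], "x1": x1, "x2": x2, "label": labels})

models = CleanlabModels(df, "id", ["x1", "x2"], "label", ModelType.CLASSIFIER)
issues = models.clean_learning().get_label_issues()
# Rows come back sorted worst-quality-first, with given_label/predicted_label
# decoded back to the original strings, so the flipped labels surface at the top
print(issues.head(10))
```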
workbench/algorithms/models/noise_model.py CHANGED
@@ -4,7 +4,7 @@ from xgboost import XGBRegressor
 from typing import List
 import logging
 
-from workbench.algorithms.dataframe.
+from workbench.algorithms.dataframe.feature_space_proximity import FeatureSpaceProximity
 
 # Set up logging
 log = logging.getLogger("workbench")
@@ -228,7 +228,7 @@ class NoiseModel:
 
         # Proximity model for feature space analysis
         log.info(" Building proximity model...")
-        self.proximity =
+        self.proximity = FeatureSpaceProximity(
             self.df,
             id_column=self.id_column,
             features=self.features,
workbench/algorithms/sql/outliers.py CHANGED
@@ -209,9 +209,9 @@ class Outliers:
             else:
                 return group.nlargest(n, col)
 
-        # Group by 'outlier_group' and apply the helper function, explicitly selecting columns
-        top_outliers = outlier_df.groupby("outlier_group", group_keys=False).apply(
-            get_extreme_values
+        # Group by 'outlier_group' and apply the helper function, explicitly selecting columns to silence warning
+        top_outliers = outlier_df.groupby("outlier_group", group_keys=False)[outlier_df.columns].apply(
+            get_extreme_values
         )
         return top_outliers.reset_index(drop=True)
 
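The outliers change above follows the standard fix for the recent pandas deprecation where `groupby(...).apply(...)` also operates on the grouping columns. A standalone sketch of the pattern on toy data (the helper mirrors `get_extreme_values`):

```python
import pandas as pd

df = pd.DataFrame({"outlier_group": ["a", "a", "b", "b"], "value": [1.0, 9.0, 5.0, 7.0]})

def get_extreme_values(group: pd.DataFrame) -> pd.DataFrame:
    # Keep the single most extreme row per group
    return group.nlargest(1, "value")

# Deprecated on pandas >= 2.2 (apply() also operated on the grouping column):
# top = df.groupby("outlier_group", group_keys=False).apply(get_extreme_values)

# Explicitly selecting the columns keeps the same result and silences the warning
top = df.groupby("outlier_group", group_keys=False)[df.columns].apply(get_extreme_values)
print(top.reset_index(drop=True))
```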
workbench/api/__init__.py CHANGED
@@ -5,6 +5,7 @@ These class provide high-level APIs for the Workbench package, offering easy acc
 - DataSource: Manages AWS Data Catalog and Athena
 - FeatureSet: Manages AWS Feature Store and Feature Groups
 - Model: Manages the training and deployment of AWS Model Groups and Packages
+- MetaModel: A Model that aggregates predictions from multiple child endpoints
 - ModelType: Enum for the different model types supported by Workbench
 - Endpoint: Manages the deployment and invocations/inference on AWS Endpoints
 - Meta: Provides an API to retrieve AWS Metadata for the above classes
@@ -15,6 +16,7 @@ These class provide high-level APIs for the Workbench package, offering easy acc
 from .data_source import DataSource
 from .feature_set import FeatureSet
 from .model import Model, ModelType, ModelFramework
+from .meta_model import MetaModel
 from .endpoint import Endpoint
 from .meta import Meta
 from .parameter_store import ParameterStore
@@ -24,6 +26,7 @@ __all__ = [
     "DataSource",
     "FeatureSet",
     "Model",
+    "MetaModel",
     "ModelType",
     "ModelFramework",
     "Endpoint",
workbench/api/endpoint.py CHANGED
@@ -44,16 +44,21 @@ class Endpoint(EndpointCore):
         """
         return super().inference(eval_df, capture_name, id_column, drop_error_rows)
 
-    def auto_inference(self
-        """Run inference on the Endpoint using the
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the Endpoint using the test data from the model training view
 
-
-
+        Returns:
+            pd.DataFrame: The DataFrame with predictions
+        """
+        return super().auto_inference()
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the Endpoint using the full data from the model training view
 
         Returns:
             pd.DataFrame: The DataFrame with predictions
         """
-        return super().
+        return super().full_inference()
 
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
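Given the signatures completed above, the two convenience methods would be used roughly like this (a sketch; the endpoint name is illustrative):

```python
from workbench.api import Endpoint

end = Endpoint("aqsol-regression-end")  # illustrative endpoint name

# Predictions on just the held-out test rows from the model's training view
test_preds = end.auto_inference()

# Predictions on every row of the training view
all_preds = end.full_inference()
print(test_preds.shape, all_preds.shape)
```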
workbench/api/feature_set.py CHANGED
@@ -154,23 +154,93 @@ class FeatureSet(FeatureSetCore):
         # Return the Model
         return Model(name)
 
-    def prox_model(
-
+    def prox_model(
+        self, target: str, features: list, include_all_columns: bool = False
+    ) -> "FeatureSpaceProximity":  # noqa: F821
+        """Create a local FeatureSpaceProximity Model for this FeatureSet
 
         Args:
             target (str): The target column name
             features (list): The list of feature column names
+            include_all_columns (bool): Include all DataFrame columns in results (default: False)
 
         Returns:
-
+            FeatureSpaceProximity: A local FeatureSpaceProximity Model
         """
-        from workbench.algorithms.dataframe.
+        from workbench.algorithms.dataframe.feature_space_proximity import FeatureSpaceProximity  # noqa: F401
 
         # Create the Proximity Model from the full FeatureSet dataframe
         full_df = self.pull_dataframe()
 
-        # Create and return the
-        return
+        # Create and return the FeatureSpaceProximity Model
+        return FeatureSpaceProximity(
+            full_df, id_column=self.id_column, features=features, target=target, include_all_columns=include_all_columns
+        )
+
+    def fp_prox_model(
+        self,
+        target: str,
+        fingerprint_column: str = None,
+        include_all_columns: bool = False,
+        radius: int = 2,
+        n_bits: int = 1024,
+        counts: bool = False,
+    ) -> "FingerprintProximity":  # noqa: F821
+        """Create a local FingerprintProximity Model for this FeatureSet
+
+        Args:
+            target (str): The target column name
+            fingerprint_column (str): Column containing fingerprints. If None, uses existing 'fingerprint'
+                column or computes from SMILES column.
+            include_all_columns (bool): Include all DataFrame columns in results (default: False)
+            radius (int): Radius for Morgan fingerprint computation (default: 2)
+            n_bits (int): Number of bits for fingerprint (default: 1024)
+            counts (bool): Whether to use count simulation (default: False)
+
+        Returns:
+            FingerprintProximity: A local FingerprintProximity Model
+        """
+        from workbench.algorithms.dataframe.fingerprint_proximity import FingerprintProximity  # noqa: F401
+
+        # Create the Proximity Model from the full FeatureSet dataframe
+        full_df = self.pull_dataframe()
+
+        # Create and return the FingerprintProximity Model
+        return FingerprintProximity(
+            full_df,
+            id_column=self.id_column,
+            fingerprint_column=fingerprint_column,
+            target=target,
+            include_all_columns=include_all_columns,
+            radius=radius,
+            n_bits=n_bits,
+            counts=counts,
+        )
+
+    def cleanlab_model(
+        self,
+        target: str,
+        features: list,
+        model_type: ModelType = ModelType.REGRESSOR,
+    ) -> "CleanLearning":  # noqa: F821
+        """Create a CleanLearning model for detecting label issues in this FeatureSet
+
+        Args:
+            target (str): The target column name
+            features (list): The list of feature column names
+            model_type (ModelType): The model type (REGRESSOR or CLASSIFIER). Defaults to REGRESSOR.
+
+        Returns:
+            CleanLearning: A fitted cleanlab model. Use get_label_issues() to get
+            a DataFrame with id_column, label_quality, predicted_label, given_label, is_label_issue.
+        """
+        from workbench.algorithms.models.cleanlab_model import create_cleanlab_model  # noqa: F401
+
+        # Get the full FeatureSet dataframe
+        full_df = self.pull_dataframe()
+
+        # Create and return the CleanLearning model
+        return create_cleanlab_model(full_df, self.id_column, features, target, model_type=model_type)
 
 
 if __name__ == "__main__":
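Taken together, the three new FeatureSet helpers compose like this (a sketch; the FeatureSet name comes from the test code above, while the target and feature names are illustrative):

```python
from workbench.api import FeatureSet, ModelType

fs = FeatureSet("aqsol_features")
target, features = "solubility", ["molwt", "logp"]  # illustrative column names

# Local feature-space proximity model over the numeric features
prox = fs.prox_model(target=target, features=features)

# Fingerprint proximity (computes Morgan fingerprints from SMILES if needed)
fp_prox = fs.fp_prox_model(target=target, radius=2, n_bits=1024)

# Cleanlab factory (a CleanlabModels instance, per cleanlab_model.py above)
cleanlab = fs.cleanlab_model(target=target, features=features, model_type=ModelType.REGRESSOR)
issues = cleanlab.clean_learning().get_label_issues()  # worst label quality first
print(issues.head())
```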