workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (113)
  1. workbench/algorithms/dataframe/__init__.py +1 -2
  2. workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
  3. workbench/algorithms/dataframe/proximity.py +261 -235
  4. workbench/algorithms/graph/light/proximity_graph.py +10 -8
  5. workbench/api/__init__.py +2 -1
  6. workbench/api/compound.py +1 -1
  7. workbench/api/endpoint.py +11 -0
  8. workbench/api/feature_set.py +11 -8
  9. workbench/api/meta.py +5 -2
  10. workbench/api/model.py +16 -15
  11. workbench/api/monitor.py +1 -16
  12. workbench/core/artifacts/__init__.py +11 -2
  13. workbench/core/artifacts/artifact.py +11 -3
  14. workbench/core/artifacts/data_capture_core.py +355 -0
  15. workbench/core/artifacts/endpoint_core.py +256 -118
  16. workbench/core/artifacts/feature_set_core.py +265 -16
  17. workbench/core/artifacts/model_core.py +107 -60
  18. workbench/core/artifacts/monitor_core.py +33 -248
  19. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  20. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  21. workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
  22. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  23. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  24. workbench/core/transforms/features_to_model/features_to_model.py +42 -32
  25. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  26. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  27. workbench/core/views/training_view.py +113 -42
  28. workbench/core/views/view.py +53 -3
  29. workbench/core/views/view_utils.py +4 -4
  30. workbench/model_scripts/chemprop/chemprop.template +852 -0
  31. workbench/model_scripts/chemprop/generated_model_script.py +852 -0
  32. workbench/model_scripts/chemprop/requirements.txt +11 -0
  33. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  34. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  35. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  36. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  37. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  38. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  39. workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
  40. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  41. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  42. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  43. workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
  44. workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
  45. workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
  46. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  47. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  48. workbench/model_scripts/pytorch_model/generated_model_script.py +373 -190
  49. workbench/model_scripts/pytorch_model/pytorch.template +370 -187
  50. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  51. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  52. workbench/model_scripts/script_generation.py +17 -9
  53. workbench/model_scripts/uq_models/generated_model_script.py +605 -0
  54. workbench/model_scripts/uq_models/mapie.template +605 -0
  55. workbench/model_scripts/uq_models/requirements.txt +1 -0
  56. workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
  57. workbench/model_scripts/xgb_model/xgb_model.template +44 -46
  58. workbench/repl/workbench_shell.py +28 -14
  59. workbench/scripts/endpoint_test.py +162 -0
  60. workbench/scripts/lambda_test.py +73 -0
  61. workbench/scripts/ml_pipeline_batch.py +137 -0
  62. workbench/scripts/ml_pipeline_sqs.py +186 -0
  63. workbench/scripts/monitor_cloud_watch.py +20 -100
  64. workbench/utils/aws_utils.py +4 -3
  65. workbench/utils/chem_utils/__init__.py +0 -0
  66. workbench/utils/chem_utils/fingerprints.py +134 -0
  67. workbench/utils/chem_utils/misc.py +194 -0
  68. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  69. workbench/utils/chem_utils/mol_standardize.py +450 -0
  70. workbench/utils/chem_utils/mol_tagging.py +348 -0
  71. workbench/utils/chem_utils/projections.py +209 -0
  72. workbench/utils/chem_utils/salts.py +256 -0
  73. workbench/utils/chem_utils/sdf.py +292 -0
  74. workbench/utils/chem_utils/toxicity.py +250 -0
  75. workbench/utils/chem_utils/vis.py +253 -0
  76. workbench/utils/chemprop_utils.py +760 -0
  77. workbench/utils/cloudwatch_handler.py +1 -1
  78. workbench/utils/cloudwatch_utils.py +137 -0
  79. workbench/utils/config_manager.py +3 -7
  80. workbench/utils/endpoint_utils.py +5 -7
  81. workbench/utils/license_manager.py +2 -6
  82. workbench/utils/model_utils.py +95 -34
  83. workbench/utils/monitor_utils.py +44 -62
  84. workbench/utils/pandas_utils.py +3 -3
  85. workbench/utils/pytorch_utils.py +526 -0
  86. workbench/utils/shap_utils.py +10 -2
  87. workbench/utils/workbench_logging.py +0 -3
  88. workbench/utils/workbench_sqs.py +1 -1
  89. workbench/utils/xgboost_model_utils.py +371 -156
  90. workbench/web_interface/components/model_plot.py +7 -1
  91. workbench/web_interface/components/plugin_unit_test.py +5 -2
  92. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  93. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  94. workbench/web_interface/components/plugins/model_details.py +9 -7
  95. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  96. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
  97. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/RECORD +101 -85
  98. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
  99. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
  100. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  101. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  102. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  103. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  104. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  105. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  106. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  107. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  108. workbench/utils/chem_utils.py +0 -1556
  109. workbench/utils/execution_environment.py +0 -211
  110. workbench/utils/fast_inference.py +0 -167
  111. workbench/utils/resource_utils.py +0 -39
  112. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
  113. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ from typing import Union
4
4
  import logging
5
5
 
6
6
  # Workbench Imports
7
- from workbench.algorithms.dataframe import Proximity, ProximityType
7
+ from workbench.algorithms.dataframe import Proximity
8
8
  from workbench.api.graph_store import GraphStore
9
9
 
10
10
  # Set up logging
@@ -50,12 +50,13 @@ class ProximityGraph:
50
50
  self._nx_graph.add_nodes_from(node_df.set_index(id_column, drop=False).to_dict("index").items())
51
51
 
52
52
  # Determine edge weights based on proximity type
53
- if prox.proximity_type == ProximityType.SIMILARITY:
54
- all_neighbors_df["weight"] = all_neighbors_df["similarity"]
55
- elif prox.proximity_type == ProximityType.DISTANCE:
56
- # Normalize and invert distance
57
- max_distance = all_neighbors_df["distance"].max()
58
- all_neighbors_df["weight"] = 1.0 - all_neighbors_df["distance"] / max_distance
53
+ # if prox.proximity_type == ProximityType.SIMILARITY:
54
+ # all_neighbors_df["weight"] = all_neighbors_df["similarity"]
55
+ # elif prox.proximity_type == ProximityType.DISTANCE:
56
+
57
+ # Normalize and invert distance
58
+ max_distance = all_neighbors_df["distance"].max()
59
+ all_neighbors_df["weight"] = 1.0 - all_neighbors_df["distance"] / max_distance
59
60
 
60
61
  # Add edges to the graph
61
62
  log.info("Adding edges to the graph...")
@@ -135,7 +136,8 @@ if __name__ == "__main__":
135
136
  from workbench.algorithms.dataframe.fingerprint_proximity import FingerprintProximity
136
137
  from workbench.web_interface.components.plugins.graph_plot import GraphPlot
137
138
  from workbench.api import DFStore
138
- from workbench.utils.chem_utils import compute_morgan_fingerprints, project_fingerprints
139
+ from workbench.utils.chem_utils.fingerprints import compute_morgan_fingerprints
140
+ from workbench.utils.chem_utils.projections import project_fingerprints
139
141
  from workbench.utils.graph_utils import connected_sample, graph_layout
140
142
 
141
143
  def show_graph(graph, id_column):
workbench/api/__init__.py CHANGED
@@ -14,7 +14,7 @@ These class provide high-level APIs for the Workbench package, offering easy acc
14
14
 
15
15
  from .data_source import DataSource
16
16
  from .feature_set import FeatureSet
17
- from .model import Model, ModelType
17
+ from .model import Model, ModelType, ModelFramework
18
18
  from .endpoint import Endpoint
19
19
  from .meta import Meta
20
20
  from .parameter_store import ParameterStore
@@ -25,6 +25,7 @@ __all__ = [
25
25
  "FeatureSet",
26
26
  "Model",
27
27
  "ModelType",
28
+ "ModelFramework",
28
29
  "Endpoint",
29
30
  "Meta",
30
31
  "ParameterStore",
workbench/api/compound.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
  from typing import List
4
4
 
5
5
  # Workbench Imports
6
- from workbench.utils.chem_utils import svg_from_smiles
6
+ from workbench.utils.chem_utils.vis import svg_from_smiles
7
7
 
8
8
 
9
9
  @dataclass
workbench/api/endpoint.py CHANGED
@@ -70,6 +70,17 @@ class Endpoint(EndpointCore):
70
70
  """
71
71
  return super().fast_inference(eval_df, threads=threads)
72
72
 
73
+ def cross_fold_inference(self, nfolds: int = 5) -> pd.DataFrame:
74
+ """Run cross-fold inference (only works for XGBoost models)
75
+
76
+ Args:
77
+ nfolds (int): The number of folds to use for cross-validation (default: 5)
78
+
79
+ Returns:
80
+ pd.DataFrame: A DataFrame with cross fold predictions
81
+ """
82
+ return super().cross_fold_inference(nfolds)
83
+
73
84
 
74
85
  if __name__ == "__main__":
75
86
  """Exercise the Endpoint Class"""
@@ -12,7 +12,7 @@ import pandas as pd
12
12
  from workbench.core.artifacts.artifact import Artifact
13
13
  from workbench.core.artifacts.feature_set_core import FeatureSetCore
14
14
  from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel
15
- from workbench.api.model import Model, ModelType
15
+ from workbench.api.model import Model, ModelType, ModelFramework
16
16
 
17
17
 
18
18
  class FeatureSet(FeatureSetCore):
@@ -79,6 +79,7 @@ class FeatureSet(FeatureSetCore):
79
79
  self,
80
80
  name: str,
81
81
  model_type: ModelType,
82
+ model_framework: ModelFramework = ModelFramework.XGBOOST,
82
83
  tags: list = None,
83
84
  description: str = None,
84
85
  feature_list: list = None,
@@ -87,8 +88,8 @@ class FeatureSet(FeatureSetCore):
87
88
  model_import_str: str = None,
88
89
  custom_script: Union[str, Path] = None,
89
90
  custom_args: dict = None,
90
- training_image: str = "xgb_training",
91
- inference_image: str = "xgb_inference",
91
+ training_image: str = "training",
92
+ inference_image: str = "inference",
92
93
  inference_arch: str = "x86_64",
93
94
  **kwargs,
94
95
  ) -> Union[Model, None]:
@@ -98,15 +99,16 @@ class FeatureSet(FeatureSetCore):
98
99
 
99
100
  name (str): The name of the Model to create
100
101
  model_type (ModelType): The type of model to create (See workbench.model.ModelType)
102
+ model_framework (ModelFramework, optional): The framework to use for the model (default: XGBOOST)
101
103
  tags (list, optional): Set the tags for the model. If not given tags will be generated.
102
104
  description (str, optional): Set the description for the model. If not given, a description is generated.
103
105
  feature_list (list, optional): Set the feature list for the model. If not given a feature list is generated.
104
106
  target_column (str, optional): The target column for the model (use None for unsupervised model)
105
- model_class (str, optional): Model class to use (e.g. "KMeans", "PyTorch", default: None)
107
+ model_class (str, optional): Model class to use (e.g. "KMeans", default: None)
106
108
  model_import_str (str, optional): The import for the model (e.g. "from sklearn.cluster import KMeans")
107
109
  custom_script (str, optional): The custom script to use for the model (default: None)
108
- training_image (str, optional): The training image to use (default: "xgb_training")
109
- inference_image (str, optional): The inference image to use (default: "xgb_inference")
110
+ training_image (str, optional): The training image to use (default: "training")
111
+ inference_image (str, optional): The inference image to use (default: "inference")
110
112
  inference_arch (str, optional): The architecture to use for inference (default: "x86_64")
111
113
  kwargs (dict, optional): Additional keyword arguments to pass to the model
112
114
 
@@ -128,8 +130,8 @@ class FeatureSet(FeatureSetCore):
128
130
  # Create the Model Tags
129
131
  tags = [name] if tags is None else tags
130
132
 
131
- # If the model_class is PyTorch, ensure we set the training and inference images
132
- if model_class and model_class.lower() == "pytorch":
133
+ # If the model framework is PyTorch or ChemProp, ensure we set the training and inference images
134
+ if model_framework in (ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP):
133
135
  training_image = "pytorch_training"
134
136
  inference_image = "pytorch_inference"
135
137
 
@@ -138,6 +140,7 @@ class FeatureSet(FeatureSetCore):
138
140
  feature_name=self.name,
139
141
  model_name=name,
140
142
  model_type=model_type,
143
+ model_framework=model_framework,
141
144
  model_class=model_class,
142
145
  model_import_str=model_import_str,
143
146
  custom_script=custom_script,
workbench/api/meta.py CHANGED
@@ -113,13 +113,16 @@ class Meta(CloudMeta):
113
113
  """
114
114
  return super().models(details=details)
115
115
 
116
- def endpoints(self) -> pd.DataFrame:
116
+ def endpoints(self, details: bool = False) -> pd.DataFrame:
117
117
  """Get a summary of the Endpoints deployed in the Cloud Platform
118
118
 
119
+ Args:
120
+ details (bool, optional): Include detailed information. Defaults to False.
121
+
119
122
  Returns:
120
123
  pd.DataFrame: A summary of the Endpoints in the Cloud Platform
121
124
  """
122
- return super().endpoints()
125
+ return super().endpoints(details=details)
123
126
 
124
127
  def pipelines(self) -> pd.DataFrame:
125
128
  """Get a summary of the ML Pipelines deployed in the Cloud Platform
workbench/api/model.py CHANGED
@@ -7,10 +7,10 @@ Dashboard UI, which provides additional model details and performance metrics
7
7
 
8
8
  # Workbench Imports
9
9
  from workbench.core.artifacts.artifact import Artifact
10
- from workbench.core.artifacts.model_core import ModelCore, ModelType # noqa: F401
10
+ from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework # noqa: F401
11
11
  from workbench.core.transforms.model_to_endpoint.model_to_endpoint import ModelToEndpoint
12
12
  from workbench.api.endpoint import Endpoint
13
- from workbench.utils.model_utils import proximity_model, uq_model
13
+ from workbench.utils.model_utils import proximity_model_local, uq_model
14
14
 
15
15
 
16
16
  class Model(ModelCore):
@@ -40,6 +40,7 @@ class Model(ModelCore):
40
40
  mem_size: int = 2048,
41
41
  max_concurrency: int = 5,
42
42
  instance: str = "ml.t2.medium",
43
+ data_capture: bool = False,
43
44
  ) -> Endpoint:
44
45
  """Create an Endpoint from the Model.
45
46
 
@@ -50,6 +51,7 @@ class Model(ModelCore):
50
51
  mem_size (int): The memory size for the Endpoint in MB (default: 2048)
51
52
  max_concurrency (int): The maximum concurrency for the Endpoint (default: 5)
52
53
  instance (str): The instance type to use for Realtime(serverless=False) Endpoints (default: "ml.t2.medium")
54
+ data_capture (bool): Enable data capture for the Endpoint (default: False)
53
55
 
54
56
  Returns:
55
57
  Endpoint: The Endpoint created from the Model
@@ -73,6 +75,7 @@ class Model(ModelCore):
73
75
  model_to_endpoint.transform(
74
76
  mem_size=mem_size,
75
77
  max_concurrency=max_concurrency,
78
+ data_capture=data_capture,
76
79
  )
77
80
 
78
81
  # Set the Endpoint Owner and Return the Endpoint
@@ -80,19 +83,13 @@ class Model(ModelCore):
80
83
  end.set_owner(self.get_owner())
81
84
  return end
82
85
 
83
- def prox_model(self, prox_model_name: str = None, track_columns: list = None) -> "Model":
84
- """Create a Proximity Model for this Model
85
-
86
- Args:
87
- prox_model_name (str, optional): Name of the Proximity Model (if not specified, a name will be generated)
88
- track_columns (list, optional): List of columns to track in the Proximity Model.
86
+ def prox_model(self):
87
+ """Create a local Proximity Model for this Model
89
88
 
90
89
  Returns:
91
- Model: The Proximity Model
90
+ Proximity: A local Proximity Model
92
91
  """
93
- if prox_model_name is None:
94
- prox_model_name = self.model_name + "-prox"
95
- return proximity_model(self, prox_model_name, track_columns=track_columns)
92
+ return proximity_model_local(self)
96
93
 
97
94
  def uq_model(self, uq_model_name: str = None, train_all_data: bool = False) -> "Model":
98
95
  """Create a Uncertainty Quantification Model for this Model
@@ -118,6 +115,10 @@ if __name__ == "__main__":
118
115
  pprint(my_model.summary())
119
116
  pprint(my_model.details())
120
117
 
121
- # Create an Endpoint from the Model
122
- my_endpoint = my_model.to_endpoint()
123
- pprint(my_endpoint.summary())
118
+ # Create an Endpoint from the Model (commented out for now)
119
+ # my_endpoint = my_model.to_endpoint()
120
+ # pprint(my_endpoint.summary())
121
+
122
+ # Create a local Proximity Model for this Model
123
+ prox_model = my_model.prox_model()
124
+ print(prox_model.neighbors(3398))
workbench/api/monitor.py CHANGED
@@ -15,7 +15,7 @@ class Monitor(MonitorCore):
15
15
 
16
16
  Common Usage:
17
17
  ```
18
- mon = Endpoint(name).get_monitor() # Pull from endpoint OR
18
+ mon = Endpoint(name).monitor() # Pull from endpoint OR
19
19
  mon = Monitor(name) # Create using Endpoint Name
20
20
  mon.summary()
21
21
  mon.details()
@@ -29,7 +29,6 @@ class Monitor(MonitorCore):
29
29
  baseline_df = mon.get_baseline()
30
30
  constraints_df = mon.get_constraints()
31
31
  stats_df = mon.get_statistics()
32
- input_df, output_df = mon.get_captured_data()
33
32
  ```
34
33
  """
35
34
 
@@ -81,15 +80,6 @@ class Monitor(MonitorCore):
81
80
  """
82
81
  super().create_monitoring_schedule(schedule)
83
82
 
84
- def get_captured_data(self) -> (pd.DataFrame, pd.DataFrame):
85
- """
86
- Get the latest data capture input and output from S3.
87
-
88
- Returns:
89
- DataFrame (input), DataFrame(output): Flattened and processed DataFrames for input and output data.
90
- """
91
- return super().get_captured_data()
92
-
93
83
  def get_baseline(self) -> Union[pd.DataFrame, None]:
94
84
  """Code to get the baseline CSV from the S3 baseline directory
95
85
 
@@ -155,8 +145,3 @@ if __name__ == "__main__":
155
145
 
156
146
  print("\nStatistics...")
157
147
  print(mm.get_statistics())
158
-
159
- # Get the latest data capture
160
- input_df, output_df = mm.get_captured_data()
161
- print(input_df.head())
162
- print(output_df.head())
@@ -15,7 +15,16 @@ from .artifact import Artifact
15
15
  from .athena_source import AthenaSource
16
16
  from .data_source_abstract import DataSourceAbstract
17
17
  from .feature_set_core import FeatureSetCore
18
- from .model_core import ModelCore, ModelType
18
+ from .model_core import ModelCore, ModelType, ModelFramework
19
19
  from .endpoint_core import EndpointCore
20
20
 
21
- __all__ = ["Artifact", "AthenaSource", "DataSourceAbstract", "FeatureSetCore", "ModelCore", "ModelType", "EndpointCore"]
21
+ __all__ = [
22
+ "Artifact",
23
+ "AthenaSource",
24
+ "DataSourceAbstract",
25
+ "FeatureSetCore",
26
+ "ModelCore",
27
+ "ModelType",
28
+ "ModelFramework",
29
+ "EndpointCore",
30
+ ]
@@ -236,6 +236,12 @@ class Artifact(ABC):
236
236
  This functionality will work for FeatureSets, Models, and Endpoints
237
237
  but not for DataSources. The DataSource class overrides this method.
238
238
  """
239
+
240
+ # Check for ReadOnly Role
241
+ if self.aws_account_clamp.read_only:
242
+ self.log.info("Cannot add metadata with a ReadOnly Permissions...")
243
+ return
244
+
239
245
  # Sanity check
240
246
  aws_arn = self.arn()
241
247
  if aws_arn is None:
@@ -444,10 +450,12 @@ class Artifact(ABC):
444
450
 
445
451
  if __name__ == "__main__":
446
452
  """Exercise the Artifact Class"""
447
- from workbench.api.data_source import DataSource
448
- from workbench.api.feature_set import FeatureSet
453
+ from workbench.api import DataSource, FeatureSet, Endpoint
454
+
455
+ # Grab an Endpoint (which is a subclass of Artifact)
456
+ end = Endpoint("wine-classification")
449
457
 
450
- # Create a DataSource (which is a subclass of Artifact)
458
+ # Grab a DataSource (which is a subclass of Artifact)
451
459
  data_source = DataSource("test_data")
452
460
 
453
461
  # Just some random tests