workbench-0.8.197-py3-none-any.whl → workbench-0.8.201-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. workbench/algorithms/dataframe/proximity.py +19 -12
  2. workbench/api/__init__.py +2 -1
  3. workbench/api/feature_set.py +7 -4
  4. workbench/api/model.py +1 -1
  5. workbench/core/artifacts/__init__.py +11 -2
  6. workbench/core/artifacts/endpoint_core.py +84 -46
  7. workbench/core/artifacts/feature_set_core.py +69 -1
  8. workbench/core/artifacts/model_core.py +37 -7
  9. workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
  10. workbench/core/transforms/features_to_model/features_to_model.py +23 -20
  11. workbench/core/views/view.py +2 -2
  12. workbench/model_scripts/chemprop/chemprop.template +931 -0
  13. workbench/model_scripts/chemprop/generated_model_script.py +931 -0
  14. workbench/model_scripts/chemprop/requirements.txt +11 -0
  15. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  16. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  17. workbench/model_scripts/custom_models/proximity/proximity.py +19 -12
  18. workbench/model_scripts/custom_models/uq_models/proximity.py +19 -12
  19. workbench/model_scripts/pytorch_model/generated_model_script.py +130 -88
  20. workbench/model_scripts/pytorch_model/pytorch.template +128 -86
  21. workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
  22. workbench/model_scripts/script_generation.py +10 -7
  23. workbench/model_scripts/uq_models/generated_model_script.py +25 -18
  24. workbench/model_scripts/uq_models/mapie.template +23 -16
  25. workbench/model_scripts/xgb_model/generated_model_script.py +6 -6
  26. workbench/model_scripts/xgb_model/xgb_model.template +2 -2
  27. workbench/repl/workbench_shell.py +14 -5
  28. workbench/scripts/endpoint_test.py +162 -0
  29. workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
  30. workbench/utils/chemprop_utils.py +724 -0
  31. workbench/utils/pytorch_utils.py +497 -0
  32. workbench/utils/xgboost_model_utils.py +12 -5
  33. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/METADATA +2 -2
  34. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/RECORD +38 -30
  35. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/entry_points.txt +2 -1
  36. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/WHEEL +0 -0
  37. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/licenses/LICENSE +0 -0
  38. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/top_level.txt +0 -0
workbench/core/transforms/features_to_model/features_to_model.py

@@ -9,7 +9,7 @@ import time
 # Local Imports
 from workbench.core.transforms.transform import Transform, TransformInput, TransformOutput
 from workbench.core.artifacts.feature_set_core import FeatureSetCore
-from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelImages
+from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework, ModelImages
 from workbench.core.artifacts.artifact import Artifact
 from workbench.model_scripts.script_generation import generate_model_script, fill_template
 from workbench.utils.model_utils import supported_instance_types

@@ -33,6 +33,7 @@ class FeaturesToModel(Transform):
         feature_name: str,
         model_name: str,
         model_type: ModelType,
+        model_framework=ModelFramework.XGBOOST,
         model_class=None,
         model_import_str=None,
         custom_script=None,

@@ -46,6 +47,7 @@ class FeaturesToModel(Transform):
             feature_name (str): Name of the FeatureSet to use as input
             model_name (str): Name of the Model to create as output
             model_type (ModelType): ModelType.REGRESSOR or ModelType.CLASSIFIER, etc.
+            model_framework (ModelFramework, optional): The model framework (default ModelFramework.XGBOOST)
             model_class (str, optional): The scikit model (e.g. KNeighborsRegressor) (default None)
             model_import_str (str, optional): The import string for the model (default None)
             custom_script (str, optional): Custom script to use for the model (default None)
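
From the caller's side, the new argument slots in next to model_type; a minimal sketch based on the abalone example in this file's __main__ block (the framework choice here is illustrative only; the default remains ModelFramework.XGBOOST):

```python
from workbench.core.artifacts.model_core import ModelType, ModelFramework
from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel

to_model = FeaturesToModel(
    "abalone_features",
    "abalone-regression",
    model_type=ModelType.REGRESSOR,
    model_framework=ModelFramework.PYTORCH_TABULAR,  # illustrative; omit to keep the XGBoost default
)
to_model.set_output_tags(["test"])
to_model.transform(target_column="class_number_of_rings", description="Abalone Regression (PyTorch)")
```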
@@ -65,6 +67,7 @@ class FeaturesToModel(Transform):
         self.input_type = TransformInput.FEATURE_SET
         self.output_type = TransformOutput.MODEL
         self.model_type = model_type
+        self.model_framework = model_framework
         self.model_class = model_class
         self.model_import_str = model_import_str
         self.custom_script = str(custom_script) if custom_script else None

@@ -157,6 +160,7 @@ class FeaturesToModel(Transform):
         template_params = {
             "model_imports": self.model_import_str,
             "model_type": self.model_type,
+            "model_framework": self.model_framework,
             "model_class": self.model_class,
             "target_column": self.target_column,
             "feature_list": self.model_feature_list,

@@ -234,12 +238,20 @@ class FeaturesToModel(Transform):
 
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)
+
+        # Use GPU instance for ChemProp/PyTorch, CPU for others
+        if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR]:
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
+            self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
+        else:
+            train_instance_type = "ml.m5.xlarge"
+
         self.estimator = Estimator(
             entry_point=entry_point,
             source_dir=source_dir,
             role=self.workbench_role_arn,
             instance_count=1,
-            instance_type="ml.m5.xlarge",
+            instance_type=train_instance_type,
             sagemaker_session=self.sm_session,
             image_uri=image,
             metric_definitions=metric_definitions,
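
Pulled out of the transform for clarity, the new instance-selection rule amounts to the following (this helper is a sketch for illustration, not a workbench API):

```python
from workbench.core.artifacts.model_core import ModelFramework

def training_instance_for(framework: ModelFramework) -> str:
    """Restate the rule above: GPU for the deep-learning frameworks, CPU for everything else."""
    if framework in (ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR):
        return "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
    return "ml.m5.xlarge"      # CPU instance used for XGBoost, scikit-learn, and custom scripts
```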
@@ -274,8 +286,10 @@ class FeaturesToModel(Transform):
         self.log.info("Post-Transform: Calling onboard() on the Model...")
         time.sleep(3)  # Give AWS time to complete Model register
 
-        # Store the model feature_list and target_column in the workbench_meta
-        output_model = ModelCore(self.output_name, model_type=self.model_type)
+        # Store the model metadata information
+        output_model = ModelCore(self.output_name)
+        output_model._set_model_type(self.model_type)
+        output_model._set_model_framework(self.model_framework)
         output_model.upsert_workbench_meta({"workbench_model_features": self.model_feature_list})
         output_model.upsert_workbench_meta({"workbench_model_target": self.target_column})
 
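Code that previously passed model_type to the ModelCore constructor now sets the type and framework through the new setters; a minimal sketch of the equivalent sequence (model name, feature names, and target below are placeholder values taken from the examples in this file):

```python
from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework

# Old: output_model = ModelCore("abalone-regression", model_type=ModelType.REGRESSOR)
output_model = ModelCore("abalone-regression")
output_model._set_model_type(ModelType.REGRESSOR)
output_model._set_model_framework(ModelFramework.XGBOOST)

# Feature list and target column are still stored in the workbench metadata (placeholder values)
output_model.upsert_workbench_meta({"workbench_model_features": ["length", "diameter", "height"]})
output_model.upsert_workbench_meta({"workbench_model_target": "class_number_of_rings"})
```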
@@ -330,12 +344,11 @@ if __name__ == "__main__":
 
     # Regression Model
     input_name = "abalone_features"
-    output_name = "test-abalone-regression"
+    output_name = "abalone-regression"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.REGRESSOR)
     to_model.set_output_tags(["test"])
     to_model.transform(target_column="class_number_of_rings", description="Test Abalone Regression")
 
-    """
     # Classification Model
     input_name = "wine_features"
     output_name = "wine-classification"

@@ -345,10 +358,10 @@ if __name__ == "__main__":
 
     # Quantile Regression Model (Abalone)
     input_name = "abalone_features"
-    output_name = "abalone-quantile-reg"
+    output_name = "abalone-regression-uq"
     to_model = FeaturesToModel(input_name, output_name, ModelType.UQ_REGRESSOR)
-    to_model.set_output_tags(["abalone", "quantiles"])
-    to_model.transform(target_column="class_number_of_rings", description="Abalone Quantile Regression")
+    to_model.set_output_tags(["abalone", "uq"])
+    to_model.transform(target_column="class_number_of_rings", description="Abalone UQ Regression")
 
     # Scikit-Learn Kmeans Clustering Model
     input_name = "wine_features"

@@ -402,7 +415,7 @@ if __name__ == "__main__":
     scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
     my_script = scripts_root / "custom_models" / "chem_info" / "molecular_descriptors.py"
     input_name = "aqsol_features"
-    output_name = "smiles-to-taut-md-stereo-v0"
+    output_name = "test-smiles-to-taut-md-stereo"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "molecular descriptors"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Molecular Descriptors")

@@ -415,13 +428,3 @@ if __name__ == "__main__":
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "morgan fingerprints"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Morgan Fingerprints")
-
-    # Tautomerization Model
-    scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
-    my_script = scripts_root / "custom_models" / "chem_info" / "tautomerize.py"
-    input_name = "aqsol_features"
-    output_name = "tautomerize-v0"
-    to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
-    to_model.set_output_tags(["smiles", "tautomerization"])
-    to_model.transform(target_column=None, feature_list=["smiles"], description="Tautomerize Smiles")
-    """
workbench/core/views/view.py

@@ -91,11 +91,11 @@ class View:
             self.table, self.data_source.database, self.data_source.boto3_session
         )
 
-    def pull_dataframe(self, limit: int = 50000) -> Union[pd.DataFrame, None]:
+    def pull_dataframe(self, limit: int = 100000) -> Union[pd.DataFrame, None]:
         """Pull a DataFrame based on the view type
 
         Args:
-            limit (int): The maximum number of rows to pull (default: 50000)
+            limit (int): The maximum number of rows to pull (default: 100000)
 
         Returns:
             Union[pd.DataFrame, None]: The DataFrame for the view or None if it doesn't exist
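
The only caller-visible effect is the larger default row cap; a minimal usage sketch, assuming you already hold a View instance (how the view is obtained is unchanged and not shown in this diff):

```python
# `view` is assumed to be an existing View instance
# With no argument, pull_dataframe() now caps at 100,000 rows instead of 50,000
df = view.pull_dataframe()

# Callers that want the old behavior can still pass the limit explicitly
df_small = view.pull_dataframe(limit=50000)
```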