workbench-0.8.197-py3-none-any.whl → workbench-0.8.201-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. workbench/algorithms/dataframe/proximity.py +19 -12
  2. workbench/api/__init__.py +2 -1
  3. workbench/api/feature_set.py +7 -4
  4. workbench/api/model.py +1 -1
  5. workbench/core/artifacts/__init__.py +11 -2
  6. workbench/core/artifacts/endpoint_core.py +84 -46
  7. workbench/core/artifacts/feature_set_core.py +69 -1
  8. workbench/core/artifacts/model_core.py +37 -7
  9. workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
  10. workbench/core/transforms/features_to_model/features_to_model.py +23 -20
  11. workbench/core/views/view.py +2 -2
  12. workbench/model_scripts/chemprop/chemprop.template +931 -0
  13. workbench/model_scripts/chemprop/generated_model_script.py +931 -0
  14. workbench/model_scripts/chemprop/requirements.txt +11 -0
  15. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  16. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  17. workbench/model_scripts/custom_models/proximity/proximity.py +19 -12
  18. workbench/model_scripts/custom_models/uq_models/proximity.py +19 -12
  19. workbench/model_scripts/pytorch_model/generated_model_script.py +130 -88
  20. workbench/model_scripts/pytorch_model/pytorch.template +128 -86
  21. workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
  22. workbench/model_scripts/script_generation.py +10 -7
  23. workbench/model_scripts/uq_models/generated_model_script.py +25 -18
  24. workbench/model_scripts/uq_models/mapie.template +23 -16
  25. workbench/model_scripts/xgb_model/generated_model_script.py +6 -6
  26. workbench/model_scripts/xgb_model/xgb_model.template +2 -2
  27. workbench/repl/workbench_shell.py +14 -5
  28. workbench/scripts/endpoint_test.py +162 -0
  29. workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
  30. workbench/utils/chemprop_utils.py +724 -0
  31. workbench/utils/pytorch_utils.py +497 -0
  32. workbench/utils/xgboost_model_utils.py +12 -5
  33. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/METADATA +2 -2
  34. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/RECORD +38 -30
  35. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/entry_points.txt +2 -1
  36. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/WHEEL +0 -0
  37. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/licenses/LICENSE +0 -0
  38. {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/top_level.txt +0 -0
workbench/core/transforms/features_to_model/features_to_model.py

@@ -9,7 +9,7 @@ import time
 # Local Imports
 from workbench.core.transforms.transform import Transform, TransformInput, TransformOutput
 from workbench.core.artifacts.feature_set_core import FeatureSetCore
-from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelImages
+from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework, ModelImages
 from workbench.core.artifacts.artifact import Artifact
 from workbench.model_scripts.script_generation import generate_model_script, fill_template
 from workbench.utils.model_utils import supported_instance_types

@@ -33,6 +33,7 @@ class FeaturesToModel(Transform):
         feature_name: str,
         model_name: str,
         model_type: ModelType,
+        model_framework=ModelFramework.XGBOOST,
         model_class=None,
         model_import_str=None,
         custom_script=None,

@@ -46,6 +47,7 @@ class FeaturesToModel(Transform):
             feature_name (str): Name of the FeatureSet to use as input
             model_name (str): Name of the Model to create as output
             model_type (ModelType): ModelType.REGRESSOR or ModelType.CLASSIFIER, etc.
+            model_framework (ModelFramework, optional): The model framework (default ModelFramework.XGBOOST)
             model_class (str, optional): The scikit model (e.g. KNeighborsRegressor) (default None)
             model_import_str (str, optional): The import string for the model (default None)
             custom_script (str, optional): Custom script to use for the model (default None)
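
From the caller's side, the new argument slots in next to model_type; a minimal sketch based on the abalone example in this file's __main__ block (the framework choice here is illustrative only; the default remains ModelFramework.XGBOOST):

```python
from workbench.core.artifacts.model_core import ModelType, ModelFramework
from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel

to_model = FeaturesToModel(
    "abalone_features",
    "abalone-regression",
    model_type=ModelType.REGRESSOR,
    model_framework=ModelFramework.PYTORCH_TABULAR,  # illustrative; omit to keep the XGBoost default
)
to_model.set_output_tags(["test"])
to_model.transform(target_column="class_number_of_rings", description="Abalone Regression (PyTorch)")
```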
@@ -65,6 +67,7 @@ class FeaturesToModel(Transform):
         self.input_type = TransformInput.FEATURE_SET
         self.output_type = TransformOutput.MODEL
         self.model_type = model_type
+        self.model_framework = model_framework
         self.model_class = model_class
         self.model_import_str = model_import_str
         self.custom_script = str(custom_script) if custom_script else None

@@ -157,6 +160,7 @@ class FeaturesToModel(Transform):
         template_params = {
             "model_imports": self.model_import_str,
             "model_type": self.model_type,
+            "model_framework": self.model_framework,
             "model_class": self.model_class,
             "target_column": self.target_column,
             "feature_list": self.model_feature_list,

@@ -234,12 +238,20 @@ class FeaturesToModel(Transform):
 
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)
+
+        # Use GPU instance for ChemProp/PyTorch, CPU for others
+        if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR]:
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
+            self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
+        else:
+            train_instance_type = "ml.m5.xlarge"
+
         self.estimator = Estimator(
             entry_point=entry_point,
             source_dir=source_dir,
             role=self.workbench_role_arn,
             instance_count=1,
-            instance_type="ml.m5.xlarge",
+            instance_type=train_instance_type,
             sagemaker_session=self.sm_session,
             image_uri=image,
             metric_definitions=metric_definitions,
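
Pulled out of the transform for clarity, the new instance-selection rule amounts to the following (this helper is a sketch for illustration, not a workbench API):

```python
from workbench.core.artifacts.model_core import ModelFramework

def training_instance_for(framework: ModelFramework) -> str:
    """Restate the rule above: GPU for the deep-learning frameworks, CPU for everything else."""
    if framework in (ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR):
        return "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
    return "ml.m5.xlarge"      # CPU instance used for XGBoost, scikit-learn, and custom scripts
```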
@@ -274,8 +286,10 @@ class FeaturesToModel(Transform):
         self.log.info("Post-Transform: Calling onboard() on the Model...")
         time.sleep(3)  # Give AWS time to complete Model register
 
-        # Store the model feature_list and target_column in the workbench_meta
-        output_model = ModelCore(self.output_name, model_type=self.model_type)
+        # Store the model metadata information
+        output_model = ModelCore(self.output_name)
+        output_model._set_model_type(self.model_type)
+        output_model._set_model_framework(self.model_framework)
         output_model.upsert_workbench_meta({"workbench_model_features": self.model_feature_list})
         output_model.upsert_workbench_meta({"workbench_model_target": self.target_column})
 
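Code that previously passed model_type to the ModelCore constructor now sets the type and framework through the new setters; a minimal sketch of the equivalent sequence (model name, feature names, and target below are placeholder values taken from the examples in this file):

```python
from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework

# Old: output_model = ModelCore("abalone-regression", model_type=ModelType.REGRESSOR)
output_model = ModelCore("abalone-regression")
output_model._set_model_type(ModelType.REGRESSOR)
output_model._set_model_framework(ModelFramework.XGBOOST)

# Feature list and target column are still stored in the workbench metadata (placeholder values)
output_model.upsert_workbench_meta({"workbench_model_features": ["length", "diameter", "height"]})
output_model.upsert_workbench_meta({"workbench_model_target": "class_number_of_rings"})
```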
@@ -330,12 +344,11 @@ if __name__ == "__main__":
 
     # Regression Model
     input_name = "abalone_features"
-    output_name = "test-abalone-regression"
+    output_name = "abalone-regression"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.REGRESSOR)
     to_model.set_output_tags(["test"])
     to_model.transform(target_column="class_number_of_rings", description="Test Abalone Regression")
 
-    """
     # Classification Model
     input_name = "wine_features"
     output_name = "wine-classification"

@@ -345,10 +358,10 @@ if __name__ == "__main__":
 
     # Quantile Regression Model (Abalone)
     input_name = "abalone_features"
-    output_name = "abalone-quantile-reg"
+    output_name = "abalone-regression-uq"
     to_model = FeaturesToModel(input_name, output_name, ModelType.UQ_REGRESSOR)
-    to_model.set_output_tags(["abalone", "quantiles"])
-    to_model.transform(target_column="class_number_of_rings", description="Abalone Quantile Regression")
+    to_model.set_output_tags(["abalone", "uq"])
+    to_model.transform(target_column="class_number_of_rings", description="Abalone UQ Regression")
 
     # Scikit-Learn Kmeans Clustering Model
     input_name = "wine_features"

@@ -402,7 +415,7 @@ if __name__ == "__main__":
     scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
     my_script = scripts_root / "custom_models" / "chem_info" / "molecular_descriptors.py"
     input_name = "aqsol_features"
-    output_name = "smiles-to-taut-md-stereo-v0"
+    output_name = "test-smiles-to-taut-md-stereo"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "molecular descriptors"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Molecular Descriptors")

@@ -415,13 +428,3 @@ if __name__ == "__main__":
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "morgan fingerprints"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Morgan Fingerprints")
-
-    # Tautomerization Model
-    scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
-    my_script = scripts_root / "custom_models" / "chem_info" / "tautomerize.py"
-    input_name = "aqsol_features"
-    output_name = "tautomerize-v0"
-    to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
-    to_model.set_output_tags(["smiles", "tautomerization"])
-    to_model.transform(target_column=None, feature_list=["smiles"], description="Tautomerize Smiles")
-    """
workbench/core/views/view.py

@@ -91,11 +91,11 @@ class View:
             self.table, self.data_source.database, self.data_source.boto3_session
         )
 
-    def pull_dataframe(self, limit: int = 50000) -> Union[pd.DataFrame, None]:
+    def pull_dataframe(self, limit: int = 100000) -> Union[pd.DataFrame, None]:
         """Pull a DataFrame based on the view type
 
         Args:
-            limit (int): The maximum number of rows to pull (default: 50000)
+            limit (int): The maximum number of rows to pull (default: 100000)
 
         Returns:
             Union[pd.DataFrame, None]: The DataFrame for the view or None if it doesn't exist
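
The only caller-visible effect is the larger default row cap; a minimal usage sketch, assuming you already hold a View instance (how the view is obtained is unchanged and not shown in this diff):

```python
# `view` is assumed to be an existing View instance
# With no argument, pull_dataframe() now caps at 100,000 rows instead of 50,000
df = view.pull_dataframe()

# Callers that want the old behavior can still pass the limit explicitly
df_small = view.pull_dataframe(limit=50000)
```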