workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (113)
  1. workbench/algorithms/dataframe/__init__.py +1 -2
  2. workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
  3. workbench/algorithms/dataframe/proximity.py +261 -235
  4. workbench/algorithms/graph/light/proximity_graph.py +10 -8
  5. workbench/api/__init__.py +2 -1
  6. workbench/api/compound.py +1 -1
  7. workbench/api/endpoint.py +11 -0
  8. workbench/api/feature_set.py +11 -8
  9. workbench/api/meta.py +5 -2
  10. workbench/api/model.py +16 -15
  11. workbench/api/monitor.py +1 -16
  12. workbench/core/artifacts/__init__.py +11 -2
  13. workbench/core/artifacts/artifact.py +11 -3
  14. workbench/core/artifacts/data_capture_core.py +355 -0
  15. workbench/core/artifacts/endpoint_core.py +256 -118
  16. workbench/core/artifacts/feature_set_core.py +265 -16
  17. workbench/core/artifacts/model_core.py +107 -60
  18. workbench/core/artifacts/monitor_core.py +33 -248
  19. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  20. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  21. workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
  22. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  23. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  24. workbench/core/transforms/features_to_model/features_to_model.py +42 -32
  25. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  26. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  27. workbench/core/views/training_view.py +113 -42
  28. workbench/core/views/view.py +53 -3
  29. workbench/core/views/view_utils.py +4 -4
  30. workbench/model_scripts/chemprop/chemprop.template +852 -0
  31. workbench/model_scripts/chemprop/generated_model_script.py +852 -0
  32. workbench/model_scripts/chemprop/requirements.txt +11 -0
  33. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  34. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  35. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  36. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  37. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  38. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  39. workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
  40. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  41. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  42. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  43. workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
  44. workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
  45. workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
  46. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  47. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  48. workbench/model_scripts/pytorch_model/generated_model_script.py +373 -190
  49. workbench/model_scripts/pytorch_model/pytorch.template +370 -187
  50. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  51. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  52. workbench/model_scripts/script_generation.py +17 -9
  53. workbench/model_scripts/uq_models/generated_model_script.py +605 -0
  54. workbench/model_scripts/uq_models/mapie.template +605 -0
  55. workbench/model_scripts/uq_models/requirements.txt +1 -0
  56. workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
  57. workbench/model_scripts/xgb_model/xgb_model.template +44 -46
  58. workbench/repl/workbench_shell.py +28 -14
  59. workbench/scripts/endpoint_test.py +162 -0
  60. workbench/scripts/lambda_test.py +73 -0
  61. workbench/scripts/ml_pipeline_batch.py +137 -0
  62. workbench/scripts/ml_pipeline_sqs.py +186 -0
  63. workbench/scripts/monitor_cloud_watch.py +20 -100
  64. workbench/utils/aws_utils.py +4 -3
  65. workbench/utils/chem_utils/__init__.py +0 -0
  66. workbench/utils/chem_utils/fingerprints.py +134 -0
  67. workbench/utils/chem_utils/misc.py +194 -0
  68. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  69. workbench/utils/chem_utils/mol_standardize.py +450 -0
  70. workbench/utils/chem_utils/mol_tagging.py +348 -0
  71. workbench/utils/chem_utils/projections.py +209 -0
  72. workbench/utils/chem_utils/salts.py +256 -0
  73. workbench/utils/chem_utils/sdf.py +292 -0
  74. workbench/utils/chem_utils/toxicity.py +250 -0
  75. workbench/utils/chem_utils/vis.py +253 -0
  76. workbench/utils/chemprop_utils.py +760 -0
  77. workbench/utils/cloudwatch_handler.py +1 -1
  78. workbench/utils/cloudwatch_utils.py +137 -0
  79. workbench/utils/config_manager.py +3 -7
  80. workbench/utils/endpoint_utils.py +5 -7
  81. workbench/utils/license_manager.py +2 -6
  82. workbench/utils/model_utils.py +95 -34
  83. workbench/utils/monitor_utils.py +44 -62
  84. workbench/utils/pandas_utils.py +3 -3
  85. workbench/utils/pytorch_utils.py +526 -0
  86. workbench/utils/shap_utils.py +10 -2
  87. workbench/utils/workbench_logging.py +0 -3
  88. workbench/utils/workbench_sqs.py +1 -1
  89. workbench/utils/xgboost_model_utils.py +371 -156
  90. workbench/web_interface/components/model_plot.py +7 -1
  91. workbench/web_interface/components/plugin_unit_test.py +5 -2
  92. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  93. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  94. workbench/web_interface/components/plugins/model_details.py +9 -7
  95. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  96. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
  97. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/RECORD +101 -85
  98. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
  99. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
  100. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  101. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  102. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  103. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  104. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  105. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  106. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  107. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  108. workbench/utils/chem_utils.py +0 -1556
  109. workbench/utils/execution_environment.py +0 -211
  110. workbench/utils/fast_inference.py +0 -167
  111. workbench/utils/resource_utils.py +0 -39
  112. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
  113. {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ from typing import Union
4
4
  import logging
5
5
 
6
6
  # Workbench Imports
7
- from workbench.algorithms.dataframe import Proximity, ProximityType
7
+ from workbench.algorithms.dataframe import Proximity
8
8
  from workbench.api.graph_store import GraphStore
9
9
 
10
10
  # Set up logging
@@ -50,12 +50,13 @@ class ProximityGraph:
50
50
  self._nx_graph.add_nodes_from(node_df.set_index(id_column, drop=False).to_dict("index").items())
51
51
 
52
52
  # Determine edge weights based on proximity type
53
- if prox.proximity_type == ProximityType.SIMILARITY:
54
- all_neighbors_df["weight"] = all_neighbors_df["similarity"]
55
- elif prox.proximity_type == ProximityType.DISTANCE:
56
- # Normalize and invert distance
57
- max_distance = all_neighbors_df["distance"].max()
58
- all_neighbors_df["weight"] = 1.0 - all_neighbors_df["distance"] / max_distance
53
+ # if prox.proximity_type == ProximityType.SIMILARITY:
54
+ # all_neighbors_df["weight"] = all_neighbors_df["similarity"]
55
+ # elif prox.proximity_type == ProximityType.DISTANCE:
56
+
57
+ # Normalize and invert distance
58
+ max_distance = all_neighbors_df["distance"].max()
59
+ all_neighbors_df["weight"] = 1.0 - all_neighbors_df["distance"] / max_distance
59
60
 
60
61
  # Add edges to the graph
61
62
  log.info("Adding edges to the graph...")
@@ -135,7 +136,8 @@ if __name__ == "__main__":
135
136
  from workbench.algorithms.dataframe.fingerprint_proximity import FingerprintProximity
136
137
  from workbench.web_interface.components.plugins.graph_plot import GraphPlot
137
138
  from workbench.api import DFStore
138
- from workbench.utils.chem_utils import compute_morgan_fingerprints, project_fingerprints
139
+ from workbench.utils.chem_utils.fingerprints import compute_morgan_fingerprints
140
+ from workbench.utils.chem_utils.projections import project_fingerprints
139
141
  from workbench.utils.graph_utils import connected_sample, graph_layout
140
142
 
141
143
  def show_graph(graph, id_column):
workbench/api/__init__.py CHANGED
@@ -14,7 +14,7 @@ These class provide high-level APIs for the Workbench package, offering easy acc
14
14
 
15
15
  from .data_source import DataSource
16
16
  from .feature_set import FeatureSet
17
- from .model import Model, ModelType
17
+ from .model import Model, ModelType, ModelFramework
18
18
  from .endpoint import Endpoint
19
19
  from .meta import Meta
20
20
  from .parameter_store import ParameterStore
@@ -25,6 +25,7 @@ __all__ = [
25
25
  "FeatureSet",
26
26
  "Model",
27
27
  "ModelType",
28
+ "ModelFramework",
28
29
  "Endpoint",
29
30
  "Meta",
30
31
  "ParameterStore",
workbench/api/compound.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
  from typing import List
4
4
 
5
5
  # Workbench Imports
6
- from workbench.utils.chem_utils import svg_from_smiles
6
+ from workbench.utils.chem_utils.vis import svg_from_smiles
7
7
 
8
8
 
9
9
  @dataclass
workbench/api/endpoint.py CHANGED
@@ -70,6 +70,17 @@ class Endpoint(EndpointCore):
70
70
  """
71
71
  return super().fast_inference(eval_df, threads=threads)
72
72
 
73
+ def cross_fold_inference(self, nfolds: int = 5) -> pd.DataFrame:
74
+ """Run cross-fold inference (only works for XGBoost models)
75
+
76
+ Args:
77
+ nfolds (int): The number of folds to use for cross-validation (default: 5)
78
+
79
+ Returns:
80
+ pd.DataFrame: A DataFrame with cross fold predictions
81
+ """
82
+ return super().cross_fold_inference(nfolds)
83
+
73
84
 
74
85
  if __name__ == "__main__":
75
86
  """Exercise the Endpoint Class"""
@@ -12,7 +12,7 @@ import pandas as pd
12
12
  from workbench.core.artifacts.artifact import Artifact
13
13
  from workbench.core.artifacts.feature_set_core import FeatureSetCore
14
14
  from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel
15
- from workbench.api.model import Model, ModelType
15
+ from workbench.api.model import Model, ModelType, ModelFramework
16
16
 
17
17
 
18
18
  class FeatureSet(FeatureSetCore):
@@ -79,6 +79,7 @@ class FeatureSet(FeatureSetCore):
79
79
  self,
80
80
  name: str,
81
81
  model_type: ModelType,
82
+ model_framework: ModelFramework = ModelFramework.XGBOOST,
82
83
  tags: list = None,
83
84
  description: str = None,
84
85
  feature_list: list = None,
@@ -87,8 +88,8 @@ class FeatureSet(FeatureSetCore):
87
88
  model_import_str: str = None,
88
89
  custom_script: Union[str, Path] = None,
89
90
  custom_args: dict = None,
90
- training_image: str = "xgb_training",
91
- inference_image: str = "xgb_inference",
91
+ training_image: str = "training",
92
+ inference_image: str = "inference",
92
93
  inference_arch: str = "x86_64",
93
94
  **kwargs,
94
95
  ) -> Union[Model, None]:
@@ -98,15 +99,16 @@ class FeatureSet(FeatureSetCore):
98
99
 
99
100
  name (str): The name of the Model to create
100
101
  model_type (ModelType): The type of model to create (See workbench.model.ModelType)
102
+ model_framework (ModelFramework, optional): The framework to use for the model (default: XGBOOST)
101
103
  tags (list, optional): Set the tags for the model. If not given tags will be generated.
102
104
  description (str, optional): Set the description for the model. If not given, a description is generated.
103
105
  feature_list (list, optional): Set the feature list for the model. If not given a feature list is generated.
104
106
  target_column (str, optional): The target column for the model (use None for unsupervised model)
105
- model_class (str, optional): Model class to use (e.g. "KMeans", "PyTorch", default: None)
107
+ model_class (str, optional): Model class to use (e.g. "KMeans", default: None)
106
108
  model_import_str (str, optional): The import for the model (e.g. "from sklearn.cluster import KMeans")
107
109
  custom_script (str, optional): The custom script to use for the model (default: None)
108
- training_image (str, optional): The training image to use (default: "xgb_training")
109
- inference_image (str, optional): The inference image to use (default: "xgb_inference")
110
+ training_image (str, optional): The training image to use (default: "training")
111
+ inference_image (str, optional): The inference image to use (default: "inference")
110
112
  inference_arch (str, optional): The architecture to use for inference (default: "x86_64")
111
113
  kwargs (dict, optional): Additional keyword arguments to pass to the model
112
114
 
@@ -128,8 +130,8 @@ class FeatureSet(FeatureSetCore):
128
130
  # Create the Model Tags
129
131
  tags = [name] if tags is None else tags
130
132
 
131
- # If the model_class is PyTorch, ensure we set the training and inference images
132
- if model_class and model_class.lower() == "pytorch":
133
+ # If the model framework is PyTorch or ChemProp, ensure we set the training and inference images
134
+ if model_framework in (ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP):
133
135
  training_image = "pytorch_training"
134
136
  inference_image = "pytorch_inference"
135
137
 
@@ -138,6 +140,7 @@ class FeatureSet(FeatureSetCore):
138
140
  feature_name=self.name,
139
141
  model_name=name,
140
142
  model_type=model_type,
143
+ model_framework=model_framework,
141
144
  model_class=model_class,
142
145
  model_import_str=model_import_str,
143
146
  custom_script=custom_script,
workbench/api/meta.py CHANGED
@@ -113,13 +113,16 @@ class Meta(CloudMeta):
113
113
  """
114
114
  return super().models(details=details)
115
115
 
116
- def endpoints(self) -> pd.DataFrame:
116
+ def endpoints(self, details: bool = False) -> pd.DataFrame:
117
117
  """Get a summary of the Endpoints deployed in the Cloud Platform
118
118
 
119
+ Args:
120
+ details (bool, optional): Include detailed information. Defaults to False.
121
+
119
122
  Returns:
120
123
  pd.DataFrame: A summary of the Endpoints in the Cloud Platform
121
124
  """
122
- return super().endpoints()
125
+ return super().endpoints(details=details)
123
126
 
124
127
  def pipelines(self) -> pd.DataFrame:
125
128
  """Get a summary of the ML Pipelines deployed in the Cloud Platform
workbench/api/model.py CHANGED
@@ -7,10 +7,10 @@ Dashboard UI, which provides additional model details and performance metrics
7
7
 
8
8
  # Workbench Imports
9
9
  from workbench.core.artifacts.artifact import Artifact
10
- from workbench.core.artifacts.model_core import ModelCore, ModelType # noqa: F401
10
+ from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework # noqa: F401
11
11
  from workbench.core.transforms.model_to_endpoint.model_to_endpoint import ModelToEndpoint
12
12
  from workbench.api.endpoint import Endpoint
13
- from workbench.utils.model_utils import proximity_model, uq_model
13
+ from workbench.utils.model_utils import proximity_model_local, uq_model
14
14
 
15
15
 
16
16
  class Model(ModelCore):
@@ -40,6 +40,7 @@ class Model(ModelCore):
40
40
  mem_size: int = 2048,
41
41
  max_concurrency: int = 5,
42
42
  instance: str = "ml.t2.medium",
43
+ data_capture: bool = False,
43
44
  ) -> Endpoint:
44
45
  """Create an Endpoint from the Model.
45
46
 
@@ -50,6 +51,7 @@ class Model(ModelCore):
50
51
  mem_size (int): The memory size for the Endpoint in MB (default: 2048)
51
52
  max_concurrency (int): The maximum concurrency for the Endpoint (default: 5)
52
53
  instance (str): The instance type to use for Realtime(serverless=False) Endpoints (default: "ml.t2.medium")
54
+ data_capture (bool): Enable data capture for the Endpoint (default: False)
53
55
 
54
56
  Returns:
55
57
  Endpoint: The Endpoint created from the Model
@@ -73,6 +75,7 @@ class Model(ModelCore):
73
75
  model_to_endpoint.transform(
74
76
  mem_size=mem_size,
75
77
  max_concurrency=max_concurrency,
78
+ data_capture=data_capture,
76
79
  )
77
80
 
78
81
  # Set the Endpoint Owner and Return the Endpoint
@@ -80,19 +83,13 @@ class Model(ModelCore):
80
83
  end.set_owner(self.get_owner())
81
84
  return end
82
85
 
83
- def prox_model(self, prox_model_name: str = None, track_columns: list = None) -> "Model":
84
- """Create a Proximity Model for this Model
85
-
86
- Args:
87
- prox_model_name (str, optional): Name of the Proximity Model (if not specified, a name will be generated)
88
- track_columns (list, optional): List of columns to track in the Proximity Model.
86
+ def prox_model(self):
87
+ """Create a local Proximity Model for this Model
89
88
 
90
89
  Returns:
91
- Model: The Proximity Model
90
+ Proximity: A local Proximity Model
92
91
  """
93
- if prox_model_name is None:
94
- prox_model_name = self.model_name + "-prox"
95
- return proximity_model(self, prox_model_name, track_columns=track_columns)
92
+ return proximity_model_local(self)
96
93
 
97
94
  def uq_model(self, uq_model_name: str = None, train_all_data: bool = False) -> "Model":
98
95
  """Create a Uncertainty Quantification Model for this Model
@@ -118,6 +115,10 @@ if __name__ == "__main__":
118
115
  pprint(my_model.summary())
119
116
  pprint(my_model.details())
120
117
 
121
- # Create an Endpoint from the Model
122
- my_endpoint = my_model.to_endpoint()
123
- pprint(my_endpoint.summary())
118
+ # Create an Endpoint from the Model (commented out for now)
119
+ # my_endpoint = my_model.to_endpoint()
120
+ # pprint(my_endpoint.summary())
121
+
122
+ # Create a local Proximity Model for this Model
123
+ prox_model = my_model.prox_model()
124
+ print(prox_model.neighbors(3398))
workbench/api/monitor.py CHANGED
@@ -15,7 +15,7 @@ class Monitor(MonitorCore):
15
15
 
16
16
  Common Usage:
17
17
  ```
18
- mon = Endpoint(name).get_monitor() # Pull from endpoint OR
18
+ mon = Endpoint(name).monitor() # Pull from endpoint OR
19
19
  mon = Monitor(name) # Create using Endpoint Name
20
20
  mon.summary()
21
21
  mon.details()
@@ -29,7 +29,6 @@ class Monitor(MonitorCore):
29
29
  baseline_df = mon.get_baseline()
30
30
  constraints_df = mon.get_constraints()
31
31
  stats_df = mon.get_statistics()
32
- input_df, output_df = mon.get_captured_data()
33
32
  ```
34
33
  """
35
34
 
@@ -81,15 +80,6 @@ class Monitor(MonitorCore):
81
80
  """
82
81
  super().create_monitoring_schedule(schedule)
83
82
 
84
- def get_captured_data(self) -> (pd.DataFrame, pd.DataFrame):
85
- """
86
- Get the latest data capture input and output from S3.
87
-
88
- Returns:
89
- DataFrame (input), DataFrame(output): Flattened and processed DataFrames for input and output data.
90
- """
91
- return super().get_captured_data()
92
-
93
83
  def get_baseline(self) -> Union[pd.DataFrame, None]:
94
84
  """Code to get the baseline CSV from the S3 baseline directory
95
85
 
@@ -155,8 +145,3 @@ if __name__ == "__main__":
155
145
 
156
146
  print("\nStatistics...")
157
147
  print(mm.get_statistics())
158
-
159
- # Get the latest data capture
160
- input_df, output_df = mm.get_captured_data()
161
- print(input_df.head())
162
- print(output_df.head())
@@ -15,7 +15,16 @@ from .artifact import Artifact
15
15
  from .athena_source import AthenaSource
16
16
  from .data_source_abstract import DataSourceAbstract
17
17
  from .feature_set_core import FeatureSetCore
18
- from .model_core import ModelCore, ModelType
18
+ from .model_core import ModelCore, ModelType, ModelFramework
19
19
  from .endpoint_core import EndpointCore
20
20
 
21
- __all__ = ["Artifact", "AthenaSource", "DataSourceAbstract", "FeatureSetCore", "ModelCore", "ModelType", "EndpointCore"]
21
+ __all__ = [
22
+ "Artifact",
23
+ "AthenaSource",
24
+ "DataSourceAbstract",
25
+ "FeatureSetCore",
26
+ "ModelCore",
27
+ "ModelType",
28
+ "ModelFramework",
29
+ "EndpointCore",
30
+ ]
@@ -236,6 +236,12 @@ class Artifact(ABC):
236
236
  This functionality will work for FeatureSets, Models, and Endpoints
237
237
  but not for DataSources. The DataSource class overrides this method.
238
238
  """
239
+
240
+ # Check for ReadOnly Role
241
+ if self.aws_account_clamp.read_only:
242
+ self.log.info("Cannot add metadata with a ReadOnly Permissions...")
243
+ return
244
+
239
245
  # Sanity check
240
246
  aws_arn = self.arn()
241
247
  if aws_arn is None:
@@ -444,10 +450,12 @@ class Artifact(ABC):
444
450
 
445
451
  if __name__ == "__main__":
446
452
  """Exercise the Artifact Class"""
447
- from workbench.api.data_source import DataSource
448
- from workbench.api.feature_set import FeatureSet
453
+ from workbench.api import DataSource, FeatureSet, Endpoint
454
+
455
+ # Grab an Endpoint (which is a subclass of Artifact)
456
+ end = Endpoint("wine-classification")
449
457
 
450
- # Create a DataSource (which is a subclass of Artifact)
458
+ # Grab a DataSource (which is a subclass of Artifact)
451
459
  data_source = DataSource("test_data")
452
460
 
453
461
  # Just some random tests