workbench-0.8.201-py3-none-any.whl → workbench-0.8.204-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. workbench/api/df_store.py +17 -108
  2. workbench/api/feature_set.py +41 -7
  3. workbench/api/parameter_store.py +3 -52
  4. workbench/core/artifacts/artifact.py +5 -5
  5. workbench/core/artifacts/df_store_core.py +114 -0
  6. workbench/core/artifacts/endpoint_core.py +184 -75
  7. workbench/core/artifacts/model_core.py +11 -7
  8. workbench/core/artifacts/parameter_store_core.py +98 -0
  9. workbench/core/transforms/features_to_model/features_to_model.py +27 -13
  10. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +11 -0
  11. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  12. workbench/model_scripts/chemprop/chemprop.template +312 -293
  13. workbench/model_scripts/chemprop/generated_model_script.py +316 -297
  14. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
  15. workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
  16. workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
  17. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
  18. workbench/model_scripts/pytorch_model/generated_model_script.py +278 -128
  19. workbench/model_scripts/pytorch_model/pytorch.template +273 -123
  20. workbench/model_scripts/uq_models/generated_model_script.py +20 -11
  21. workbench/model_scripts/uq_models/mapie.template +17 -8
  22. workbench/model_scripts/xgb_model/generated_model_script.py +38 -9
  23. workbench/model_scripts/xgb_model/xgb_model.template +34 -5
  24. workbench/resources/open_source_api.key +1 -1
  25. workbench/utils/chemprop_utils.py +38 -1
  26. workbench/utils/pytorch_utils.py +38 -8
  27. workbench/web_interface/components/model_plot.py +7 -1
  28. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/METADATA +2 -2
  29. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/RECORD +33 -33
  30. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  31. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
  32. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/WHEEL +0 -0
  33. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/entry_points.txt +0 -0
  34. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/licenses/LICENSE +0 -0
  35. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/top_level.txt +0 -0
workbench/api/df_store.py CHANGED
@@ -1,35 +1,32 @@
  """DFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
 
- from datetime import datetime
  from typing import Union
- import logging
- import pandas as pd
 
  # Workbench Imports
- from workbench.core.cloud_platform.aws.aws_df_store import AWSDFStore
+ from workbench.core.artifacts.df_store_core import DFStoreCore
 
 
- class DFStore(AWSDFStore):
+ class DFStore(DFStoreCore):
  """DFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy
 
- Common Usage:
- ```python
- df_store = DFStore()
+ Common Usage:
+ ```python
+ df_store = DFStore()
 
- # List Data
- df_store.list()
+ # List Data
+ df_store.list()
 
- # Add DataFrame
- df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
- df_store.upsert("/test/my_data", df)
+ # Add DataFrame
+ df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+ df_store.upsert("/test/my_data", df)
 
- # Retrieve DataFrame
- df = df_store.get("/test/my_data")
- print(df)
+ # Retrieve DataFrame
+ df = df_store.get("/test/my_data")
+ print(df)
 
- # Delete Data
- df_store.delete("/test/my_data")
- ```
+ # Delete Data
+ df_store.delete("/test/my_data")
+ ```
  """
 
  def __init__(self, path_prefix: Union[str, None] = None):
@@ -38,101 +35,13 @@ class DFStore(AWSDFStore):
  Args:
  path_prefix (Union[str, None], optional): Add a path prefix to storage locations (Defaults to None)
  """
- self.log = logging.getLogger("workbench")
-
- # Initialize the SuperClass
  super().__init__(path_prefix=path_prefix)
 
- def list(self, include_cache: bool = False) -> list:
- """List all the objects in the data_store prefix.
-
- Args:
- include_cache (bool, optional): Include cache objects in the list (Defaults to False).
-
- Returns:
- list: A list of all the objects in the data_store prefix.
- """
- return super().list(include_cache=include_cache)
-
- def summary(self, include_cache: bool = False) -> pd.DataFrame:
- """Return a nicely formatted summary of object locations, sizes (in MB), and modified dates.
-
- Args:
- include_cache (bool, optional): Include cache objects in the summary (Defaults to False).
-
- Returns:
- pd.DataFrame: A formatted DataFrame with the summary details.
- """
- return super().summary(include_cache=include_cache)
-
- def details(self, include_cache: bool = False) -> pd.DataFrame:
- """Return a DataFrame with detailed metadata for all objects in the data_store prefix.
-
- Args:
- include_cache (bool, optional): Include cache objects in the details (Defaults to False).
-
- Returns:
- pd.DataFrame: A DataFrame with detailed metadata for all objects in the data_store prefix.
- """
- return super().details(include_cache=include_cache)
-
- def check(self, location: str) -> bool:
- """Check if a DataFrame exists at the specified location
-
- Args:
- location (str): The location of the data to check.
-
- Returns:
- bool: True if the data exists, False otherwise.
- """
- return super().check(location)
-
- def get(self, location: str) -> Union[pd.DataFrame, None]:
- """Retrieve a DataFrame from AWS S3.
-
- Args:
- location (str): The location of the data to retrieve.
-
- Returns:
- pd.DataFrame: The retrieved DataFrame or None if not found.
- """
- _df = super().get(location)
- if _df is None:
- self.log.error(f"Dataframe not found at location: {location}")
- return _df
-
- def upsert(self, location: str, data: Union[pd.DataFrame, pd.Series]):
- """Insert or update a DataFrame or Series in the AWS S3.
-
- Args:
- location (str): The location of the data.
- data (Union[pd.DataFrame, pd.Series]): The data to be stored.
- """
- super().upsert(location, data)
-
- def last_modified(self, location: str) -> Union[datetime, None]:
- """Get the last modified date of the DataFrame at the specified location.
-
- Args:
- location (str): The location of the data to check.
-
- Returns:
- Union[datetime, None]: The last modified date of the DataFrame or None if not found.
- """
- return super().last_modified(location)
-
- def delete(self, location: str):
- """Delete a DataFrame from the AWS S3.
-
- Args:
- location (str): The location of the data to delete.
- """
- super().delete(location)
-
 
  if __name__ == "__main__":
  """Exercise the DFStore Class"""
  import time
+ import pandas as pd
 
  # Create a DFStore manager
  df_store = DFStore()
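The methods removed above were one-for-one pass-throughs to the superclass, so the public DFStore surface is unchanged in 0.8.204; calls now resolve through DFStoreCore by normal inheritance. A minimal usage sketch under that assumption (the storage location is illustrative):

```python
import pandas as pd
from workbench.api.df_store import DFStore

df_store = DFStore()

# Store a small DataFrame under an illustrative location, then read it back
df_store.upsert("/examples/demo", pd.DataFrame({"x": [1, 2, 3]}))
print(df_store.check("/examples/demo"))  # True once the Parquet object is in S3
print(df_store.get("/examples/demo"))    # Returns the DataFrame, or None if missing

# Clean up
df_store.delete("/examples/demo")
```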
workbench/api/feature_set.py CHANGED
@@ -58,10 +58,7 @@ class FeatureSet(FeatureSetCore):
  include_aws_columns (bool): Include the AWS columns in the DataFrame (default: False)
 
  Returns:
- pd.DataFrame: A DataFrame of ALL the data from this FeatureSet
-
- Note:
- Obviously this is not recommended for large datasets :)
+ pd.DataFrame: A DataFrame of all the data from this FeatureSet up to the limit
  """
 
  # Get the table associated with the data
@@ -83,7 +80,7 @@ class FeatureSet(FeatureSetCore):
  tags: list = None,
  description: str = None,
  feature_list: list = None,
- target_column: str = None,
+ target_column: Union[str, list[str]] = None,
  model_class: str = None,
  model_import_str: str = None,
  custom_script: Union[str, Path] = None,
@@ -103,7 +100,7 @@ class FeatureSet(FeatureSetCore):
  tags (list, optional): Set the tags for the model. If not given tags will be generated.
  description (str, optional): Set the description for the model. If not give a description is generated.
  feature_list (list, optional): Set the feature list for the model. If not given a feature list is generated.
- target_column (str, optional): The target column for the model (use None for unsupervised model)
+ target_column (str or list[str], optional): Target column(s) for the model (use None for unsupervised model)
  model_class (str, optional): Model class to use (e.g. "KMeans", default: None)
  model_import_str (str, optional): The import for the model (e.g. "from sklearn.cluster import KMeans")
  custom_script (str, optional): The custom script to use for the model (default: None)
@@ -157,6 +154,24 @@ class FeatureSet(FeatureSetCore):
  # Return the Model
  return Model(name)
 
+ def prox_model(self, target: str, features: list) -> "Proximity":  # noqa: F821
+ """Create a local Proximity Model for this Model
+
+ Args:
+ target (str): The target column name
+ features (list): The list of feature column names
+
+ Returns:
+ Proximity: A local Proximity Model
+ """
+ from workbench.algorithms.dataframe.proximity import Proximity  # noqa: F401 (avoid circular import)
+
+ # Create the Proximity Model from the full FeatureSet dataframe
+ full_df = self.pull_dataframe()
+
+ # Create and return the Proximity Model
+ return Proximity(full_df, self.id_column, features, target, track_columns=features)
+
 
  if __name__ == "__main__":
  """Exercise the FeatureSet Class"""
@@ -167,5 +182,24 @@ if __name__ == "__main__":
  pprint(my_features.summary())
  pprint(my_features.details())
 
+ # Pull the full DataFrame
+ df = my_features.pull_dataframe()
+ print(df.head())
+
+ # Create a Proximity Model from the FeatureSet
+ features = ["height", "weight", "age", "iq_score", "likes_dogs", "food"]
+ my_prox = my_features.prox_model(target="salary", features=features)
+ neighbors = my_prox.neighbors(42)
+ print("Neighbors for ID 42:")
+ print(neighbors)
+
  # Create a Model from the FeatureSet
- my_model = my_features.to_model(name="test-model", model_type=ModelType.REGRESSOR, target_column="iq_score")
+ """
+ my_model = my_features.to_model(
+ name="test-model",
+ model_type=ModelType.REGRESSOR,
+ target_column="salary",
+ feature_list=features
+ )
+ pprint(my_model.summary())
+ """
workbench/api/parameter_store.py CHANGED
@@ -1,13 +1,10 @@
  """ParameterStore: Manages Workbench parameters in a Cloud Based Parameter Store."""
 
- from typing import Union
- import logging
-
  # Workbench Imports
- from workbench.core.cloud_platform.aws.aws_parameter_store import AWSParameterStore
+ from workbench.core.artifacts.parameter_store_core import ParameterStoreCore
 
 
- class ParameterStore(AWSParameterStore):
+ class ParameterStore(ParameterStoreCore):
  """ParameterStore: Manages Workbench parameters in a Cloud Based Parameter Store.
 
  Common Usage:
@@ -43,56 +40,10 @@ class ParameterStore(AWSParameterStore):
 
  def __init__(self):
  """ParameterStore Init Method"""
- self.log = logging.getLogger("workbench")
 
- # Initialize the SuperClass
+ # Initialize parent class
  super().__init__()
 
- def list(self, prefix: str = None) -> list:
- """List all parameters in the AWS Parameter Store, optionally filtering by a prefix.
-
- Args:
- prefix (str, optional): A prefix to filter the parameters by. Defaults to None.
-
- Returns:
- list: A list of parameter names and details.
- """
- return super().list(prefix=prefix)
-
- def get(self, name: str, warn: bool = True, decrypt: bool = True) -> Union[str, list, dict, None]:
- """Retrieve a parameter value from the AWS Parameter Store.
-
- Args:
- name (str): The name of the parameter to retrieve.
- warn (bool): Whether to log a warning if the parameter is not found.
- decrypt (bool): Whether to decrypt secure string parameters.
-
- Returns:
- Union[str, list, dict, None]: The value of the parameter or None if not found.
- """
- return super().get(name=name, warn=warn, decrypt=decrypt)
-
- def upsert(self, name: str, value):
- """Insert or update a parameter in the AWS Parameter Store.
-
- Args:
- name (str): The name of the parameter.
- value (str | list | dict): The value of the parameter.
- """
- super().upsert(name=name, value=value)
-
- def delete(self, name: str):
- """Delete a parameter from the AWS Parameter Store.
-
- Args:
- name (str): The name of the parameter to delete.
- """
- super().delete(name=name)
-
- def __repr__(self):
- """Return a string representation of the ParameterStore object."""
- return super().__repr__()
-
 
  if __name__ == "__main__":
  """Exercise the ParameterStore Class"""
workbench/core/artifacts/artifact.py CHANGED
@@ -8,8 +8,8 @@ from typing import Union
 
  # Workbench Imports
  from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
- from workbench.core.cloud_platform.aws.aws_parameter_store import AWSParameterStore as ParameterStore
- from workbench.core.cloud_platform.aws.aws_df_store import AWSDFStore as DFStore
+ from workbench.core.artifacts.parameter_store_core import ParameterStoreCore
+ from workbench.core.artifacts.df_store_core import DFStoreCore
  from workbench.utils.aws_utils import dict_to_aws_tags
  from workbench.utils.config_manager import ConfigManager, FatalConfigError
  from workbench.core.cloud_platform.cloud_meta import CloudMeta
@@ -48,11 +48,11 @@ class Artifact(ABC):
  tag_delimiter = "::"
 
  # Grab our Dataframe Cache Storage
- df_cache = DFStore(path_prefix="/workbench/dataframe_cache")
+ df_cache = DFStoreCore(path_prefix="/workbench/dataframe_cache")
 
  # Artifact may want to use the Parameter Store or Dataframe Store
- param_store = ParameterStore()
- df_store = DFStore()
+ param_store = ParameterStoreCore()
+ df_store = DFStoreCore()
 
  def __init__(self, name: str, use_cached_meta: bool = False):
  """Initialize the Artifact Base Class
workbench/core/artifacts/df_store_core.py ADDED
@@ -0,0 +1,114 @@
+ """DFStoreCore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
+
+ import logging
+ from typing import Union
+
+ # Workbench Imports
+ from workbench.utils.config_manager import ConfigManager
+ from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
+
+ # Workbench Bridges Import
+ from workbench_bridges.api import DFStore as BridgesDFStore
+
+
+ class DFStoreCore(BridgesDFStore):
+ """DFStoreCore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy
+
+ Common Usage:
+ ```python
+ df_store = DFStoreCore()
+
+ # List Data
+ df_store.list()
+
+ # Add DataFrame
+ df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+ df_store.upsert("/test/my_data", df)
+
+ # Retrieve DataFrame
+ df = df_store.get("/test/my_data")
+ print(df)
+
+ # Delete Data
+ df_store.delete("/test/my_data")
+ ```
+ """
+
+ def __init__(self, path_prefix: Union[str, None] = None):
+ """DFStoreCore Init Method
+
+ Args:
+ path_prefix (Union[str, None], optional): Add a path prefix to storage locations (Defaults to None)
+ """
+ # Get config from workbench's systems
+ bucket = ConfigManager().get_config("WORKBENCH_BUCKET")
+ session = AWSAccountClamp().boto3_session
+
+ # Initialize parent with workbench config
+ super().__init__(path_prefix=path_prefix, s3_bucket=bucket, boto3_session=session)
+ self.log = logging.getLogger("workbench")
+
+
+ if __name__ == "__main__":
+ """Exercise the DFStoreCore Class"""
+ import time
+ import pandas as pd
+
+ # Create a DFStoreCore manager
+ df_store = DFStoreCore()
+
+ # Details of the Dataframe Store
+ print("Detailed Data...")
+ print(df_store.details())
+
+ # Add a new DataFrame
+ my_df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+ df_store.upsert("/testing/test_data", my_df)
+
+ # Get the DataFrame
+ print(f"Getting data 'test_data':\n{df_store.get('/testing/test_data')}")
+
+ # Now let's test adding a Series
+ series = pd.Series([1, 2, 3, 4], name="Series")
+ df_store.upsert("/testing/test_series", series)
+ print(f"Getting data 'test_series':\n{df_store.get('/testing/test_series')}")
+
+ # Summary of the data
+ print("Summary Data...")
+ print(df_store.summary())
+
+ # Repr of the DFStoreCore object
+ print("DFStoreCore Object:")
+ print(df_store)
+
+ # Check if the data exists
+ print("Check if data exists...")
+ print(df_store.check("/testing/test_data"))
+ print(df_store.check("/testing/test_series"))
+
+ # Time the check
+ start_time = time.time()
+ print(df_store.check("/testing/test_data"))
+ print("--- Check %s seconds ---" % (time.time() - start_time))
+
+ # Now delete the test data
+ df_store.delete("/testing/test_data")
+ df_store.delete("/testing/test_series")
+
+ # Check if the data exists
+ print("Check if data exists...")
+ print(df_store.check("/testing/test_data"))
+ print(df_store.check("/testing/test_series"))
+
+ # Add a bunch of dataframes and then test recursive delete
+ for i in range(10):
+ df_store.upsert(f"/testing/data_{i}", pd.DataFrame({"A": [1, 2], "B": [3, 4]}))
+ print("Before Recursive Delete:")
+ print(df_store.summary())
+ df_store.delete_recursive("/testing")
+ print("After Recursive Delete:")
+ print(df_store.summary())
+
+ # Get a non-existent DataFrame
+ print("Getting non-existent data...")
+ print(df_store.get("/testing/no_where"))
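The constructor above is the whole integration: pull the bucket from ConfigManager, a boto3 session from AWSAccountClamp, and hand both to the workbench_bridges DFStore base class. The same pattern gives prefixed stores like Artifact.df_cache. A short sketch under those assumptions (the prefix and location are illustrative):

```python
import pandas as pd
from workbench.core.artifacts.df_store_core import DFStoreCore

# A store whose locations all live under one prefix, mirroring
# Artifact.df_cache = DFStoreCore(path_prefix="/workbench/dataframe_cache")
cache = DFStoreCore(path_prefix="/examples/cache")  # illustrative prefix

cache.upsert("/predictions", pd.DataFrame({"id": [1, 2], "pred": [0.1, 0.9]}))
print(cache.check("/predictions"))  # True once the Parquet object lands in S3
print(cache.summary())              # locations, sizes, and modified dates under the prefix
cache.delete("/predictions")
```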