workbench 0.8.198__py3-none-any.whl → 0.8.203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. workbench/algorithms/dataframe/proximity.py +11 -4
  2. workbench/api/__init__.py +2 -1
  3. workbench/api/df_store.py +17 -108
  4. workbench/api/feature_set.py +48 -11
  5. workbench/api/model.py +1 -1
  6. workbench/api/parameter_store.py +3 -52
  7. workbench/core/artifacts/__init__.py +11 -2
  8. workbench/core/artifacts/artifact.py +5 -5
  9. workbench/core/artifacts/df_store_core.py +114 -0
  10. workbench/core/artifacts/endpoint_core.py +261 -78
  11. workbench/core/artifacts/feature_set_core.py +69 -1
  12. workbench/core/artifacts/model_core.py +48 -14
  13. workbench/core/artifacts/parameter_store_core.py +98 -0
  14. workbench/core/transforms/features_to_model/features_to_model.py +50 -33
  15. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  16. workbench/core/views/view.py +2 -2
  17. workbench/model_scripts/chemprop/chemprop.template +933 -0
  18. workbench/model_scripts/chemprop/generated_model_script.py +933 -0
  19. workbench/model_scripts/chemprop/requirements.txt +11 -0
  20. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  21. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  22. workbench/model_scripts/custom_models/proximity/proximity.py +11 -4
  23. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
  24. workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
  25. workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
  26. workbench/model_scripts/custom_models/uq_models/proximity.py +11 -4
  27. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
  28. workbench/model_scripts/pytorch_model/generated_model_script.py +365 -173
  29. workbench/model_scripts/pytorch_model/pytorch.template +362 -170
  30. workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
  31. workbench/model_scripts/script_generation.py +10 -7
  32. workbench/model_scripts/uq_models/generated_model_script.py +43 -27
  33. workbench/model_scripts/uq_models/mapie.template +40 -24
  34. workbench/model_scripts/xgb_model/generated_model_script.py +36 -7
  35. workbench/model_scripts/xgb_model/xgb_model.template +36 -7
  36. workbench/repl/workbench_shell.py +14 -5
  37. workbench/resources/open_source_api.key +1 -1
  38. workbench/scripts/endpoint_test.py +162 -0
  39. workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
  40. workbench/utils/chemprop_utils.py +761 -0
  41. workbench/utils/pytorch_utils.py +527 -0
  42. workbench/utils/xgboost_model_utils.py +10 -5
  43. workbench/web_interface/components/model_plot.py +7 -1
  44. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/METADATA +3 -3
  45. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/RECORD +49 -43
  46. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/entry_points.txt +2 -1
  47. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  48. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  49. workbench/model_scripts/__pycache__/script_generation.cpython-312.pyc +0 -0
  50. workbench/model_scripts/__pycache__/script_generation.cpython-313.pyc +0 -0
  51. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/WHEEL +0 -0
  52. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/licenses/LICENSE +0 -0
  53. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/top_level.txt +0 -0
@@ -69,6 +69,7 @@ class Proximity:
69
69
  top_percent: float = 1.0,
70
70
  min_delta: Optional[float] = None,
71
71
  k_neighbors: int = 4,
72
+ only_coincident: bool = False,
72
73
  ) -> pd.DataFrame:
73
74
  """
74
75
  Find compounds with steep target gradients (data quality issues and activity cliffs).
@@ -81,6 +82,7 @@ class Proximity:
81
82
  top_percent: Percentage of compounds with steepest gradients to return (e.g., 1.0 = top 1%)
82
83
  min_delta: Minimum absolute target difference to consider. If None, defaults to target_range/100
83
84
  k_neighbors: Number of neighbors to use for median calculation (default: 4)
85
+ only_coincident: If True, only consider compounds that are coincident (default: False)
84
86
 
85
87
  Returns:
86
88
  DataFrame of compounds with steepest gradients, sorted by gradient (descending)
@@ -99,10 +101,15 @@ class Proximity:
99
101
  min_delta = self.target_range / 100.0 if self.target_range > 0 else 0.0
100
102
  candidates = candidates[candidates["nn_target_diff"] >= min_delta]
101
103
 
102
- # Get top X% by initial gradient
103
- percentile = 100 - top_percent
104
- threshold = np.percentile(candidates["gradient"], percentile)
105
- candidates = candidates[candidates["gradient"] >= threshold].copy()
104
+ # Filter based on mode
105
+ if only_coincident:
106
+ # Only keep coincident points (nn_distance ~= 0)
107
+ candidates = candidates[candidates["nn_distance"] < epsilon].copy()
108
+ else:
109
+ # Get top X% by initial gradient
110
+ percentile = 100 - top_percent
111
+ threshold = np.percentile(candidates["gradient"], percentile)
112
+ candidates = candidates[candidates["gradient"] >= threshold].copy()
106
113
 
107
114
  # Phase 2: Verify with k-neighbor median to filter out cases where nearest neighbor is the outlier
108
115
  results = []
workbench/api/__init__.py CHANGED
@@ -14,7 +14,7 @@ These class provide high-level APIs for the Workbench package, offering easy acc
14
14
 
15
15
  from .data_source import DataSource
16
16
  from .feature_set import FeatureSet
17
- from .model import Model, ModelType
17
+ from .model import Model, ModelType, ModelFramework
18
18
  from .endpoint import Endpoint
19
19
  from .meta import Meta
20
20
  from .parameter_store import ParameterStore
@@ -25,6 +25,7 @@ __all__ = [
25
25
  "FeatureSet",
26
26
  "Model",
27
27
  "ModelType",
28
+ "ModelFramework",
28
29
  "Endpoint",
29
30
  "Meta",
30
31
  "ParameterStore",
workbench/api/df_store.py CHANGED
@@ -1,35 +1,32 @@
1
1
  """DFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
2
2
 
3
- from datetime import datetime
4
3
  from typing import Union
5
- import logging
6
- import pandas as pd
7
4
 
8
5
  # Workbench Imports
9
- from workbench.core.cloud_platform.aws.aws_df_store import AWSDFStore
6
+ from workbench.core.artifacts.df_store_core import DFStoreCore
10
7
 
11
8
 
12
- class DFStore(AWSDFStore):
9
+ class DFStore(DFStoreCore):
13
10
  """DFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy
14
11
 
15
- Common Usage:
16
- ```python
17
- df_store = DFStore()
12
+ Common Usage:
13
+ ```python
14
+ df_store = DFStore()
18
15
 
19
- # List Data
20
- df_store.list()
16
+ # List Data
17
+ df_store.list()
21
18
 
22
- # Add DataFrame
23
- df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
24
- df_store.upsert("/test/my_data", df)
19
+ # Add DataFrame
20
+ df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
21
+ df_store.upsert("/test/my_data", df)
25
22
 
26
- # Retrieve DataFrame
27
- df = df_store.get("/test/my_data")
28
- print(df)
23
+ # Retrieve DataFrame
24
+ df = df_store.get("/test/my_data")
25
+ print(df)
29
26
 
30
- # Delete Data
31
- df_store.delete("/test/my_data")
32
- ```
27
+ # Delete Data
28
+ df_store.delete("/test/my_data")
29
+ ```
33
30
  """
34
31
 
35
32
  def __init__(self, path_prefix: Union[str, None] = None):
@@ -38,101 +35,13 @@ class DFStore(AWSDFStore):
38
35
  Args:
39
36
  path_prefix (Union[str, None], optional): Add a path prefix to storage locations (Defaults to None)
40
37
  """
41
- self.log = logging.getLogger("workbench")
42
-
43
- # Initialize the SuperClass
44
38
  super().__init__(path_prefix=path_prefix)
45
39
 
46
- def list(self, include_cache: bool = False) -> list:
47
- """List all the objects in the data_store prefix.
48
-
49
- Args:
50
- include_cache (bool, optional): Include cache objects in the list (Defaults to False).
51
-
52
- Returns:
53
- list: A list of all the objects in the data_store prefix.
54
- """
55
- return super().list(include_cache=include_cache)
56
-
57
- def summary(self, include_cache: bool = False) -> pd.DataFrame:
58
- """Return a nicely formatted summary of object locations, sizes (in MB), and modified dates.
59
-
60
- Args:
61
- include_cache (bool, optional): Include cache objects in the summary (Defaults to False).
62
-
63
- Returns:
64
- pd.DataFrame: A formatted DataFrame with the summary details.
65
- """
66
- return super().summary(include_cache=include_cache)
67
-
68
- def details(self, include_cache: bool = False) -> pd.DataFrame:
69
- """Return a DataFrame with detailed metadata for all objects in the data_store prefix.
70
-
71
- Args:
72
- include_cache (bool, optional): Include cache objects in the details (Defaults to False).
73
-
74
- Returns:
75
- pd.DataFrame: A DataFrame with detailed metadata for all objects in the data_store prefix.
76
- """
77
- return super().details(include_cache=include_cache)
78
-
79
- def check(self, location: str) -> bool:
80
- """Check if a DataFrame exists at the specified location
81
-
82
- Args:
83
- location (str): The location of the data to check.
84
-
85
- Returns:
86
- bool: True if the data exists, False otherwise.
87
- """
88
- return super().check(location)
89
-
90
- def get(self, location: str) -> Union[pd.DataFrame, None]:
91
- """Retrieve a DataFrame from AWS S3.
92
-
93
- Args:
94
- location (str): The location of the data to retrieve.
95
-
96
- Returns:
97
- pd.DataFrame: The retrieved DataFrame or None if not found.
98
- """
99
- _df = super().get(location)
100
- if _df is None:
101
- self.log.error(f"Dataframe not found at location: {location}")
102
- return _df
103
-
104
- def upsert(self, location: str, data: Union[pd.DataFrame, pd.Series]):
105
- """Insert or update a DataFrame or Series in the AWS S3.
106
-
107
- Args:
108
- location (str): The location of the data.
109
- data (Union[pd.DataFrame, pd.Series]): The data to be stored.
110
- """
111
- super().upsert(location, data)
112
-
113
- def last_modified(self, location: str) -> Union[datetime, None]:
114
- """Get the last modified date of the DataFrame at the specified location.
115
-
116
- Args:
117
- location (str): The location of the data to check.
118
-
119
- Returns:
120
- Union[datetime, None]: The last modified date of the DataFrame or None if not found.
121
- """
122
- return super().last_modified(location)
123
-
124
- def delete(self, location: str):
125
- """Delete a DataFrame from the AWS S3.
126
-
127
- Args:
128
- location (str): The location of the data to delete.
129
- """
130
- super().delete(location)
131
-
132
40
 
133
41
  if __name__ == "__main__":
134
42
  """Exercise the DFStore Class"""
135
43
  import time
44
+ import pandas as pd
136
45
 
137
46
  # Create a DFStore manager
138
47
  df_store = DFStore()
@@ -12,7 +12,7 @@ import pandas as pd
12
12
  from workbench.core.artifacts.artifact import Artifact
13
13
  from workbench.core.artifacts.feature_set_core import FeatureSetCore
14
14
  from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel
15
- from workbench.api.model import Model, ModelType
15
+ from workbench.api.model import Model, ModelType, ModelFramework
16
16
 
17
17
 
18
18
  class FeatureSet(FeatureSetCore):
@@ -58,10 +58,7 @@ class FeatureSet(FeatureSetCore):
58
58
  include_aws_columns (bool): Include the AWS columns in the DataFrame (default: False)
59
59
 
60
60
  Returns:
61
- pd.DataFrame: A DataFrame of ALL the data from this FeatureSet
62
-
63
- Note:
64
- Obviously this is not recommended for large datasets :)
61
+ pd.DataFrame: A DataFrame of all the data from this FeatureSet up to the limit
65
62
  """
66
63
 
67
64
  # Get the table associated with the data
@@ -79,10 +76,11 @@ class FeatureSet(FeatureSetCore):
79
76
  self,
80
77
  name: str,
81
78
  model_type: ModelType,
79
+ model_framework: ModelFramework = ModelFramework.XGBOOST,
82
80
  tags: list = None,
83
81
  description: str = None,
84
82
  feature_list: list = None,
85
- target_column: str = None,
83
+ target_column: Union[str, list[str]] = None,
86
84
  model_class: str = None,
87
85
  model_import_str: str = None,
88
86
  custom_script: Union[str, Path] = None,
@@ -98,11 +96,12 @@ class FeatureSet(FeatureSetCore):
98
96
 
99
97
  name (str): The name of the Model to create
100
98
  model_type (ModelType): The type of model to create (See workbench.model.ModelType)
99
+ model_framework (ModelFramework, optional): The framework to use for the model (default: XGBOOST)
101
100
  tags (list, optional): Set the tags for the model. If not given tags will be generated.
102
101
  description (str, optional): Set the description for the model. If not give a description is generated.
103
102
  feature_list (list, optional): Set the feature list for the model. If not given a feature list is generated.
104
- target_column (str, optional): The target column for the model (use None for unsupervised model)
105
- model_class (str, optional): Model class to use (e.g. "KMeans", "PyTorch", default: None)
103
+ target_column (str or list[str], optional): Target column(s) for the model (use None for unsupervised model)
104
+ model_class (str, optional): Model class to use (e.g. "KMeans", default: None)
106
105
  model_import_str (str, optional): The import for the model (e.g. "from sklearn.cluster import KMeans")
107
106
  custom_script (str, optional): The custom script to use for the model (default: None)
108
107
  training_image (str, optional): The training image to use (default: "training")
@@ -128,8 +127,8 @@ class FeatureSet(FeatureSetCore):
128
127
  # Create the Model Tags
129
128
  tags = [name] if tags is None else tags
130
129
 
131
- # If the model_class is PyTorch, ensure we set the training and inference images
132
- if model_class and model_class.lower() == "pytorch":
130
+ # If the model framework is PyTorch or ChemProp, ensure we set the training and inference images
131
+ if model_framework in (ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP):
133
132
  training_image = "pytorch_training"
134
133
  inference_image = "pytorch_inference"
135
134
 
@@ -138,6 +137,7 @@ class FeatureSet(FeatureSetCore):
138
137
  feature_name=self.name,
139
138
  model_name=name,
140
139
  model_type=model_type,
140
+ model_framework=model_framework,
141
141
  model_class=model_class,
142
142
  model_import_str=model_import_str,
143
143
  custom_script=custom_script,
@@ -154,6 +154,24 @@ class FeatureSet(FeatureSetCore):
154
154
  # Return the Model
155
155
  return Model(name)
156
156
 
157
+ def prox_model(self, target: str, features: list) -> "Proximity": # noqa: F821
158
+ """Create a local Proximity Model for this Model
159
+
160
+ Args:
161
+ target (str): The target column name
162
+ features (list): The list of feature column names
163
+
164
+ Returns:
165
+ Proximity: A local Proximity Model
166
+ """
167
+ from workbench.algorithms.dataframe.proximity import Proximity # noqa: F401 (avoid circular import)
168
+
169
+ # Create the Proximity Model from the full FeatureSet dataframe
170
+ full_df = self.pull_dataframe()
171
+
172
+ # Create and return the Proximity Model
173
+ return Proximity(full_df, self.id_column, features, target, track_columns=features)
174
+
157
175
 
158
176
  if __name__ == "__main__":
159
177
  """Exercise the FeatureSet Class"""
@@ -164,5 +182,24 @@ if __name__ == "__main__":
164
182
  pprint(my_features.summary())
165
183
  pprint(my_features.details())
166
184
 
185
+ # Pull the full DataFrame
186
+ df = my_features.pull_dataframe()
187
+ print(df.head())
188
+
189
+ # Create a Proximity Model from the FeatureSet
190
+ features = ["height", "weight", "age", "iq_score", "likes_dogs", "food"]
191
+ my_prox = my_features.prox_model(target="salary", features=features)
192
+ neighbors = my_prox.neighbors(42)
193
+ print("Neighbors for ID 42:")
194
+ print(neighbors)
195
+
167
196
  # Create a Model from the FeatureSet
168
- my_model = my_features.to_model(name="test-model", model_type=ModelType.REGRESSOR, target_column="iq_score")
197
+ """
198
+ my_model = my_features.to_model(
199
+ name="test-model",
200
+ model_type=ModelType.REGRESSOR,
201
+ target_column="salary",
202
+ feature_list=features
203
+ )
204
+ pprint(my_model.summary())
205
+ """
workbench/api/model.py CHANGED
@@ -7,7 +7,7 @@ Dashboard UI, which provides additional model details and performance metrics
7
7
 
8
8
  # Workbench Imports
9
9
  from workbench.core.artifacts.artifact import Artifact
10
- from workbench.core.artifacts.model_core import ModelCore, ModelType # noqa: F401
10
+ from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework # noqa: F401
11
11
  from workbench.core.transforms.model_to_endpoint.model_to_endpoint import ModelToEndpoint
12
12
  from workbench.api.endpoint import Endpoint
13
13
  from workbench.utils.model_utils import proximity_model_local, uq_model
@@ -1,13 +1,10 @@
1
1
  """ParameterStore: Manages Workbench parameters in a Cloud Based Parameter Store."""
2
2
 
3
- from typing import Union
4
- import logging
5
-
6
3
  # Workbench Imports
7
- from workbench.core.cloud_platform.aws.aws_parameter_store import AWSParameterStore
4
+ from workbench.core.artifacts.parameter_store_core import ParameterStoreCore
8
5
 
9
6
 
10
- class ParameterStore(AWSParameterStore):
7
+ class ParameterStore(ParameterStoreCore):
11
8
  """ParameterStore: Manages Workbench parameters in a Cloud Based Parameter Store.
12
9
 
13
10
  Common Usage:
@@ -43,56 +40,10 @@ class ParameterStore(AWSParameterStore):
43
40
 
44
41
  def __init__(self):
45
42
  """ParameterStore Init Method"""
46
- self.log = logging.getLogger("workbench")
47
43
 
48
- # Initialize the SuperClass
44
+ # Initialize parent class
49
45
  super().__init__()
50
46
 
51
- def list(self, prefix: str = None) -> list:
52
- """List all parameters in the AWS Parameter Store, optionally filtering by a prefix.
53
-
54
- Args:
55
- prefix (str, optional): A prefix to filter the parameters by. Defaults to None.
56
-
57
- Returns:
58
- list: A list of parameter names and details.
59
- """
60
- return super().list(prefix=prefix)
61
-
62
- def get(self, name: str, warn: bool = True, decrypt: bool = True) -> Union[str, list, dict, None]:
63
- """Retrieve a parameter value from the AWS Parameter Store.
64
-
65
- Args:
66
- name (str): The name of the parameter to retrieve.
67
- warn (bool): Whether to log a warning if the parameter is not found.
68
- decrypt (bool): Whether to decrypt secure string parameters.
69
-
70
- Returns:
71
- Union[str, list, dict, None]: The value of the parameter or None if not found.
72
- """
73
- return super().get(name=name, warn=warn, decrypt=decrypt)
74
-
75
- def upsert(self, name: str, value):
76
- """Insert or update a parameter in the AWS Parameter Store.
77
-
78
- Args:
79
- name (str): The name of the parameter.
80
- value (str | list | dict): The value of the parameter.
81
- """
82
- super().upsert(name=name, value=value)
83
-
84
- def delete(self, name: str):
85
- """Delete a parameter from the AWS Parameter Store.
86
-
87
- Args:
88
- name (str): The name of the parameter to delete.
89
- """
90
- super().delete(name=name)
91
-
92
- def __repr__(self):
93
- """Return a string representation of the ParameterStore object."""
94
- return super().__repr__()
95
-
96
47
 
97
48
  if __name__ == "__main__":
98
49
  """Exercise the ParameterStore Class"""
@@ -15,7 +15,16 @@ from .artifact import Artifact
15
15
  from .athena_source import AthenaSource
16
16
  from .data_source_abstract import DataSourceAbstract
17
17
  from .feature_set_core import FeatureSetCore
18
- from .model_core import ModelCore, ModelType
18
+ from .model_core import ModelCore, ModelType, ModelFramework
19
19
  from .endpoint_core import EndpointCore
20
20
 
21
- __all__ = ["Artifact", "AthenaSource", "DataSourceAbstract", "FeatureSetCore", "ModelCore", "ModelType", "EndpointCore"]
21
+ __all__ = [
22
+ "Artifact",
23
+ "AthenaSource",
24
+ "DataSourceAbstract",
25
+ "FeatureSetCore",
26
+ "ModelCore",
27
+ "ModelType",
28
+ "ModelFramework",
29
+ "EndpointCore",
30
+ ]
@@ -8,8 +8,8 @@ from typing import Union
8
8
 
9
9
  # Workbench Imports
10
10
  from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
11
- from workbench.core.cloud_platform.aws.aws_parameter_store import AWSParameterStore as ParameterStore
12
- from workbench.core.cloud_platform.aws.aws_df_store import AWSDFStore as DFStore
11
+ from workbench.core.artifacts.parameter_store_core import ParameterStoreCore
12
+ from workbench.core.artifacts.df_store_core import DFStoreCore
13
13
  from workbench.utils.aws_utils import dict_to_aws_tags
14
14
  from workbench.utils.config_manager import ConfigManager, FatalConfigError
15
15
  from workbench.core.cloud_platform.cloud_meta import CloudMeta
@@ -48,11 +48,11 @@ class Artifact(ABC):
48
48
  tag_delimiter = "::"
49
49
 
50
50
  # Grab our Dataframe Cache Storage
51
- df_cache = DFStore(path_prefix="/workbench/dataframe_cache")
51
+ df_cache = DFStoreCore(path_prefix="/workbench/dataframe_cache")
52
52
 
53
53
  # Artifact may want to use the Parameter Store or Dataframe Store
54
- param_store = ParameterStore()
55
- df_store = DFStore()
54
+ param_store = ParameterStoreCore()
55
+ df_store = DFStoreCore()
56
56
 
57
57
  def __init__(self, name: str, use_cached_meta: bool = False):
58
58
  """Initialize the Artifact Base Class
@@ -0,0 +1,114 @@
1
+ """DFStoreCore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
2
+
3
+ import logging
4
+ from typing import Union
5
+
6
+ # Workbench Imports
7
+ from workbench.utils.config_manager import ConfigManager
8
+ from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
9
+
10
+ # Workbench Bridges Import
11
+ from workbench_bridges.api import DFStore as BridgesDFStore
12
+
13
+
14
+ class DFStoreCore(BridgesDFStore):
15
+ """DFStoreCore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy
16
+
17
+ Common Usage:
18
+ ```python
19
+ df_store = DFStoreCore()
20
+
21
+ # List Data
22
+ df_store.list()
23
+
24
+ # Add DataFrame
25
+ df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
26
+ df_store.upsert("/test/my_data", df)
27
+
28
+ # Retrieve DataFrame
29
+ df = df_store.get("/test/my_data")
30
+ print(df)
31
+
32
+ # Delete Data
33
+ df_store.delete("/test/my_data")
34
+ ```
35
+ """
36
+
37
+ def __init__(self, path_prefix: Union[str, None] = None):
38
+ """DFStoreCore Init Method
39
+
40
+ Args:
41
+ path_prefix (Union[str, None], optional): Add a path prefix to storage locations (Defaults to None)
42
+ """
43
+ # Get config from workbench's systems
44
+ bucket = ConfigManager().get_config("WORKBENCH_BUCKET")
45
+ session = AWSAccountClamp().boto3_session
46
+
47
+ # Initialize parent with workbench config
48
+ super().__init__(path_prefix=path_prefix, s3_bucket=bucket, boto3_session=session)
49
+ self.log = logging.getLogger("workbench")
50
+
51
+
52
+ if __name__ == "__main__":
53
+ """Exercise the DFStoreCore Class"""
54
+ import time
55
+ import pandas as pd
56
+
57
+ # Create a DFStoreCore manager
58
+ df_store = DFStoreCore()
59
+
60
+ # Details of the Dataframe Store
61
+ print("Detailed Data...")
62
+ print(df_store.details())
63
+
64
+ # Add a new DataFrame
65
+ my_df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
66
+ df_store.upsert("/testing/test_data", my_df)
67
+
68
+ # Get the DataFrame
69
+ print(f"Getting data 'test_data':\n{df_store.get('/testing/test_data')}")
70
+
71
+ # Now let's test adding a Series
72
+ series = pd.Series([1, 2, 3, 4], name="Series")
73
+ df_store.upsert("/testing/test_series", series)
74
+ print(f"Getting data 'test_series':\n{df_store.get('/testing/test_series')}")
75
+
76
+ # Summary of the data
77
+ print("Summary Data...")
78
+ print(df_store.summary())
79
+
80
+ # Repr of the DFStoreCore object
81
+ print("DFStoreCore Object:")
82
+ print(df_store)
83
+
84
+ # Check if the data exists
85
+ print("Check if data exists...")
86
+ print(df_store.check("/testing/test_data"))
87
+ print(df_store.check("/testing/test_series"))
88
+
89
+ # Time the check
90
+ start_time = time.time()
91
+ print(df_store.check("/testing/test_data"))
92
+ print("--- Check %s seconds ---" % (time.time() - start_time))
93
+
94
+ # Now delete the test data
95
+ df_store.delete("/testing/test_data")
96
+ df_store.delete("/testing/test_series")
97
+
98
+ # Check if the data exists
99
+ print("Check if data exists...")
100
+ print(df_store.check("/testing/test_data"))
101
+ print(df_store.check("/testing/test_series"))
102
+
103
+ # Add a bunch of dataframes and then test recursive delete
104
+ for i in range(10):
105
+ df_store.upsert(f"/testing/data_{i}", pd.DataFrame({"A": [1, 2], "B": [3, 4]}))
106
+ print("Before Recursive Delete:")
107
+ print(df_store.summary())
108
+ df_store.delete_recursive("/testing")
109
+ print("After Recursive Delete:")
110
+ print(df_store.summary())
111
+
112
+ # Get a non-existent DataFrame
113
+ print("Getting non-existent data...")
114
+ print(df_store.get("/testing/no_where"))