workbench 0.8.202__py3-none-any.whl → 0.8.220__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of workbench might be problematic. Click here for more details.
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +421 -85
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +78 -150
- workbench/algorithms/graph/light/proximity_graph.py +5 -5
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +3 -0
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +13 -11
- workbench/api/feature_set.py +111 -8
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +45 -12
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_model.py +4 -4
- workbench/core/artifacts/artifact.py +5 -5
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +228 -237
- workbench/core/artifacts/feature_set_core.py +185 -230
- workbench/core/artifacts/model_core.py +34 -26
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/features_to_model/features_to_model.py +22 -10
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +41 -10
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +278 -0
- workbench/model_scripts/chemprop/chemprop.template +428 -631
- workbench/model_scripts/chemprop/generated_model_script.py +432 -635
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +2 -10
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +374 -613
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +370 -609
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
- workbench/model_scripts/script_generation.py +6 -5
- workbench/model_scripts/uq_models/generated_model_script.py +65 -422
- workbench/model_scripts/xgb_model/generated_model_script.py +372 -395
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +278 -0
- workbench/model_scripts/xgb_model/xgb_model.template +366 -396
- workbench/repl/workbench_shell.py +0 -5
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +2 -2
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/training_test.py +85 -0
- workbench/utils/chem_utils/fingerprints.py +87 -46
- workbench/utils/chem_utils/projections.py +16 -6
- workbench/utils/chemprop_utils.py +36 -655
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +192 -54
- workbench/utils/pytorch_utils.py +33 -472
- workbench/utils/shap_utils.py +1 -55
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +49 -356
- workbench/web_interface/components/model_plot.py +7 -1
- workbench/web_interface/components/plugins/model_details.py +30 -68
- workbench/web_interface/components/plugins/scatter_plot.py +4 -8
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA +6 -5
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/RECORD +76 -60
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/entry_points.txt +2 -0
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
- workbench/model_scripts/uq_models/mapie.template +0 -605
- workbench/model_scripts/uq_models/requirements.txt +0 -1
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""DFStoreCore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
# Workbench Imports
|
|
7
|
+
from workbench.utils.config_manager import ConfigManager
|
|
8
|
+
from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
|
|
9
|
+
|
|
10
|
+
# Workbench Bridges Import
|
|
11
|
+
from workbench_bridges.api import DFStore as BridgesDFStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DFStoreCore(BridgesDFStore):
    """DFStoreCore: DataFrame storage on AWS S3/Parquet/Snappy, wired to Workbench's config

    This subclass only defines the constructor: it supplies the Workbench S3 bucket
    and boto3 session to the workbench_bridges DFStore. The storage methods shown
    below are inherited from that parent class.

    Common Usage:
        ```python
        import pandas as pd

        df_store = DFStoreCore()

        # List Data
        df_store.list()

        # Add DataFrame
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
        df_store.upsert("/test/my_data", df)

        # Retrieve DataFrame
        df = df_store.get("/test/my_data")
        print(df)

        # Delete Data
        df_store.delete("/test/my_data")
        ```
    """

    def __init__(self, path_prefix: Union[str, None] = None):
        """Construct a DFStoreCore using the Workbench bucket and AWS session

        Args:
            path_prefix (Union[str, None], optional): Path prefix added to storage locations (Defaults to None)
        """
        # Look up the bucket name, then the AWS session, from Workbench's own systems
        workbench_bucket = ConfigManager().get_config("WORKBENCH_BUCKET")
        aws_session = AWSAccountClamp().boto3_session

        # Hand the Workbench-specific settings to the bridges DFStore
        parent_kwargs = {
            "path_prefix": path_prefix,
            "s3_bucket": workbench_bucket,
            "boto3_session": aws_session,
        }
        super().__init__(**parent_kwargs)

        # Set after the parent init so this instance logs through the "workbench" logger
        self.log = logging.getLogger("workbench")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == "__main__":
    # Exercise the DFStoreCore class end-to-end against the configured store.
    # Everything is written under the "/testing" prefix and removed again below.
    import time
    import pandas as pd

    def _sample_frame() -> pd.DataFrame:
        """Return a fresh two-row DataFrame used as test payload."""
        return pd.DataFrame({"A": [1, 2], "B": [3, 4]})

    # Locations written (and later deleted) by the first part of this script
    test_paths = ("/testing/test_data", "/testing/test_series")

    # Create a DFStoreCore manager
    df_store = DFStoreCore()

    # Details of the Dataframe Store
    print("Detailed Data...")
    print(df_store.details())

    # Add a new DataFrame
    df_store.upsert("/testing/test_data", _sample_frame())

    # Get the DataFrame
    print(f"Getting data 'test_data':\n{df_store.get('/testing/test_data')}")

    # Now let's test adding a Series
    series = pd.Series([1, 2, 3, 4], name="Series")
    df_store.upsert("/testing/test_series", series)
    print(f"Getting data 'test_series':\n{df_store.get('/testing/test_series')}")

    # Summary of the data
    print("Summary Data...")
    print(df_store.summary())

    # Repr of the DFStoreCore object
    print("DFStoreCore Object:")
    print(df_store)

    # Check if the data exists
    print("Check if data exists...")
    for path in test_paths:
        print(df_store.check(path))

    # Time the check (perf_counter is monotonic, so the interval cannot be
    # skewed by wall-clock adjustments the way time.time() can)
    start_time = time.perf_counter()
    print(df_store.check("/testing/test_data"))
    elapsed = time.perf_counter() - start_time
    print(f"--- Check {elapsed} seconds ---")

    # Now delete the test data
    for path in test_paths:
        df_store.delete(path)

    # Check if the data exists (after the deletes above)
    print("Check if data exists...")
    for path in test_paths:
        print(df_store.check(path))

    # Add a bunch of dataframes and then test recursive delete
    for i in range(10):
        df_store.upsert(f"/testing/data_{i}", _sample_frame())
    print("Before Recursive Delete:")
    print(df_store.summary())
    df_store.delete_recursive("/testing")
    print("After Recursive Delete:")
    print(df_store.summary())

    # Get a non-existent DataFrame
    print("Getting non-existent data...")
    print(df_store.get("/testing/no_where"))
|