workbench 0.8.162__py3-none-any.whl → 0.8.220__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of workbench might be problematic.
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +259 -305
- workbench/algorithms/graph/light/proximity_graph.py +14 -12
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +5 -1
- workbench/api/compound.py +1 -1
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +18 -5
- workbench/api/feature_set.py +121 -15
- workbench/api/meta.py +5 -2
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +55 -21
- workbench/api/monitor.py +1 -16
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_model.py +4 -4
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +16 -8
- workbench/core/artifacts/data_capture_core.py +355 -0
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +382 -253
- workbench/core/artifacts/feature_set_core.py +249 -45
- workbench/core/artifacts/model_core.py +135 -80
- workbench/core/artifacts/monitor_core.py +33 -248
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
- workbench/core/cloud_platform/aws/aws_meta.py +12 -5
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/core/transforms/features_to_model/features_to_model.py +62 -40
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +76 -15
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +278 -0
- workbench/model_scripts/chemprop/chemprop.template +649 -0
- workbench/model_scripts/chemprop/generated_model_script.py +649 -0
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +3 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
- workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +444 -500
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +440 -496
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +20 -11
- workbench/model_scripts/uq_models/generated_model_script.py +248 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +372 -404
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +278 -0
- workbench/model_scripts/xgb_model/xgb_model.template +369 -401
- workbench/repl/workbench_shell.py +28 -19
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/ml_pipeline_batch.py +137 -0
- workbench/scripts/ml_pipeline_sqs.py +186 -0
- workbench/scripts/monitor_cloud_watch.py +20 -100
- workbench/scripts/training_test.py +85 -0
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +175 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +483 -0
- workbench/utils/chem_utils/mol_standardize.py +450 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +219 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/chemprop_utils.py +141 -0
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/cloudwatch_utils.py +137 -0
- workbench/utils/config_manager.py +3 -7
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +278 -79
- workbench/utils/monitor_utils.py +44 -62
- workbench/utils/pandas_utils.py +3 -3
- workbench/utils/pytorch_utils.py +87 -0
- workbench/utils/shap_utils.py +11 -57
- workbench/utils/workbench_logging.py +0 -3
- workbench/utils/workbench_sqs.py +1 -1
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +127 -219
- workbench/web_interface/components/model_plot.py +14 -2
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- workbench/web_interface/components/plugins/model_details.py +38 -74
- workbench/web_interface/components/plugins/scatter_plot.py +6 -10
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/METADATA +31 -9
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/RECORD +128 -96
- workbench-0.8.220.dist-info/entry_points.txt +11 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +1 -1
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
- workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -273
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/utils/chem_utils.py +0 -1556
- workbench/utils/execution_environment.py +0 -211
- workbench/utils/fast_inference.py +0 -167
- workbench/utils/resource_utils.py +0 -39
- workbench-0.8.162.dist-info/entry_points.txt +0 -5
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
workbench/utils/cloudwatch_utils.py
@@ -0,0 +1,137 @@
+"""AWS CloudWatch utility functions for Workbench."""
+
+import time
+import logging
+from datetime import datetime, timezone
+from typing import List, Optional, Dict, Generator
+from urllib.parse import quote
+from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
+
+log = logging.getLogger("workbench")
+
+
+def get_cloudwatch_client():
+    """Get the CloudWatch Logs client using the Workbench assumed role session."""
+    session = AWSAccountClamp().boto3_session
+    return session.client("logs")
+
+
+def get_cloudwatch_logs_url(log_group: str, log_stream: str) -> Optional[str]:
+    """
+    Generate CloudWatch logs URL for the specified log group and stream.
+
+    Args:
+        log_group: Log group name (e.g., '/aws/batch/job')
+        log_stream: Log stream name
+
+    Returns:
+        CloudWatch console URL or None if unable to generate
+    """
+    try:
+        region = AWSAccountClamp().region
+
+        # URL encode the log group and stream
+        encoded_group = quote(log_group, safe="")
+        encoded_stream = quote(log_stream, safe="")
+
+        return (
+            f"https://{region}.console.aws.amazon.com/cloudwatch/home?"
+            f"region={region}#logsV2:log-groups/log-group/{encoded_group}"
+            f"/log-events/{encoded_stream}"
+        )
+    except Exception as e:  # noqa: BLE001
+        log.warning(f"Failed to generate CloudWatch logs URL: {e}")
+        return None
+
+
+def get_active_log_streams(
+    log_group_name: str, start_time_ms: int, stream_filter: Optional[str] = None, client=None
+) -> List[str]:
+    """Retrieve log streams that have events after the specified start time."""
+    if not client:
+        client = get_cloudwatch_client()
+    active_streams = []
+    stream_params = {
+        "logGroupName": log_group_name,
+        "orderBy": "LastEventTime",
+        "descending": True,
+    }
+    while True:
+        response = client.describe_log_streams(**stream_params)
+        log_streams = response.get("logStreams", [])
+        for log_stream in log_streams:
+            log_stream_name = log_stream["logStreamName"]
+            last_event_timestamp = log_stream.get("lastEventTimestamp", 0)
+            if last_event_timestamp >= start_time_ms:
+                active_streams.append(log_stream_name)
+            else:
+                break
+        if "nextToken" in response:
+            stream_params["nextToken"] = response["nextToken"]
+        else:
+            break
+    # Sort and filter streams
+    active_streams.sort()
+    if stream_filter and active_streams:
+        active_streams = [stream for stream in active_streams if stream_filter in stream]
+    return active_streams
+
+
+def stream_log_events(
+    log_group_name: str,
+    log_stream_name: str,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None,
+    follow: bool = False,
+    client=None,
+) -> Generator[Dict, None, None]:
+    """
+    Stream log events from a specific log stream.
+    Yields:
+        Log events as dictionaries
+    """
+    if not client:
+        client = get_cloudwatch_client()
+    params = {"logGroupName": log_group_name, "logStreamName": log_stream_name, "startFromHead": True}
+    if start_time:
+        params["startTime"] = int(start_time.timestamp() * 1000)
+    if end_time:
+        params["endTime"] = int(end_time.timestamp() * 1000)
+    next_token = None
+    while True:
+        if next_token:
+            params["nextToken"] = next_token
+            params.pop("startTime", None)
+        try:
+            response = client.get_log_events(**params)
+            events = response.get("events", [])
+            for event in events:
+                event["logStreamName"] = log_stream_name
+                yield event
+            next_token = response.get("nextForwardToken")
+            # Break if no more events or same token
+            if not next_token or next_token == params.get("nextToken"):
+                if not follow:
+                    break
+                time.sleep(2)
+        except client.exceptions.ResourceNotFoundException:
+            if not follow:
+                break
+            time.sleep(2)
+
+
+def print_log_event(
+    event: dict, show_stream: bool = True, local_time: bool = True, custom_format: Optional[str] = None
+):
+    """Print a formatted log event."""
+    timestamp = datetime.fromtimestamp(event["timestamp"] / 1000, tz=timezone.utc)
+    if local_time:
+        timestamp = timestamp.astimezone()
+    message = event["message"].rstrip()
+    if custom_format:
+        # Allow custom formatting
+        print(custom_format.format(stream=event.get("logStreamName", ""), time=timestamp, message=message))
+    elif show_stream and "logStreamName" in event:
+        print(f"[{event['logStreamName']}] [{timestamp:%Y-%m-%d %I:%M%p}] {message}")
+    else:
+        print(f"[{timestamp:%H:%M:%S}] {message}")
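The new cloudwatch_utils.py module above exposes small, composable helpers. A minimal usage sketch follows (not taken from the package itself; the log group name and the 30-minute look-back window are hypothetical):

    from datetime import datetime, timedelta, timezone

    from workbench.utils.cloudwatch_utils import (
        get_active_log_streams,
        print_log_event,
        stream_log_events,
    )

    # Hypothetical log group and a 30-minute look-back window
    log_group = "/aws/sagemaker/Endpoints/my-endpoint"
    start = datetime.now(timezone.utc) - timedelta(minutes=30)

    # Find streams with recent events, then print each event as it is read
    for stream in get_active_log_streams(log_group, int(start.timestamp() * 1000)):
        for event in stream_log_events(log_group, stream, start_time=start):
            print_log_event(event, show_stream=True)

Passing follow=True to stream_log_events keeps polling (sleeping 2 seconds between get_log_events calls) instead of stopping once the forward token stops advancing.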
workbench/utils/config_manager.py
@@ -4,15 +4,12 @@ import os
 import sys
 import platform
 import logging
-import importlib.resources as resources  # noqa: F401 Python 3.9 compatibility
 from typing import Any, Dict
+from importlib.resources import files, as_file
 
 # Workbench imports
 from workbench.utils.license_manager import LicenseManager
-from 
-
-# Python 3.9 compatibility
-from workbench.utils.resource_utils import get_resource_path
+from workbench_bridges.utils.execution_environment import running_as_service
 
 
 class FatalConfigError(Exception):
@@ -172,8 +169,7 @@ class ConfigManager:
         Returns:
             str: The open source API key.
         """
-
-        with get_resource_path("workbench.resources", "open_source_api.key") as open_source_key_path:
+        with as_file(files("workbench.resources").joinpath("open_source_api.key")) as open_source_key_path:
            with open(open_source_key_path, "r") as key_file:
                return key_file.read().strip()
 
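Both config_manager.py (above) and license_manager.py (further down) replace the Python 3.9-era get_resource_path shim from the now-deleted workbench/utils/resource_utils.py with the standard importlib.resources API. A minimal sketch of the pattern, using only the stdlib and the packaged open_source_api.key resource shown in the diff (the wrapper function name is illustrative):

    from importlib.resources import as_file, files


    def read_open_source_key() -> str:
        """Illustrative wrapper: read a resource bundled in workbench.resources."""
        # files() returns a Traversable for the packaged resource; as_file()
        # materializes it as a real filesystem path (extracting it if the
        # package is zipped), which open() can then use normally.
        resource = files("workbench.resources").joinpath("open_source_api.key")
        with as_file(resource) as key_path:
            with open(key_path, "r") as key_file:
                return key_file.read().strip()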
workbench/utils/endpoint_utils.py
@@ -7,9 +7,7 @@ from typing import Union, Optional
 import pandas as pd
 
 # Workbench Imports
-from workbench.api
-from workbench.api.model import Model
-from workbench.api.endpoint import Endpoint
+from workbench.api import FeatureSet, Model, Endpoint
 
 # Set up the log
 log = logging.getLogger("workbench")
@@ -77,7 +75,7 @@ def internal_model_data_url(endpoint_config_name: str, session: boto3.Session) -
     return None
 
 
-def 
+def get_training_data(end: Endpoint) -> pd.DataFrame:
     """Code to get the training data from the FeatureSet used to train the Model
 
     Args:
@@ -100,7 +98,7 @@ def fs_training_data(end: Endpoint) -> pd.DataFrame:
     return train_df
 
 
-def 
+def get_evaluation_data(end: Endpoint) -> pd.DataFrame:
     """Code to get the evaluation data from the FeatureSet NOT used for training
 
     Args:
@@ -178,11 +176,11 @@ if __name__ == "__main__":
     print(model_data_url)
 
     # Get the training data
-    my_train_df = 
+    my_train_df = get_training_data(my_endpoint)
     print(my_train_df)
 
     # Get the evaluation data
-    my_eval_df = 
+    my_eval_df = get_evaluation_data(my_endpoint)
     print(my_eval_df)
 
     # Backtrack to the FeatureSet
workbench/utils/license_manager.py
@@ -6,15 +6,12 @@ import json
 import logging
 import requests
 from typing import Union
-import importlib.resources as resources  # noqa: F401 Python 3.9 compatibility
 from datetime import datetime
 from cryptography.hazmat.primitives import hashes
 from cryptography.hazmat.primitives.asymmetric import padding
 from cryptography.hazmat.primitives import serialization
 from cryptography.hazmat.backends import default_backend
-
-# Python 3.9 compatibility
-from workbench.utils.resource_utils import get_resource_path
+from importlib.resources import files, as_file
 
 
 class FatalLicenseError(Exception):
@@ -140,8 +137,7 @@ class LicenseManager:
         Returns:
             The public key as an object.
         """
-
-        with get_resource_path("workbench.resources", "signature_verify_pub.pem") as public_key_path:
+        with as_file(files("workbench.resources").joinpath("signature_verify_pub.pem")) as public_key_path:
            with open(public_key_path, "rb") as key_file:
                public_key_data = key_file.read()
 