workbench 0.8.162__py3-none-any.whl → 0.8.220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (147) hide show
  1. workbench/algorithms/dataframe/__init__.py +1 -2
  2. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  3. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  4. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  5. workbench/algorithms/dataframe/projection_2d.py +44 -21
  6. workbench/algorithms/dataframe/proximity.py +259 -305
  7. workbench/algorithms/graph/light/proximity_graph.py +14 -12
  8. workbench/algorithms/models/cleanlab_model.py +382 -0
  9. workbench/algorithms/models/noise_model.py +388 -0
  10. workbench/algorithms/sql/outliers.py +3 -3
  11. workbench/api/__init__.py +5 -1
  12. workbench/api/compound.py +1 -1
  13. workbench/api/df_store.py +17 -108
  14. workbench/api/endpoint.py +18 -5
  15. workbench/api/feature_set.py +121 -15
  16. workbench/api/meta.py +5 -2
  17. workbench/api/meta_model.py +289 -0
  18. workbench/api/model.py +55 -21
  19. workbench/api/monitor.py +1 -16
  20. workbench/api/parameter_store.py +3 -52
  21. workbench/cached/cached_model.py +4 -4
  22. workbench/core/artifacts/__init__.py +11 -2
  23. workbench/core/artifacts/artifact.py +16 -8
  24. workbench/core/artifacts/data_capture_core.py +355 -0
  25. workbench/core/artifacts/df_store_core.py +114 -0
  26. workbench/core/artifacts/endpoint_core.py +382 -253
  27. workbench/core/artifacts/feature_set_core.py +249 -45
  28. workbench/core/artifacts/model_core.py +135 -80
  29. workbench/core/artifacts/monitor_core.py +33 -248
  30. workbench/core/artifacts/parameter_store_core.py +98 -0
  31. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  32. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  33. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  34. workbench/core/pipelines/pipeline_executor.py +1 -1
  35. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  36. workbench/core/transforms/features_to_model/features_to_model.py +62 -40
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +76 -15
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +278 -0
  45. workbench/model_scripts/chemprop/chemprop.template +649 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +649 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
  61. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  62. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  63. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  64. workbench/model_scripts/meta_model/meta_model.template +209 -0
  65. workbench/model_scripts/pytorch_model/generated_model_script.py +444 -500
  66. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  67. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  68. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  69. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  70. workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
  71. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  72. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  73. workbench/model_scripts/script_generation.py +20 -11
  74. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  75. workbench/model_scripts/xgb_model/generated_model_script.py +372 -404
  76. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  77. workbench/model_scripts/xgb_model/uq_harness.py +278 -0
  78. workbench/model_scripts/xgb_model/xgb_model.template +369 -401
  79. workbench/repl/workbench_shell.py +28 -19
  80. workbench/resources/open_source_api.key +1 -1
  81. workbench/scripts/endpoint_test.py +162 -0
  82. workbench/scripts/lambda_test.py +73 -0
  83. workbench/scripts/meta_model_sim.py +35 -0
  84. workbench/scripts/ml_pipeline_batch.py +137 -0
  85. workbench/scripts/ml_pipeline_sqs.py +186 -0
  86. workbench/scripts/monitor_cloud_watch.py +20 -100
  87. workbench/scripts/training_test.py +85 -0
  88. workbench/utils/aws_utils.py +4 -3
  89. workbench/utils/chem_utils/__init__.py +0 -0
  90. workbench/utils/chem_utils/fingerprints.py +175 -0
  91. workbench/utils/chem_utils/misc.py +194 -0
  92. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  93. workbench/utils/chem_utils/mol_standardize.py +450 -0
  94. workbench/utils/chem_utils/mol_tagging.py +348 -0
  95. workbench/utils/chem_utils/projections.py +219 -0
  96. workbench/utils/chem_utils/salts.py +256 -0
  97. workbench/utils/chem_utils/sdf.py +292 -0
  98. workbench/utils/chem_utils/toxicity.py +250 -0
  99. workbench/utils/chem_utils/vis.py +253 -0
  100. workbench/utils/chemprop_utils.py +141 -0
  101. workbench/utils/cloudwatch_handler.py +1 -1
  102. workbench/utils/cloudwatch_utils.py +137 -0
  103. workbench/utils/config_manager.py +3 -7
  104. workbench/utils/endpoint_utils.py +5 -7
  105. workbench/utils/license_manager.py +2 -6
  106. workbench/utils/meta_model_simulator.py +499 -0
  107. workbench/utils/metrics_utils.py +256 -0
  108. workbench/utils/model_utils.py +278 -79
  109. workbench/utils/monitor_utils.py +44 -62
  110. workbench/utils/pandas_utils.py +3 -3
  111. workbench/utils/pytorch_utils.py +87 -0
  112. workbench/utils/shap_utils.py +11 -57
  113. workbench/utils/workbench_logging.py +0 -3
  114. workbench/utils/workbench_sqs.py +1 -1
  115. workbench/utils/xgboost_local_crossfold.py +267 -0
  116. workbench/utils/xgboost_model_utils.py +127 -219
  117. workbench/web_interface/components/model_plot.py +14 -2
  118. workbench/web_interface/components/plugin_unit_test.py +5 -2
  119. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  120. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  121. workbench/web_interface/components/plugins/model_details.py +38 -74
  122. workbench/web_interface/components/plugins/scatter_plot.py +6 -10
  123. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/METADATA +31 -9
  124. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/RECORD +128 -96
  125. workbench-0.8.220.dist-info/entry_points.txt +11 -0
  126. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +1 -1
  127. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  128. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  129. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  130. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  131. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  132. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  133. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  134. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  135. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  136. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -273
  137. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  138. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  139. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  140. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  141. workbench/utils/chem_utils.py +0 -1556
  142. workbench/utils/execution_environment.py +0 -211
  143. workbench/utils/fast_inference.py +0 -167
  144. workbench/utils/resource_utils.py +0 -39
  145. workbench-0.8.162.dist-info/entry_points.txt +0 -5
  146. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
  147. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,137 @@
1
+ """AWS CloudWatch utility functions for Workbench."""
2
+
3
+ import time
4
+ import logging
5
+ from datetime import datetime, timezone
6
+ from typing import List, Optional, Dict, Generator
7
+ from urllib.parse import quote
8
+ from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
9
+
10
+ log = logging.getLogger("workbench")
11
+
12
+
13
def get_cloudwatch_client():
    """Return a CloudWatch Logs ("logs") client created from the AWSAccountClamp boto3 session."""
    return AWSAccountClamp().boto3_session.client("logs")
17
+
18
+
19
def get_cloudwatch_logs_url(log_group: str, log_stream: str) -> Optional[str]:
    """
    Build the CloudWatch console URL for one log stream inside a log group.

    Args:
        log_group: Log group name (e.g., '/aws/batch/job')
        log_stream: Log stream name

    Returns:
        The console URL as a string, or None when any step of building it raises
        (the failure is logged as a warning rather than propagated).
    """
    try:
        aws_region = AWSAccountClamp().region

        # Percent-encode every character that is not URL-safe, including "/"
        group_part = quote(log_group, safe="")
        stream_part = quote(log_stream, safe="")

        console_home = f"https://{aws_region}.console.aws.amazon.com/cloudwatch/home?"
        group_fragment = f"region={aws_region}#logsV2:log-groups/log-group/{group_part}"
        stream_fragment = f"/log-events/{stream_part}"
        return console_home + group_fragment + stream_fragment
    except Exception as e:  # noqa: BLE001
        log.warning(f"Failed to generate CloudWatch logs URL: {e}")
        return None
45
+
46
+
47
def get_active_log_streams(
    log_group_name: str, start_time_ms: int, stream_filter: Optional[str] = None, client=None
) -> List[str]:
    """Retrieve log streams that have events after the specified start time.

    Streams are requested ordered by "LastEventTime", descending, so the first stream
    whose last event is older than ``start_time_ms`` means no later stream (on this
    page or any following page) can qualify. Pagination therefore stops at that point
    instead of walking every remaining page of the log group.

    Args:
        log_group_name: Name of the log group to inspect.
        start_time_ms: Cutoff in milliseconds; a stream is kept when its
            ``lastEventTimestamp`` is greater than or equal to this value.
        stream_filter: Optional substring; when given, only stream names containing
            it are returned.
        client: Optional client object exposing ``describe_log_streams``; when not
            provided, one is obtained from ``get_cloudwatch_client()``.

    Returns:
        Alphabetically sorted list of matching log stream names (possibly empty).
    """
    if not client:
        client = get_cloudwatch_client()

    stream_params = {
        "logGroupName": log_group_name,
        "orderBy": "LastEventTime",
        "descending": True,
    }
    active_streams = []
    reached_older_stream = False

    while not reached_older_stream:
        response = client.describe_log_streams(**stream_params)
        for log_stream in response.get("logStreams", []):
            # A stream without a lastEventTimestamp is treated as timestamp 0
            if log_stream.get("lastEventTimestamp", 0) < start_time_ms:
                # Everything after this entry is older still: stop scanning AND paging
                reached_older_stream = True
                break
            active_streams.append(log_stream["logStreamName"])

        if "nextToken" not in response:
            break
        stream_params["nextToken"] = response["nextToken"]

    # Sort and filter streams
    active_streams.sort()
    if stream_filter:
        active_streams = [stream for stream in active_streams if stream_filter in stream]
    return active_streams
78
+
79
+
80
def stream_log_events(
    log_group_name: str,
    log_stream_name: str,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    follow: bool = False,
    client=None,
    poll_interval: float = 2.0,
) -> Generator[Dict, None, None]:
    """
    Stream log events from a specific log stream.

    Events are read from the head of the stream with ``get_log_events`` and paged
    using the returned ``nextForwardToken``. Each yielded event dict is annotated
    in place with a ``"logStreamName"`` key set to ``log_stream_name``.

    Args:
        log_group_name: Name of the log group.
        log_stream_name: Name of the log stream within the group.
        start_time: Optional lower bound; sent as ``startTime`` in milliseconds on the
            first request only (it is dropped once a paging token is in use).
        end_time: Optional upper bound; sent as ``endTime`` in milliseconds.
        follow: When False, stop once no new forward token is returned or the stream
            is not found. When True, keep polling in both of those situations.
        client: Optional client object exposing ``get_log_events``; when not
            provided, one is obtained from ``get_cloudwatch_client()``.
        poll_interval: Seconds to sleep between polls in follow mode (default 2.0,
            which matches the previously hard-coded delay).

    Yields:
        Log events as dictionaries
    """
    if not client:
        client = get_cloudwatch_client()

    params = {"logGroupName": log_group_name, "logStreamName": log_stream_name, "startFromHead": True}
    if start_time:
        params["startTime"] = int(start_time.timestamp() * 1000)
    if end_time:
        params["endTime"] = int(end_time.timestamp() * 1000)

    next_token = None
    while True:
        if next_token:
            # Once paging by token, the token alone determines the read position
            params["nextToken"] = next_token
            params.pop("startTime", None)
        try:
            response = client.get_log_events(**params)
            for event in response.get("events", []):
                event["logStreamName"] = log_stream_name
                yield event
            next_token = response.get("nextForwardToken")
            # No token, or the same token we just sent, means nothing new is available
            if not next_token or next_token == params.get("nextToken"):
                if not follow:
                    break
                time.sleep(poll_interval)
        except client.exceptions.ResourceNotFoundException:
            # The stream (or group) does not exist (yet); in follow mode wait for it
            if not follow:
                break
            time.sleep(poll_interval)
121
+
122
+
123
def print_log_event(
    event: dict, show_stream: bool = True, local_time: bool = True, custom_format: Optional[str] = None
):
    """Print one log event as a single formatted line.

    Args:
        event: Event dict; must contain "timestamp" (milliseconds since the epoch) and
            "message", and may contain "logStreamName".
        show_stream: When True and the event has a "logStreamName", prefix the line
            with the stream name.
        local_time: When True, convert the UTC timestamp to the local timezone.
        custom_format: Optional ``str.format`` template receiving the keyword fields
            ``stream``, ``time`` and ``message``; when given it takes precedence over
            the built-in layouts.
    """
    moment = datetime.fromtimestamp(event["timestamp"] / 1000, tz=timezone.utc)
    moment = moment.astimezone() if local_time else moment
    text = event["message"].rstrip()

    if custom_format:
        # Allow custom formatting
        line = custom_format.format(stream=event.get("logStreamName", ""), time=moment, message=text)
    elif show_stream and "logStreamName" in event:
        line = f"[{event['logStreamName']}] [{moment:%Y-%m-%d %I:%M%p}] {text}"
    else:
        line = f"[{moment:%H:%M:%S}] {text}"
    print(line)
@@ -4,15 +4,12 @@ import os
4
4
  import sys
5
5
  import platform
6
6
  import logging
7
- import importlib.resources as resources # noqa: F401 Python 3.9 compatibility
8
7
  from typing import Any, Dict
8
+ from importlib.resources import files, as_file
9
9
 
10
10
  # Workbench imports
11
11
  from workbench.utils.license_manager import LicenseManager
12
- from workbench.utils.execution_environment import running_as_service
13
-
14
- # Python 3.9 compatibility
15
- from workbench.utils.resource_utils import get_resource_path
12
+ from workbench_bridges.utils.execution_environment import running_as_service
16
13
 
17
14
 
18
15
  class FatalConfigError(Exception):
@@ -172,8 +169,7 @@ class ConfigManager:
172
169
  Returns:
173
170
  str: The open source API key.
174
171
  """
175
- # Python 3.9 compatibility
176
- with get_resource_path("workbench.resources", "open_source_api.key") as open_source_key_path:
172
+ with as_file(files("workbench.resources").joinpath("open_source_api.key")) as open_source_key_path:
177
173
  with open(open_source_key_path, "r") as key_file:
178
174
  return key_file.read().strip()
179
175
 
@@ -7,9 +7,7 @@ from typing import Union, Optional
7
7
  import pandas as pd
8
8
 
9
9
  # Workbench Imports
10
- from workbench.api.feature_set import FeatureSet
11
- from workbench.api.model import Model
12
- from workbench.api.endpoint import Endpoint
10
+ from workbench.api import FeatureSet, Model, Endpoint
13
11
 
14
12
  # Set up the log
15
13
  log = logging.getLogger("workbench")
@@ -77,7 +75,7 @@ def internal_model_data_url(endpoint_config_name: str, session: boto3.Session) -
77
75
  return None
78
76
 
79
77
 
80
- def fs_training_data(end: Endpoint) -> pd.DataFrame:
78
+ def get_training_data(end: Endpoint) -> pd.DataFrame:
81
79
  """Code to get the training data from the FeatureSet used to train the Model
82
80
 
83
81
  Args:
@@ -100,7 +98,7 @@ def fs_training_data(end: Endpoint) -> pd.DataFrame:
100
98
  return train_df
101
99
 
102
100
 
103
- def fs_evaluation_data(end: Endpoint) -> pd.DataFrame:
101
+ def get_evaluation_data(end: Endpoint) -> pd.DataFrame:
104
102
  """Code to get the evaluation data from the FeatureSet NOT used for training
105
103
 
106
104
  Args:
@@ -178,11 +176,11 @@ if __name__ == "__main__":
178
176
  print(model_data_url)
179
177
 
180
178
  # Get the training data
181
- my_train_df = fs_training_data(my_endpoint)
179
+ my_train_df = get_training_data(my_endpoint)
182
180
  print(my_train_df)
183
181
 
184
182
  # Get the evaluation data
185
- my_eval_df = fs_evaluation_data(my_endpoint)
183
+ my_eval_df = get_evaluation_data(my_endpoint)
186
184
  print(my_eval_df)
187
185
 
188
186
  # Backtrack to the FeatureSet
@@ -6,15 +6,12 @@ import json
6
6
  import logging
7
7
  import requests
8
8
  from typing import Union
9
- import importlib.resources as resources # noqa: F401 Python 3.9 compatibility
10
9
  from datetime import datetime
11
10
  from cryptography.hazmat.primitives import hashes
12
11
  from cryptography.hazmat.primitives.asymmetric import padding
13
12
  from cryptography.hazmat.primitives import serialization
14
13
  from cryptography.hazmat.backends import default_backend
15
-
16
- # Python 3.9 compatibility
17
- from workbench.utils.resource_utils import get_resource_path
14
+ from importlib.resources import files, as_file
18
15
 
19
16
 
20
17
  class FatalLicenseError(Exception):
@@ -140,8 +137,7 @@ class LicenseManager:
140
137
  Returns:
141
138
  The public key as an object.
142
139
  """
143
- # Python 3.9 compatibility
144
- with get_resource_path("workbench.resources", "signature_verify_pub.pem") as public_key_path:
140
+ with as_file(files("workbench.resources").joinpath("signature_verify_pub.pem")) as public_key_path:
145
141
  with open(public_key_path, "rb") as key_file:
146
142
  public_key_data = key_file.read()
147
143