workbench 0.8.166__py3-none-any.whl → 0.8.167__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of workbench might be problematic. Click here for more details.
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/model_scripts/xgb_model/generated_model_script.py +5 -5
- workbench/scripts/ml_pipeline_launcher.py +109 -0
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/METADATA +1 -1
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/RECORD +9 -8
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/entry_points.txt +1 -0
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/WHEEL +0 -0
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.166.dist-info → workbench-0.8.167.dist-info}/top_level.txt +0 -0
|
@@ -10,7 +10,7 @@ import logging
|
|
|
10
10
|
|
|
11
11
|
# Workbench Imports
|
|
12
12
|
from workbench.utils.config_manager import ConfigManager
|
|
13
|
-
from workbench_bridges.utils.execution_environment import
|
|
13
|
+
from workbench_bridges.utils.execution_environment import running_as_service
|
|
14
14
|
|
|
15
15
|
# Attempt to import IPython-related utilities
|
|
16
16
|
try:
|
|
@@ -66,10 +66,10 @@ class AWSSession:
|
|
|
66
66
|
return self._cached_boto3_session
|
|
67
67
|
|
|
68
68
|
def _create_boto3_session(self):
|
|
69
|
-
"""Internal: Get the AWS Boto3 Session,
|
|
69
|
+
"""Internal: Get the AWS Boto3 Session, assuming the Workbench Role if necessary."""
|
|
70
70
|
|
|
71
|
-
# Check
|
|
72
|
-
if
|
|
71
|
+
# Check if we're running as a service or already using the Workbench Role
|
|
72
|
+
if running_as_service() or self.is_workbench_role():
|
|
73
73
|
self.log.important("Using the default Boto3 session...")
|
|
74
74
|
return boto3.Session(region_name=self.region)
|
|
75
75
|
|
|
@@ -28,12 +28,12 @@ from typing import List, Tuple
|
|
|
28
28
|
|
|
29
29
|
# Template Parameters
|
|
30
30
|
TEMPLATE_PARAMS = {
|
|
31
|
-
"model_type": "
|
|
32
|
-
"target_column": "
|
|
33
|
-
"features": ['
|
|
31
|
+
"model_type": "classifier",
|
|
32
|
+
"target_column": "class",
|
|
33
|
+
"features": ['bcut2d_logplow', 'numradicalelectrons', 'smr_vsa5', 'fr_lactam', 'fr_morpholine', 'fr_aldehyde', 'slogp_vsa1', 'fr_amidine', 'bpol', 'fr_ester', 'fr_azo', 'kappa3', 'peoe_vsa5', 'fr_ketone_topliss', 'vsa_estate9', 'estate_vsa9', 'bcut2d_mrhi', 'fr_ndealkylation1', 'numrotatablebonds', 'minestateindex', 'fr_quatn', 'peoe_vsa3', 'fr_epoxide', 'fr_aniline', 'minpartialcharge', 'fr_nitroso', 'fpdensitymorgan2', 'fr_oxime', 'fr_sulfone', 'smr_vsa1', 'kappa1', 'fr_pyridine', 'numaromaticrings', 'vsa_estate6', 'molmr', 'estate_vsa1', 'fr_dihydropyridine', 'vsa_estate10', 'fr_alkyl_halide', 'chi2n', 'fr_thiocyan', 'fpdensitymorgan1', 'fr_unbrch_alkane', 'slogp_vsa9', 'chi4n', 'fr_nitro_arom', 'fr_al_oh', 'fr_furan', 'fr_c_s', 'peoe_vsa8', 'peoe_vsa14', 'numheteroatoms', 'fr_ndealkylation2', 'maxabspartialcharge', 'vsa_estate2', 'peoe_vsa7', 'apol', 'numhacceptors', 'fr_tetrazole', 'vsa_estate1', 'peoe_vsa9', 'naromatom', 'bcut2d_chghi', 'fr_sh', 'fr_halogen', 'slogp_vsa4', 'fr_benzodiazepine', 'molwt', 'fr_isocyan', 'fr_prisulfonamd', 'maxabsestateindex', 'minabsestateindex', 'peoe_vsa11', 'slogp_vsa12', 'estate_vsa5', 'numaliphaticcarbocycles', 'bcut2d_mwlow', 'slogp_vsa7', 'fr_allylic_oxid', 'fr_methoxy', 'fr_nh0', 'fr_coo2', 'fr_phenol', 'nacid', 'nbase', 'chi3v', 'fr_ar_nh', 'fr_nitrile', 'fr_imidazole', 'fr_urea', 'bcut2d_mrlow', 'chi1', 'smr_vsa6', 'fr_aryl_methyl', 'narombond', 'fr_alkyl_carbamate', 'fr_piperzine', 'exactmolwt', 'qed', 'chi0n', 'fr_sulfonamd', 'fr_thiazole', 'numvalenceelectrons', 'fr_phos_acid', 'peoe_vsa12', 'fr_nh1', 'fr_hdrzine', 'fr_c_o_nocoo', 'fr_lactone', 'estate_vsa6', 'bcut2d_logphi', 'vsa_estate7', 'peoe_vsa13', 'numsaturatedcarbocycles', 'fr_nitro', 'fr_phenol_noorthohbond', 'rotratio', 'fr_barbitur', 'fr_isothiocyan', 'balabanj', 'fr_arn', 'fr_imine', 'maxpartialcharge', 'fr_sulfide', 'slogp_vsa11', 'fr_hoccn', 'fr_n_o', 'peoe_vsa1', 'slogp_vsa6', 'heavyatommolwt', 'fractioncsp3', 'estate_vsa8', 'peoe_vsa10', 
'numaliphaticrings', 'fr_thiophene', 'maxestateindex', 'smr_vsa10', 'labuteasa', 'smr_vsa2', 'fpdensitymorgan3', 'smr_vsa9', 'slogp_vsa10', 'numaromaticheterocycles', 'fr_nh2', 'fr_diazo', 'chi3n', 'fr_ar_coo', 'slogp_vsa5', 'fr_bicyclic', 'fr_amide', 'estate_vsa10', 'fr_guanido', 'chi1n', 'numsaturatedrings', 'fr_piperdine', 'fr_term_acetylene', 'estate_vsa4', 'slogp_vsa3', 'fr_coo', 'fr_ether', 'estate_vsa7', 'bcut2d_chglo', 'fr_oxazole', 'peoe_vsa6', 'hallkieralpha', 'peoe_vsa2', 'chi2v', 'nocount', 'vsa_estate5', 'fr_nhpyrrole', 'fr_al_coo', 'bertzct', 'estate_vsa11', 'minabspartialcharge', 'slogp_vsa8', 'fr_imide', 'kappa2', 'numaliphaticheterocycles', 'numsaturatedheterocycles', 'fr_hdrzone', 'smr_vsa4', 'fr_ar_n', 'nrot', 'smr_vsa8', 'slogp_vsa2', 'chi4v', 'fr_phos_ester', 'fr_para_hydroxylation', 'smr_vsa3', 'nhohcount', 'estate_vsa2', 'mollogp', 'tpsa', 'fr_azide', 'peoe_vsa4', 'numhdonors', 'fr_al_oh_notert', 'fr_c_o', 'chi0', 'fr_nitro_arom_nonortho', 'vsa_estate3', 'fr_benzene', 'fr_ketone', 'vsa_estate8', 'smr_vsa7', 'fr_ar_oh', 'fr_priamide', 'ringcount', 'estate_vsa3', 'numaromaticcarbocycles', 'bcut2d_mwhi', 'chi1v', 'heavyatomcount', 'vsa_estate4', 'chi0v', 'pred_pka_reg'],
|
|
34
34
|
"compressed_features": [],
|
|
35
|
-
"model_metrics_s3_path": "s3://
|
|
36
|
-
"train_all_data":
|
|
35
|
+
"model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/sol-with-pka-class-100-test/training",
|
|
36
|
+
"train_all_data": True
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
# Function to check if dataframe is empty
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# Workbench Imports
|
|
8
|
+
from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
|
|
9
|
+
from workbench.utils.config_manager import ConfigManager
|
|
10
|
+
from workbench.utils.s3_utils import upload_content_to_s3
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger("workbench")
|
|
13
|
+
cm = ConfigManager()
|
|
14
|
+
workbench_bucket = cm.get_config("WORKBENCH_BUCKET")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_ecr_image_uri() -> str:
    """Build the URI of the ML pipeline container image in ECR.

    The registry host is composed from the region reported by
    ``AWSAccountClamp``; the account id, repository and tag are fixed.

    Returns:
        str: The full container image URI.
    """
    clamp = AWSAccountClamp()
    region = clamp.region
    return f"507740646243.dkr.ecr.{region}.amazonaws.com/aws-ml-images/py312-ml-pipelines:0.1"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_batch_role_arn() -> str:
    """Build the ARN of the ``Workbench-BatchRole`` IAM role.

    The account id comes from ``AWSAccountClamp``.

    Returns:
        str: The IAM role ARN.
    """
    clamp = AWSAccountClamp()
    account_id = clamp.account_id
    return f"arn:aws:iam::{account_id}:role/Workbench-BatchRole"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def ensure_job_definition():
    """Register the Fargate job definition used to run ML pipeline scripts.

    ``register_job_definition`` is called unconditionally on every invocation
    and the revision number from its response is logged.

    Returns:
        The job definition name.
    """
    batch_client = AWSAccountClamp().boto3_session.client("batch")
    name = "workbench-ml-pipeline-runner"

    # Container settings: image, sizing, IAM roles, environment and networking
    container_properties = {
        "image": get_ecr_image_uri(),
        "resourceRequirements": [
            {"type": "VCPU", "value": "2"},
            {"type": "MEMORY", "value": "4096"},
        ],
        "jobRoleArn": get_batch_role_arn(),
        "executionRoleArn": get_batch_role_arn(),
        "environment": [{"name": "WORKBENCH_BUCKET", "value": workbench_bucket}],
        # This is required so the ECR image can be pulled
        "networkConfiguration": {"assignPublicIp": "ENABLED"},
    }
    three_hours = {"attemptDurationSeconds": 10800}

    response = batch_client.register_job_definition(
        jobDefinitionName=name,
        type="container",
        platformCapabilities=["FARGATE"],
        containerProperties=container_properties,
        timeout=three_hours,
    )

    log.info(f"Job definition ready: {name} (revision {response['revision']})")
    return name
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _job_exit_code(job: dict) -> int:
    """Derive a process exit code from a finished AWS Batch job description.

    Args:
        job: One entry of the ``jobs`` list returned by ``describe_jobs``.

    Returns:
        int: The container exit code when Batch reports one; otherwise 0 for a
        SUCCEEDED job and 1 for anything else. A job that did not succeed never
        yields 0.
    """
    succeeded = job.get("status") == "SUCCEEDED"

    # Batch nests the exit code under the attempt's "container" detail, and
    # "attempts" can be missing or empty (e.g. the job never started).
    attempts = job.get("attempts") or [{}]
    code = (attempts[-1].get("container") or {}).get("exitCode")
    if code is None:
        # Fall back to the job-level container detail
        code = (job.get("container") or {}).get("exitCode")

    if code is None:
        return 0 if succeeded else 1
    if not succeeded and code == 0:
        # Never report a failed job as a success
        return 1
    return code


def run_batch_job(script_path: str) -> int:
    """Upload script, submit job, and track to completion.

    The script is uploaded to ``s3://<WORKBENCH_BUCKET>/batch-jobs/<file name>``,
    a job is submitted to the ``workbench-job-queue`` queue, and the job status
    is polled every 30 seconds until it is SUCCEEDED or FAILED.

    Args:
        script_path: Local path to the ML pipeline script.

    Returns:
        int: Exit code for the calling process: the container exit code when
        available, else 0 on success and 1 on failure.
    """
    batch = AWSAccountClamp().boto3_session.client("batch")
    script = Path(script_path)
    script_name = script.stem

    # Upload script to S3
    s3_path = f"s3://{workbench_bucket}/batch-jobs/{script.name}"
    log.info(f"Uploading script to {s3_path}")
    upload_content_to_s3(script.read_text(), s3_path)

    # Submit job
    job_name = f"workbench_{script_name}_{datetime.now():%Y%m%d_%H%M%S}"
    response = batch.submit_job(
        jobName=job_name,
        jobQueue="workbench-job-queue",
        jobDefinition=ensure_job_definition(),
        containerOverrides={
            "environment": [
                {"name": "SCRIPT_S3_PATH", "value": s3_path},
                {"name": "WORKBENCH_BUCKET", "value": workbench_bucket},
            ]
        },
    )

    job_id = response["jobId"]
    log.info(f"Submitted job: {job_name} ({job_id})")

    # Track job to completion
    while True:
        job = batch.describe_jobs(jobs=[job_id])["jobs"][0]
        status = job["status"]
        log.info(f"Job status: {status}")

        if status in ("SUCCEEDED", "FAILED"):
            if status == "FAILED":
                log.error(f"Job failed: {job.get('statusReason', 'Unknown reason')}")
            return _job_exit_code(job)

        time.sleep(30)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def main():
    """Command-line entry point: run an ML pipeline script on AWS Batch.

    Parses the ``script_file`` argument, runs the job via ``run_batch_job``,
    and terminates the process with the resulting exit code. Any exception
    raised while running the job is logged and the process exits with 1.
    """
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and may be absent (e.g. python -S, frozen interpreters).
    import sys

    parser = argparse.ArgumentParser(description="Run ML pipeline script on AWS Batch")
    parser.add_argument("script_file", help="Local path to ML pipeline script")
    args = parser.parse_args()

    try:
        exit_code = run_batch_job(args.script_file)
    except Exception as e:
        log.error(f"Error: {e}")
        sys.exit(1)

    sys.exit(exit_code)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if __name__ == "__main__":
|
|
109
|
+
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: workbench
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.167
|
|
4
4
|
Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
|
|
5
5
|
Author-email: SuperCowPowers LLC <support@supercowpowers.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -65,7 +65,7 @@ workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZb
|
|
|
65
65
|
workbench/core/cloud_platform/aws/aws_meta.py,sha256=xpidYpDydgWmKmJPrNFWbggahDY-nRXzXTRaEA3c5Sc,34587
|
|
66
66
|
workbench/core/cloud_platform/aws/aws_parameter_store.py,sha256=9ekuMOQFHFMIEV68UbHhS_fLB9iqG5Hvu4EV6iamEpk,10400
|
|
67
67
|
workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
|
|
68
|
-
workbench/core/cloud_platform/aws/aws_session.py,sha256=
|
|
68
|
+
workbench/core/cloud_platform/aws/aws_session.py,sha256=2Gc_k4Q87BBeQDgXgVR-w-qmsF6ncZR8wvTeNnixM6k,6926
|
|
69
69
|
workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
|
|
70
70
|
workbench/core/cloud_platform/azure/README.md,sha256=ciIXZwjtOPYf9ViquFQxjLKuFwje_hZJHJ2hMQghziI,101
|
|
71
71
|
workbench/core/cloud_platform/gcp/README.md,sha256=MzObe3mWQzjviKD2aXlAV9r_bU4HzTJGapWRsFn6pCU,106
|
|
@@ -158,7 +158,7 @@ workbench/model_scripts/quant_regression/requirements.txt,sha256=jWlGc7HH7vqyukT
|
|
|
158
158
|
workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=c73ZpJBlU5k13Nx-ZDkLXu7da40CYyhwjwwmuPq6uLg,12870
|
|
159
159
|
workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb2gHXu_kpGPho3ANBzlOkfcvs,107
|
|
160
160
|
workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=d4pgeZYFezUQsB-7iIsjsUgB1FM6d27651wpfDdXmI0,12640
|
|
161
|
-
workbench/model_scripts/xgb_model/generated_model_script.py,sha256=
|
|
161
|
+
workbench/model_scripts/xgb_model/generated_model_script.py,sha256=IITiaNcB7kqQtBCTvTbWwCb-vAKNeJsbyxBB691sU8U,21091
|
|
162
162
|
workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
|
|
163
163
|
workbench/model_scripts/xgb_model/xgb_model.template,sha256=RaUr8X6al5R2IILNKgGUH05Gb4H7AFFG9RE524_VH7Q,17935
|
|
164
164
|
workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -167,6 +167,7 @@ workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscu
|
|
|
167
167
|
workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
|
|
168
168
|
workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
|
|
169
169
|
workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
|
|
170
|
+
workbench/scripts/ml_pipeline_launcher.py,sha256=AJF7An1fMdyj3SNPHnGE__NuR89vtPeszqGw6WInGt4,3712
|
|
170
171
|
workbench/scripts/monitor_cloud_watch.py,sha256=5QODOSVmfunf6L-gtK1dhW93z9ZbMy2UEsuyR2tij5E,12463
|
|
171
172
|
workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
|
|
172
173
|
workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
|
|
@@ -274,9 +275,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
|
|
|
274
275
|
workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
|
|
275
276
|
workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
|
|
276
277
|
workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
|
|
277
|
-
workbench-0.8.
|
|
278
|
-
workbench-0.8.
|
|
279
|
-
workbench-0.8.
|
|
280
|
-
workbench-0.8.
|
|
281
|
-
workbench-0.8.
|
|
282
|
-
workbench-0.8.
|
|
278
|
+
workbench-0.8.167.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
|
|
279
|
+
workbench-0.8.167.dist-info/METADATA,sha256=RYkpkQ2sNwdHY4IPmFBH_3UcW-zKAZaonN2hVc_FTuc,9210
|
|
280
|
+
workbench-0.8.167.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
281
|
+
workbench-0.8.167.dist-info/entry_points.txt,sha256=V_v6hQ4DYoCJnTnqbm036reCri_CXkA_ONcRSuF5OKg,305
|
|
282
|
+
workbench-0.8.167.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
|
|
283
|
+
workbench-0.8.167.dist-info/RECORD,,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
cloud_watch = workbench.scripts.monitor_cloud_watch:main
|
|
3
3
|
glue_launcher = workbench.scripts.glue_launcher:main
|
|
4
|
+
ml_pipeline_launcher = workbench.scripts.ml_pipeline_launcher:main
|
|
4
5
|
workbench = workbench.repl.workbench_shell:launch_shell
|
|
5
6
|
workbench_config = workbench.scripts.show_config:main
|
|
File without changes
|
|
File without changes
|
|
File without changes
|