workbench 0.8.168__py3-none-any.whl → 0.8.169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for reference only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic; consult the registry's advisory page for more details.

@@ -27,60 +27,56 @@ def get_batch_role_arn() -> str:
27
27
  return f"arn:aws:iam::{account_id}:role/Workbench-BatchRole"
28
28
 
29
29
 
30
- def ensure_job_definition():
31
- """Register or update the Batch job definition for ML pipeline runner."""
32
- batch = AWSAccountClamp().boto3_session.client("batch")
33
- name = "workbench-ml-pipeline-runner"
34
- response = batch.register_job_definition(
35
- jobDefinitionName=name,
36
- type="container",
37
- platformCapabilities=["FARGATE"],
38
- containerProperties={
39
- "image": get_ecr_image_uri(),
40
- "resourceRequirements": [{"type": "VCPU", "value": "2"}, {"type": "MEMORY", "value": "4096"}],
41
- "jobRoleArn": get_batch_role_arn(),
42
- "executionRoleArn": get_batch_role_arn(),
43
- "environment": [
44
- {"name": "WORKBENCH_BUCKET", "value": workbench_bucket},
45
- {"name": "PYTHONUNBUFFERED", "value": "1"},
46
- ],
47
- # "networkConfiguration": {"assignPublicIp": "ENABLED"}, # Required for ECR Image Pull (when not in VPC)
48
- },
49
- timeout={"attemptDurationSeconds": 10800}, # 3 hours
50
- )
51
- log.info(f"Job definition ready: {name} (revision {response['revision']})")
52
- return name
30
+ def _log_cloudwatch_link(job: dict, message_prefix: str = "View logs") -> None:
31
+ """
32
+ Helper method to log CloudWatch logs link with clickable URL and full URL display.
33
+
34
+ Args:
35
+ job: Batch job description dictionary
36
+ message_prefix: Prefix for the log message (default: "View logs")
37
+ """
38
+ log_stream = job.get("container", {}).get("logStreamName")
39
+ logs_url = get_cloudwatch_logs_url(log_group="/aws/batch/job", log_stream=log_stream)
40
+ if logs_url:
41
+ clickable_url = f"\033]8;;{logs_url}\033\\{logs_url}\033]8;;\033\\"
42
+ log.info(f"{message_prefix}: {clickable_url}")
43
+ else:
44
+ log.info("Check AWS Batch console for logs")
53
45
 
54
46
 
55
- def run_batch_job(script_path: str) -> int:
47
+ def run_batch_job(script_path: str, size: str = "small") -> int:
56
48
  """
57
49
  Submit and monitor an AWS Batch job for ML pipeline execution.
58
- This function:
59
- 1. Uploads the ML pipeline script to S3
60
- 2. Submits a Batch job to run the script in a container
61
- 3. Monitors job status until completion
62
- 4. Returns the job's exit code
50
+
51
+ Uploads script to S3, submits Batch job, monitors until completion or 2 minutes of RUNNING.
63
52
 
64
53
  Args:
65
54
  script_path: Local path to the ML pipeline script
55
+ size: Job size tier - "small" (default), "medium", or "large"
56
+ - small: 2 vCPU, 4GB RAM for lightweight processing
57
+ - medium: 4 vCPU, 8GB RAM for standard ML workloads
58
+ - large: 8 vCPU, 16GB RAM for heavy training/inference
66
59
 
67
60
  Returns:
68
- Exit code from the batch job (0 for success, non-zero for failure)
61
+ Exit code (0 for success/disconnected, non-zero for failure)
69
62
  """
63
+ if size not in ["small", "medium", "large"]:
64
+ raise ValueError(f"Invalid size '{size}'. Must be 'small', 'medium', or 'large'")
65
+
70
66
  batch = AWSAccountClamp().boto3_session.client("batch")
71
67
  script_name = Path(script_path).stem
72
68
 
73
- # Upload script to S3 for the container to download
69
+ # Upload script to S3
74
70
  s3_path = f"s3://{workbench_bucket}/batch-jobs/{Path(script_path).name}"
75
71
  log.info(f"Uploading script to {s3_path}")
76
72
  upload_content_to_s3(Path(script_path).read_text(), s3_path)
77
73
 
78
- # Submit the Batch job
74
+ # Submit job
79
75
  job_name = f"workbench_{script_name}_{datetime.now():%Y%m%d_%H%M%S}"
80
76
  response = batch.submit_job(
81
77
  jobName=job_name,
82
78
  jobQueue="workbench-job-queue",
83
- jobDefinition=ensure_job_definition(),
79
+ jobDefinition=f"workbench-ml-pipeline-{size}",
84
80
  containerOverrides={
85
81
  "environment": [
86
82
  {"name": "ML_PIPELINE_S3_PATH", "value": s3_path},
@@ -89,36 +85,38 @@ def run_batch_job(script_path: str) -> int:
89
85
  },
90
86
  )
91
87
  job_id = response["jobId"]
92
- log.info(f"Submitted job: {job_name} ({job_id})")
88
+ log.info(f"Submitted job: {job_name} ({job_id}) using {size} tier")
93
89
 
94
- # Monitor job execution
95
- last_status = None
90
+ # Monitor job
91
+ last_status, running_start = None, None
96
92
  while True:
97
- # Check job status
98
93
  job = batch.describe_jobs(jobs=[job_id])["jobs"][0]
99
94
  status = job["status"]
95
+
100
96
  if status != last_status:
101
97
  log.info(f"Job status: {status}")
102
98
  last_status = status
99
+ if status == "RUNNING":
100
+ running_start = time.time()
101
+
102
+ # Disconnect after 2 minutes of running
103
+ if status == "RUNNING" and running_start and (time.time() - running_start >= 120):
104
+ log.info("✅ ML Pipeline is running successfully!")
105
+ _log_cloudwatch_link(job, "📊 Monitor logs")
106
+ return 0
103
107
 
104
- # Check if job completed
108
+ # Handle completion
105
109
  if status in ["SUCCEEDED", "FAILED"]:
106
110
  exit_code = job.get("attempts", [{}])[-1].get("exitCode", 1)
107
- if status == "FAILED":
108
- log.error(f"Job failed: {job.get('statusReason', 'Unknown reason')}")
109
- else:
110
- log.info("Job completed successfully")
111
-
112
- # Get CloudWatch logs URL
113
- log_stream_name = job.get("container", {}).get("logStreamName")
114
- logs_url = get_cloudwatch_logs_url(log_group="/aws/batch/job", log_stream=log_stream_name)
115
- if logs_url:
116
- # OSC 8 hyperlink format for modern terminals
117
- clickable_url = f"\033]8;;{logs_url}\033\\{logs_url}\033]8;;\033\\"
118
- log.info(f"View logs: {clickable_url}")
111
+ msg = (
112
+ "Job completed successfully"
113
+ if status == "SUCCEEDED"
114
+ else f"Job failed: {job.get('statusReason', 'Unknown')}"
115
+ )
116
+ log.info(msg) if status == "SUCCEEDED" else log.error(msg)
117
+ _log_cloudwatch_link(job)
119
118
  return exit_code
120
119
 
121
- # Sleep a bit before next status check
122
120
  time.sleep(10)
123
121
 
124
122
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: workbench
3
- Version: 0.8.168
3
+ Version: 0.8.169
4
4
  Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
5
5
  Author-email: SuperCowPowers LLC <support@supercowpowers.com>
6
6
  License-Expression: MIT
@@ -167,7 +167,7 @@ workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscu
167
167
  workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
168
168
  workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
169
169
  workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
170
- workbench/scripts/ml_pipeline_launcher.py,sha256=RUKUBERL7RE-uNs_ttkPUa6Rf-QJERYWIhp_XLOtF78,5083
170
+ workbench/scripts/ml_pipeline_launcher.py,sha256=fjI35SXi9CDSQ6Lan7qGcLAHkVCDioyhbPlo0eDHDxQ,4913
171
171
  workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
172
172
  workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
173
173
  workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
@@ -276,9 +276,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
276
276
  workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
277
277
  workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
278
278
  workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
279
- workbench-0.8.168.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
280
- workbench-0.8.168.dist-info/METADATA,sha256=PTFR16ft5NCrG-_umsJKrIJSa3eLnpju1EkXZmafxxM,9210
281
- workbench-0.8.168.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
282
- workbench-0.8.168.dist-info/entry_points.txt,sha256=V_v6hQ4DYoCJnTnqbm036reCri_CXkA_ONcRSuF5OKg,305
283
- workbench-0.8.168.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
284
- workbench-0.8.168.dist-info/RECORD,,
279
+ workbench-0.8.169.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
280
+ workbench-0.8.169.dist-info/METADATA,sha256=AoOujKSh6ueEHjNLcz8g5UMWZr5bGZrzk-ycBaw62n0,9210
281
+ workbench-0.8.169.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
282
+ workbench-0.8.169.dist-info/entry_points.txt,sha256=V_v6hQ4DYoCJnTnqbm036reCri_CXkA_ONcRSuF5OKg,305
283
+ workbench-0.8.169.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
284
+ workbench-0.8.169.dist-info/RECORD,,