workbench 0.8.165__py3-none-any.whl → 0.8.166__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of workbench might be problematic.
- workbench/api/meta.py +5 -2
- workbench/core/artifacts/monitor_core.py +31 -30
- workbench/core/cloud_platform/aws/aws_meta.py +1 -1
- workbench/core/cloud_platform/aws/aws_session.py +1 -1
- workbench/repl/workbench_shell.py +1 -1
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/config_manager.py +1 -1
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/METADATA +2 -2
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/RECORD +14 -15
- workbench/utils/execution_environment.py +0 -211
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/WHEEL +0 -0
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/top_level.txt +0 -0
workbench/api/meta.py
CHANGED
@@ -113,13 +113,16 @@ class Meta(CloudMeta):
         """
         return super().models(details=details)
 
-    def endpoints(self) -> pd.DataFrame:
+    def endpoints(self, details: bool = False) -> pd.DataFrame:
         """Get a summary of the Endpoints deployed in the Cloud Platform
 
+        Args:
+            details (bool, optional): Include detailed information. Defaults to False.
+
         Returns:
            pd.DataFrame: A summary of the Endpoints in the Cloud Platform
         """
-        return super().endpoints()
+        return super().endpoints(details=details)
 
     def pipelines(self) -> pd.DataFrame:
         """Get a summary of the ML Pipelines deployed in the Cloud Platform
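The only behavioral change here is that `Meta.endpoints()` now accepts and forwards a `details` flag, mirroring the `models(details=details)` call visible in the context lines. A minimal usage sketch (assuming a configured Workbench AWS environment):

```python
from workbench.api.meta import Meta

meta = Meta()

# Default summary view (unchanged behavior)
summary_df = meta.endpoints()

# New in 0.8.166: forward details=True to CloudMeta.endpoints()
detailed_df = meta.endpoints(details=True)
print(detailed_df.head())
```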
workbench/core/artifacts/monitor_core.py
CHANGED

@@ -2,6 +2,8 @@
 
 import logging
 import json
+import re
+from datetime import datetime
 from typing import Union, Tuple
 import pandas as pd
 from sagemaker import Predictor
@@ -283,67 +285,66 @@ class MonitorCore:
             self.log.error(f"Error checking data capture percentage: {e}")
             return None
 
-    def get_captured_data(self,
+    def get_captured_data(self, from_date=None, add_timestamp=True) -> Tuple[pd.DataFrame, pd.DataFrame]:
         """
         Read and process captured data from S3.
 
         Args:
-
+            from_date (str, optional): Only process files from this date onwards (YYYY-MM-DD format).
                 Defaults to None to process all files.
             add_timestamp (bool, optional): Whether to add a timestamp column to the DataFrame.
 
         Returns:
             Tuple[pd.DataFrame, pd.DataFrame]: Processed input and output DataFrames.
         """
-        # List files in the specified S3 path
         files = wr.s3.list_objects(self.data_capture_path)
         if not files:
             self.log.warning(f"No data capture files found in {self.data_capture_path}.")
             return pd.DataFrame(), pd.DataFrame()
 
-
-
-
+        # Filter by date if specified
+        if from_date:
+            from_date_obj = datetime.strptime(from_date, "%Y-%m-%d").date()
+            files = [f for f in files if self._file_date_filter(f, from_date_obj)]
+            self.log.info(f"Processing {len(files)} files from {from_date} onwards.")
+        else:
+            self.log.info(f"Processing all {len(files)} files.")
         files.sort()
 
-        #
-
-
-            self.log.info(f"Processing all {len(files)} files.")
-        else:
-            files_to_process = files[-max_files:] if files else []
-            self.log.info(f"Processing the {len(files_to_process)} most recent file(s).")
-
-        # Process each file
-        all_input_dfs = []
-        all_output_dfs = []
-        for file_path in files_to_process:
-            self.log.info(f"Processing {file_path}...")
+        # Process files
+        all_input_dfs, all_output_dfs = [], []
+        for file_path in files:
             try:
-                # Read the JSON lines file
                 df = wr.s3.read_json(path=file_path, lines=True)
                 if not df.empty:
                     input_df, output_df = process_data_capture(df)
-                    # Generate a timestamp column if requested
                     if add_timestamp:
-
-                        file_metadata = wr.s3.describe_objects(path=file_path)
-                        timestamp = file_metadata[file_path]["LastModified"]
+                        timestamp = wr.s3.describe_objects(path=file_path)[file_path]["LastModified"]
                         output_df["timestamp"] = timestamp
-
-                    # Append the processed DataFrames to the lists
                     all_input_dfs.append(input_df)
                     all_output_dfs.append(output_df)
             except Exception as e:
-                self.log.warning(f"Error processing
+                self.log.warning(f"Error processing {file_path}: {e}")
 
-
-
-            self.log.warning("No valid data was processed from the captured files.")
+        if not all_input_dfs:
+            self.log.warning("No valid data was processed.")
             return pd.DataFrame(), pd.DataFrame()
 
         return pd.concat(all_input_dfs, ignore_index=True), pd.concat(all_output_dfs, ignore_index=True)
 
+    def _file_date_filter(self, file_path, from_date_obj):
+        """Extract date from S3 path and compare with from_date."""
+        try:
+            # Match YYYY/MM/DD pattern in the path
+            date_match = re.search(r"/(\d{4})/(\d{2})/(\d{2})/", file_path)
+            if date_match:
+                year, month, day = date_match.groups()
+                file_date = datetime(int(year), int(month), int(day)).date()
+                return file_date >= from_date_obj
+            return False  # No date pattern found
+        except ValueError:
+            return False
+
     def baseline_exists(self) -> bool:
         """
         Check if baseline files exist in S3.
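This rewrite replaces the old `max_files` selection with date-based filtering: SageMaker data capture writes objects under `.../YYYY/MM/DD/...` prefixes, which is exactly what the new `_file_date_filter()` helper matches. A self-contained sketch of that filter logic against hypothetical capture keys:

```python
import re
from datetime import datetime

def file_date_filter(file_path: str, from_date_obj) -> bool:
    """Mirrors MonitorCore._file_date_filter(): match .../YYYY/MM/DD/... in an S3 key."""
    date_match = re.search(r"/(\d{4})/(\d{2})/(\d{2})/", file_path)
    if date_match:
        year, month, day = date_match.groups()
        return datetime(int(year), int(month), int(day)).date() >= from_date_obj
    return False  # No date pattern found

# Hypothetical keys shaped like SageMaker data-capture output
files = [
    "s3://bucket/capture/my-endpoint/variant-1/2025/01/30/05-12-00-000-abc.jsonl",
    "s3://bucket/capture/my-endpoint/variant-1/2025/02/02/09-45-10-000-def.jsonl",
]
from_date_obj = datetime.strptime("2025-02-01", "%Y-%m-%d").date()
print([f for f in files if file_date_filter(f, from_date_obj)])  # Only the 2025/02/02 key
```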
workbench/core/cloud_platform/aws/aws_meta.py
CHANGED

@@ -308,7 +308,7 @@ class AWSMeta:
             "Status": endpoint_details.get("EndpointStatus", "-"),
             "Config": endpoint_details.get("EndpointConfigName", "-"),
             "Variant": endpoint_details["config"]["variant"],
-            "Capture": str(endpoint_details.get("DataCaptureConfig", {}).get("EnableCapture", "
+            "Capture": str(endpoint_details.get("DataCaptureConfig", {}).get("EnableCapture", "-")),
             "Samp(%)": str(endpoint_details.get("DataCaptureConfig", {}).get("CurrentSamplingPercentage", "-")),
             "Tags": aws_tags.get("workbench_tags", "-"),
             "Monitored": endpoint_details["monitored"],
@@ -10,7 +10,7 @@ import logging
 
 # Workbench Imports
 from workbench.utils.config_manager import ConfigManager
-from
+from workbench_bridges.utils.execution_environment import running_on_lambda, running_on_glue
 
 # Attempt to import IPython-related utilities
 try:
@@ -72,7 +72,7 @@ if not ConfigManager().config_okay():
 
 # Set the log level to important
 log = logging.getLogger("workbench")
-log.setLevel(
+log.setLevel(logging.INFO)
 log.addFilter(
     lambda record: not (
         record.getMessage().startswith("Async: Metadata") or record.getMessage().startswith("Updated Metadata")
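The `addFilter` call shown in context relies on `logging` accepting any callable as a filter (a record is dropped when the callable returns a falsy value). A standalone demonstration of the same suppression pattern, with hypothetical messages:

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("demo")

# Same pattern as above: drop records whose message starts with a noisy prefix
log.addFilter(lambda record: not record.getMessage().startswith("Async: Metadata"))

log.info("Async: Metadata refresh started")  # Suppressed by the filter
log.info("Endpoint deployed")                # Passes through
```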
workbench/utils/aws_utils.py
CHANGED
@@ -55,7 +55,8 @@ def aws_throttle(func=None, retry_intervals=None):
     if func is None:
         return lambda f: aws_throttle(f, retry_intervals=retry_intervals)
 
-
+    # This is currently commented out (we might want to use it later)
+    # service_hold_time = 2  # Seconds to wait before calling AWS function
     default_intervals = [2**i for i in range(1, 9)]  # Default exponential backoff: 2, 4, 8... 256 seconds
     intervals = retry_intervals or default_intervals
 
@@ -64,8 +65,8 @@ def aws_throttle(func=None, retry_intervals=None):
         for attempt, delay in enumerate(intervals, start=1):
             try:
                 # Add sleep before calling AWS func if running as a service
-                if cm.running_as_service:
-
+                # if cm.running_as_service:
+                #     time.sleep(service_hold_time)
                 return func(*args, **kwargs)
             except ClientError as e:
                 if e.response["Error"]["Code"] == "ThrottlingException":
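Both hunks sit inside the `aws_throttle` decorator, whose overall shape is visible in the context lines: iterate over a backoff schedule, call the wrapped function, and sleep on `ThrottlingException`. A condensed sketch of that pattern (the real decorator in workbench/utils/aws_utils.py handles more than this):

```python
import time
import logging
from functools import wraps
from botocore.exceptions import ClientError

log = logging.getLogger("workbench")

def aws_throttle(func=None, retry_intervals=None):
    """Condensed sketch: exponential backoff on AWS ThrottlingException."""
    if func is None:
        # Support both @aws_throttle and @aws_throttle(retry_intervals=[...])
        return lambda f: aws_throttle(f, retry_intervals=retry_intervals)

    intervals = retry_intervals or [2**i for i in range(1, 9)]  # 2, 4, 8... 256 seconds

    @wraps(func)
    def wrapper(*args, **kwargs):
        for attempt, delay in enumerate(intervals, start=1):
            try:
                return func(*args, **kwargs)
            except ClientError as e:
                if e.response["Error"]["Code"] == "ThrottlingException":
                    log.warning(f"Throttled (attempt {attempt}); sleeping {delay}s")
                    time.sleep(delay)
                else:
                    raise
        return func(*args, **kwargs)  # Final attempt after the schedule is exhausted

    return wrapper
```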
@@ -9,7 +9,7 @@ from typing import Any, Dict
 
 # Workbench imports
 from workbench.utils.license_manager import LicenseManager
-from
+from workbench_bridges.utils.execution_environment import running_as_service
 
 # Python 3.9 compatibility
 from workbench.utils.resource_utils import get_resource_path
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.165
+Version: 0.8.166
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License-Expression: MIT
@@ -30,7 +30,7 @@ Requires-Dist: joblib>=1.3.2
 Requires-Dist: requests>=2.26.0
 Requires-Dist: rdkit>=2024.9.5
 Requires-Dist: mordredcommunity>=2.0.6
-Requires-Dist: workbench-bridges>=0.1.
+Requires-Dist: workbench-bridges>=0.1.10
 Provides-Extra: ui
 Requires-Dist: plotly>=6.0.0; extra == "ui"
 Requires-Dist: dash>3.0.0; extra == "ui"
@@ -34,7 +34,7 @@ workbench/api/df_store.py,sha256=Wybb3zO-jPpAi2Ns8Ks1-lagvXAaBlRpBZHhnnl3Lms,613
 workbench/api/endpoint.py,sha256=RWGqxsCW_pMiENMb_XZlm2ZCldMS4suEBM3F5gT3hYI,3814
 workbench/api/feature_set.py,sha256=wzNxNjN0K2FaIC7QUIogMnoHqw2vo0iAHYlGk6fWLCw,6649
 workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
-workbench/api/meta.py,sha256=
+workbench/api/meta.py,sha256=1_9989cPvf3hd3tA-83hLijOGNnhwXAF8aZF45adeDQ,8596
 workbench/api/model.py,sha256=2hPN8UK4whZ0kDgPtbR7lEknw7XhH5hGYaHA55jmZWQ,4529
 workbench/api/monitor.py,sha256=kQHSFiVLRWnHekSdatMKR3QbRj1BBNrVXpZgvV83LPM,5027
 workbench/api/parameter_store.py,sha256=7BObkuATuP6C5AG_46kCWsmuCwuh1vgMJDBSN0gTkwM,4294
@@ -56,16 +56,16 @@ workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rG
 workbench/core/artifacts/endpoint_core.py,sha256=6uDOl-VKrTbLMlHZEYFY80XwrCP5H0W36JoHySjhl7M,48163
 workbench/core/artifacts/feature_set_core.py,sha256=055VdSYR09HP4ygAuYvIYtHQ7Ec4XxsZygpgEl5H5jQ,29136
 workbench/core/artifacts/model_core.py,sha256=U0dSkpZMrsIgbUglVkPwAgN0gji7Oa7glOjqMQJDAzE,50927
-workbench/core/artifacts/monitor_core.py,sha256=
+workbench/core/artifacts/monitor_core.py,sha256=AIgceuO_YqAmttDdJWcdFcrvE93icfSsTr6WgXWGjYo,37684
 workbench/core/cloud_platform/cloud_meta.py,sha256=-g4-LTC3D0PXb3VfaXdLR1ERijKuHdffeMK_zhD-koQ,8809
 workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97
 workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=OzFknZXKW7VTvnDGGX4BXKoh0i1gQ7yaEBhkLCyHFSs,6310
 workbench/core/cloud_platform/aws/aws_df_store.py,sha256=utRIlTCPwFneHHZ8_Z3Hw3rOJSeryiFA4wBtucxULRQ,15055
 workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
-workbench/core/cloud_platform/aws/aws_meta.py,sha256=
+workbench/core/cloud_platform/aws/aws_meta.py,sha256=xpidYpDydgWmKmJPrNFWbggahDY-nRXzXTRaEA3c5Sc,34587
 workbench/core/cloud_platform/aws/aws_parameter_store.py,sha256=9ekuMOQFHFMIEV68UbHhS_fLB9iqG5Hvu4EV6iamEpk,10400
 workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
-workbench/core/cloud_platform/aws/aws_session.py,sha256=
+workbench/core/cloud_platform/aws/aws_session.py,sha256=T0Vt8K2MRRznOwxC_MqVgGg2p_CLr0jb3hB5LpwXy_w,6980
 workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
 workbench/core/cloud_platform/azure/README.md,sha256=ciIXZwjtOPYf9ViquFQxjLKuFwje_hZJHJ2hMQghziI,101
 workbench/core/cloud_platform/gcp/README.md,sha256=MzObe3mWQzjviKD2aXlAV9r_bU4HzTJGapWRsFn6pCU,106
@@ -162,7 +162,7 @@ workbench/model_scripts/xgb_model/generated_model_script.py,sha256=mYl7Wfokbynqr
 workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/xgb_model/xgb_model.template,sha256=RaUr8X6al5R2IILNKgGUH05Gb4H7AFFG9RE524_VH7Q,17935
 workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/repl/workbench_shell.py,sha256=
+workbench/repl/workbench_shell.py,sha256=eJ3rpYgEwZjhrVVCaJHht2N5BrimN6mbxqHXGrJmwC8,22130
 workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscuwLJ57JmuOc,433
 workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
 workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
@@ -193,13 +193,13 @@ workbench/utils/ai_compound_generator.py,sha256=8no4ufP1LJhQfPqaDHvqUYOAh1kzeDVe
 workbench/utils/ai_summary.py,sha256=KwutaozocDxrfErodYpFCDmt1c8zVKf3Jnu4UzBMLJU,4155
 workbench/utils/ai_synth.py,sha256=3mz7NwZeOuNH5ju5n_pikOwfxtVN6LaipLFkAZqoN2U,5607
 workbench/utils/athena_utils.py,sha256=DDyLhJujzh1PfejtGU7ZzOf5hLPOgoXmi4Lrn-_AJzU,4812
-workbench/utils/aws_utils.py,sha256=
+workbench/utils/aws_utils.py,sha256=x8c_WxtdSKmBqNg8P_Z6K2m4AsSMEiD_kh2nVaUZ28c,22077
 workbench/utils/bulk_utils.py,sha256=s1lYN2Uk536MNGetekLYL_VL0N34hUjk1FX9BAz3Qu0,1182
 workbench/utils/cache.py,sha256=0R5RXYEz_XHARK3anmQC4VRMawMks_cJ8S4vwC2roAE,5524
 workbench/utils/chem_utils.py,sha256=tLTAvLKTOiYSzbVQF0M8V5-ej36IVgr21CNB2vVJjYQ,56780
-workbench/utils/cloudwatch_handler.py,sha256=
+workbench/utils/cloudwatch_handler.py,sha256=t0L280Qa1nMq95dwnf8lB5g8FHrQAyGY5S4JwP3yIa8,5165
 workbench/utils/color_utils.py,sha256=TmDGLK44t975lkfjt_1O-ee02QxrKfke7vPuXb-V-Uo,11779
-workbench/utils/config_manager.py,sha256=
+workbench/utils/config_manager.py,sha256=SBBmO1RGCQ_Zyh91tDxL1HOm5B0v38ImlLnOsEKzXPU,17649
 workbench/utils/dashboard_metrics.py,sha256=cNFI0GIAjd_IiDzM1oebsJ2QkRZuW068W_66ZC3J100,7398
 workbench/utils/datetime_utils.py,sha256=r3G_KB2euu26lwVbDXYXPJEpJCZwin2Iph7BiBIoybg,4454
 workbench/utils/deprecated_utils.py,sha256=qniHVpDGuwOnhxn65LofDQ_EA2OhSUcZLPxAXtx7FgA,3540
@@ -207,7 +207,6 @@ workbench/utils/df_to_endpoint.py,sha256=bIb1CDko8_BClX5wcQuBbmcGH79n3oHUcb_1jdU
 workbench/utils/ecs_info.py,sha256=Gs9jNb4vcj2pziufIOI4BVIH1J-3XBMtWm1phVh8oRY,2873
 workbench/utils/endpoint_metrics.py,sha256=_4WVU6cLLuV0t_i0PSvhi0EoA5ss5aDFe7ZDpumx2R8,7822
 workbench/utils/endpoint_utils.py,sha256=3-njrhMSAIOaEEiH7qMA9vgD3I7J2S9iUAcqXKx3OBo,7104
-workbench/utils/execution_environment.py,sha256=n8XJa5-fNXYeUA6YFXdk3Z9ZVwIlmakF6dQpwCl5gK0,6920
 workbench/utils/extract_model_artifact.py,sha256=sFwkJd5mfJ1PU37pIHVmUIQS-taIUJdqi3D9-qRmy8g,7870
 workbench/utils/fast_inference.py,sha256=Sm0EV1oPsYYGqiDBVUu3Nj6Ti68JV-UR2S0ZliBDPTk,6148
 workbench/utils/glue_utils.py,sha256=dslfXQcJ4C-mGmsD6LqeK8vsXBez570t3fZBVZLV7HA,2039
@@ -275,9 +274,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.166.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
+workbench-0.8.166.dist-info/METADATA,sha256=a7nuZYqwYcDrQwDLekEr7_Afyxv2IUjxZoD1YwIqzwo,9210
+workbench-0.8.166.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+workbench-0.8.166.dist-info/entry_points.txt,sha256=oZykkheWiiIBjRE8cS5SdcxwmZKSFaQEGwMBjNh-eNM,238
+workbench-0.8.166.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.166.dist-info/RECORD,,
workbench/utils/execution_environment.py
DELETED

@@ -1,211 +0,0 @@
-"""ExecutionEnvironment provides logic/functionality to figure out the current execution environment"""
-
-import os
-import sys
-import logging
-import requests
-from typing import Union
-import boto3
-from datetime import datetime, timezone
-
-# Workbench imports
-from workbench.utils.glue_utils import get_resolved_options
-from workbench.utils.deprecated_utils import deprecated
-
-# Set up the logger
-log = logging.getLogger("workbench")
-
-
-def running_on_glue():
-    """
-    Check if the current execution environment is an AWS Glue job.
-
-    Returns:
-        bool: True if running in AWS Glue environment, False otherwise.
-    """
-    # Check if GLUE_VERSION or GLUE_PYTHON_VERSION is in the environment
-    if "GLUE_VERSION" in os.environ or "GLUE_PYTHON_VERSION" in os.environ:
-        log.info("Running in AWS Glue Environment...")
-        return True
-    else:
-        return False
-
-
-def running_on_lambda():
-    """
-    Check if the current execution environment is an AWS Lambda function.
-
-    Returns:
-        bool: True if running in AWS Lambda environment, False otherwise.
-    """
-    if "AWS_LAMBDA_FUNCTION_NAME" in os.environ:
-        log.info("Running in AWS Lambda Environment...")
-        return True
-    else:
-        return False
-
-
-def running_on_docker() -> bool:
-    """Check if the current environment is running on a Docker container.
-
-    Returns:
-        bool: True if running in a Docker container, False otherwise.
-    """
-    try:
-        # Docker creates a .dockerenv file at the root of the directory tree inside the container.
-        # If this file exists, it is very likely that we are running inside a Docker container.
-        with open("/.dockerenv") as f:
-            return True
-    except FileNotFoundError:
-        pass
-
-    try:
-        # Another method is to check the contents of /proc/self/cgroup which should be different
-        # inside a Docker container.
-        with open("/proc/self/cgroup") as f:
-            if any("docker" in line for line in f):
-                return True
-    except FileNotFoundError:
-        pass
-
-    # Check if we are running on ECS
-    if running_on_ecs():
-        return True
-
-    # Probably not running in a Docker container
-    return False
-
-
-def running_on_ecs() -> bool:
-    """
-    Check if the current environment is running on AWS ECS.
-
-    Returns:
-        bool: True if running on AWS ECS, False otherwise.
-    """
-    indicators = [
-        "ECS_SERVICE_NAME",
-        "ECS_CONTAINER_METADATA_URI",
-        "ECS_CONTAINER_METADATA_URI_V4",
-        "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
-        "AWS_EXECUTION_ENV",
-    ]
-    return any(indicator in os.environ for indicator in indicators)
-
-
-def running_as_service() -> bool:
-    """
-    Check if the current environment is running as a service (e.g. Docker, ECS, Glue, Lambda).
-
-    Returns:
-        bool: True if running as a service, False otherwise.
-    """
-    return running_on_docker() or running_on_glue() or running_on_lambda()
-
-
-def _glue_job_from_script_name(args):
-    """Get the Glue Job Name from the script name"""
-    try:
-        script_name = args["scriptLocation"]
-        return os.path.splitext(os.path.basename(script_name))[0]
-    except Exception:
-        return "unknown"
-
-
-def glue_job_name():
-    """Get the Glue Job Name from the environment or script name"""
-    # Define the required argument
-    args = get_resolved_options(sys.argv)
-
-    # Get the job name
-    job_name = args.get("JOB_NAME") or _glue_job_from_script_name(args)
-    return job_name
-
-
-@deprecated(version=0.9)
-def glue_job_run_id(job_name: str, session: boto3.Session) -> Union[str, None]:
-    """Retrieve the Glue Job Run ID closest to the current time for the given job name.
-    Note: This mostly doesn't work, it will grab A glue job id but often not the correct one.
-    For now, I would just skip using this
-    """
-    try:
-        # Set current time in UTC
-        current_time = datetime.now(timezone.utc)
-
-        job_runs = session.client("glue").get_job_runs(JobName=job_name)
-        if job_runs["JobRuns"]:
-            # Find the job run with the StartedOn time closest to the current time
-            closest_job_run = min(job_runs["JobRuns"], key=lambda run: abs(run["StartedOn"] - current_time))
-            job_id = closest_job_run["Id"]
-            return job_id[:9]  # Shorten the Job Run ID to 9 characters
-
-        log.error(f"No runs found for Glue Job '{job_name}', returning None for Job Run ID.")
-        return None
-
-    except session.client("glue").exceptions.EntityNotFoundException:
-        log.error(f"Glue Job '{job_name}' not found, returning None for Job Run ID.")
-        return None
-    except Exception as e:
-        log.error(f"An error occurred while retrieving job run ID: {e}")
-        return None
-
-
-def ecs_job_name():
-    """Get the ECS Job Name from the metadata endpoint or environment variables."""
-    # Attempt to get the job name from ECS metadata
-    ecs_metadata_uri = os.environ.get("ECS_CONTAINER_METADATA_URI_V4")
-
-    if ecs_metadata_uri:
-        try:
-            response = requests.get(f"{ecs_metadata_uri}/task")
-            if response.status_code == 200:
-                metadata = response.json()
-                job_name = metadata.get("Family")  # 'Family' represents the ECS task definition family name
-                if job_name:
-                    return job_name
-        except requests.RequestException as e:
-            # Log the error or handle it as needed
-            log.error(f"Failed to fetch ECS metadata: {e}")
-
-    # Fallback to environment variables if metadata is not available
-    job_name = os.environ.get("ECS_SERVICE_NAME", "unknown")
-    return job_name
-
-
-if __name__ == "__main__":
-    """Test the Execution Environment utilities"""
-
-    # Test running_on_glue
-    assert running_on_glue() is False
-    os.environ["GLUE_VERSION"] = "1.0"
-    assert running_on_glue() is True
-    del os.environ["GLUE_VERSION"]
-
-    # Test running_on_lambda
-    assert running_on_lambda() is False
-    os.environ["AWS_LAMBDA_FUNCTION_NAME"] = "my_lambda_function"
-    assert running_on_lambda() is True
-    del os.environ["AWS_LAMBDA_FUNCTION_NAME"]
-
-    # Test running_on_docker
-    assert running_on_docker() is False
-    os.environ["ECS_CONTAINER_METADATA_URI"] = "http://localhost:8080"
-    assert running_on_docker() is True
-    del os.environ["ECS_CONTAINER_METADATA_URI"]
-
-    # Test running_on_ecs
-    assert running_on_ecs() is False
-    os.environ["ECS_CONTAINER_METADATA_URI"] = "http://localhost:8080"
-    assert running_on_ecs() is True
-    del os.environ["ECS_CONTAINER_METADATA_URI"]
-
-    # Test getting the Glue Job Name
-    print(glue_job_name())
-
-    # Test getting the Glue Job Run ID
-    from workbench.core.cloud_platform.aws.aws_session import AWSSession
-
-    session = AWSSession().boto3_session
-    print(glue_job_run_id("Test_Workbench_Shell", session))
-
-    print("All tests passed!")
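This 211-line deletion is the other half of the import changes earlier in the diff: the execution-environment helpers now come from the separate workbench-bridges package (pinned to `>=0.1.10` in METADATA). Assuming the bridged module keeps the signatures shown in the new import lines:

```python
# These imports appear verbatim in the updated modules above
from workbench_bridges.utils.execution_environment import (
    running_on_lambda,
    running_on_glue,
    running_as_service,
)

if running_as_service():
    print("Running as a service (Docker/ECS/Glue/Lambda)")
else:
    print("Running locally")
```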
Files without changes: {workbench-0.8.165.dist-info → workbench-0.8.166.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt