workbench 0.8.165__py3-none-any.whl → 0.8.166__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic.

workbench/api/meta.py CHANGED
@@ -113,13 +113,16 @@ class Meta(CloudMeta):
        """
        return super().models(details=details)

-     def endpoints(self) -> pd.DataFrame:
+     def endpoints(self, details: bool = False) -> pd.DataFrame:
        """Get a summary of the Endpoints deployed in the Cloud Platform

+         Args:
+             details (bool, optional): Include detailed information. Defaults to False.
+
        Returns:
            pd.DataFrame: A summary of the Endpoints in the Cloud Platform
        """
-         return super().endpoints()
+         return super().endpoints(details=details)

    def pipelines(self) -> pd.DataFrame:
        """Get a summary of the ML Pipelines deployed in the Cloud Platform
@@ -2,6 +2,8 @@

import logging
import json
+ import re
+ from datetime import datetime
from typing import Union, Tuple
import pandas as pd
from sagemaker import Predictor
@@ -283,67 +285,66 @@ class MonitorCore:
            self.log.error(f"Error checking data capture percentage: {e}")
            return None

-     def get_captured_data(self, max_files=None, add_timestamp=True) -> Tuple[pd.DataFrame, pd.DataFrame]:
+     def get_captured_data(self, from_date=None, add_timestamp=True) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Read and process captured data from S3.

        Args:
-             max_files (int, optional): Maximum number of files to process.
+             from_date (str, optional): Only process files from this date onwards (YYYY-MM-DD format).
                Defaults to None to process all files.
            add_timestamp (bool, optional): Whether to add a timestamp column to the DataFrame.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame]: Processed input and output DataFrames.
        """
-         # List files in the specified S3 path
        files = wr.s3.list_objects(self.data_capture_path)
        if not files:
            self.log.warning(f"No data capture files found in {self.data_capture_path}.")
            return pd.DataFrame(), pd.DataFrame()

-         self.log.info(f"Found {len(files)} files in {self.data_capture_path}.")
-
-         # Sort files by timestamp (assuming the naming convention includes timestamp)
+         # Filter by date if specified
+         if from_date:
+             from_date_obj = datetime.strptime(from_date, "%Y-%m-%d").date()
+             files = [f for f in files if self._file_date_filter(f, from_date_obj)]
+             self.log.info(f"Processing {len(files)} files from {from_date} onwards.")
+         else:
+             self.log.info(f"Processing all {len(files)} files.")
        files.sort()

-         # Select files to process
-         if max_files is None:
-             files_to_process = files
-             self.log.info(f"Processing all {len(files)} files.")
-         else:
-             files_to_process = files[-max_files:] if files else []
-             self.log.info(f"Processing the {len(files_to_process)} most recent file(s).")
-
-         # Process each file
-         all_input_dfs = []
-         all_output_dfs = []
-         for file_path in files_to_process:
-             self.log.info(f"Processing {file_path}...")
+         # Process files
+         all_input_dfs, all_output_dfs = [], []
+         for file_path in files:
            try:
-                 # Read the JSON lines file
                df = wr.s3.read_json(path=file_path, lines=True)
                if not df.empty:
                    input_df, output_df = process_data_capture(df)
-                     # Generate a timestamp column if requested
                    if add_timestamp:
-                         # Get file metadata to extract last modified time
-                         file_metadata = wr.s3.describe_objects(path=file_path)
-                         timestamp = file_metadata[file_path]["LastModified"]
+                         timestamp = wr.s3.describe_objects(path=file_path)[file_path]["LastModified"]
                        output_df["timestamp"] = timestamp
-
-                     # Append the processed DataFrames to the lists
                    all_input_dfs.append(input_df)
                    all_output_dfs.append(output_df)
            except Exception as e:
-                 self.log.warning(f"Error processing file {file_path}: {e}")
+                 self.log.warning(f"Error processing {file_path}: {e}")

-         # Combine all DataFrames
-         if not all_input_dfs or not all_output_dfs:
-             self.log.warning("No valid data was processed from the captured files.")
+         if not all_input_dfs:
+             self.log.warning("No valid data was processed.")
            return pd.DataFrame(), pd.DataFrame()

        return pd.concat(all_input_dfs, ignore_index=True), pd.concat(all_output_dfs, ignore_index=True)

+     def _file_date_filter(self, file_path, from_date_obj):
+         """Extract date from S3 path and compare with from_date."""
+         try:
+             # Match YYYY/MM/DD pattern in the path
+             date_match = re.search(r"/(\d{4})/(\d{2})/(\d{2})/", file_path)
+             if date_match:
+                 year, month, day = date_match.groups()
+                 file_date = datetime(int(year), int(month), int(day)).date()
+                 return file_date >= from_date_obj
+             return False  # No date pattern found
+         except ValueError:
+             return False
+
    def baseline_exists(self) -> bool:
        """
        Check if baseline files exist in S3.
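The new from_date filter keys off the /YYYY/MM/DD/ segments that SageMaker data capture writes into its S3 keys; _file_date_filter simply parses that segment. A small standalone sketch of the same pattern (the S3 key below is made up for illustration):

import re
from datetime import datetime

key = "s3://bucket/data-capture/my-endpoint/AllTraffic/2025/01/15/20/capture.jsonl"  # hypothetical key
match = re.search(r"/(\d{4})/(\d{2})/(\d{2})/", key)
if match:
    file_date = datetime(*map(int, match.groups())).date()
    print(file_date >= datetime.strptime("2025-01-01", "%Y-%m-%d").date())  # True

Callers that previously passed max_files now pass from_date instead; leaving it as None still processes every capture file.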
@@ -308,7 +308,7 @@ class AWSMeta:
            "Status": endpoint_details.get("EndpointStatus", "-"),
            "Config": endpoint_details.get("EndpointConfigName", "-"),
            "Variant": endpoint_details["config"]["variant"],
-             "Capture": str(endpoint_details.get("DataCaptureConfig", {}).get("EnableCapture", "False")),
+             "Capture": str(endpoint_details.get("DataCaptureConfig", {}).get("EnableCapture", "-")),
            "Samp(%)": str(endpoint_details.get("DataCaptureConfig", {}).get("CurrentSamplingPercentage", "-")),
            "Tags": aws_tags.get("workbench_tags", "-"),
            "Monitored": endpoint_details["monitored"],
@@ -10,7 +10,7 @@ import logging

# Workbench Imports
from workbench.utils.config_manager import ConfigManager
- from workbench.utils.execution_environment import running_on_lambda, running_on_glue
+ from workbench_bridges.utils.execution_environment import running_on_lambda, running_on_glue

# Attempt to import IPython-related utilities
try:
@@ -72,7 +72,7 @@ if not ConfigManager().config_okay():

    # Set the log level to important
    log = logging.getLogger("workbench")
-     log.setLevel(IMPORTANT_LEVEL_NUM)
+     log.setLevel(logging.INFO)
    log.addFilter(
        lambda record: not (
            record.getMessage().startswith("Async: Metadata") or record.getMessage().startswith("Updated Metadata")
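A standalone sketch of the logging setup in this hunk: an INFO-level workbench logger with a filter that drops the noisy metadata-refresh messages by prefix (basicConfig is added here only so the example prints something):

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("workbench")
log.setLevel(logging.INFO)
log.addFilter(
    lambda record: not (
        record.getMessage().startswith("Async: Metadata") or record.getMessage().startswith("Updated Metadata")
    )
)
log.info("Async: Metadata refresh kicked off")  # filtered out
log.info("Endpoint deployed")                   # passes through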
@@ -55,7 +55,8 @@ def aws_throttle(func=None, retry_intervals=None):
    if func is None:
        return lambda f: aws_throttle(f, retry_intervals=retry_intervals)

-     service_hold_time = 2  # Seconds to wait before calling AWS function
+     # This is currently commented out (we might want to use it later)
+     # service_hold_time = 2  # Seconds to wait before calling AWS function
    default_intervals = [2**i for i in range(1, 9)]  # Default exponential backoff: 2, 4, 8... 256 seconds
    intervals = retry_intervals or default_intervals

@@ -64,8 +65,8 @@ def aws_throttle(func=None, retry_intervals=None):
        for attempt, delay in enumerate(intervals, start=1):
            try:
                # Add sleep before calling AWS func if running as a service
-                 if cm.running_as_service:
-                     time.sleep(service_hold_time)
+                 # if cm.running_as_service:
+                 #     time.sleep(service_hold_time)
                return func(*args, **kwargs)
            except ClientError as e:
                if e.response["Error"]["Code"] == "ThrottlingException":
@@ -5,7 +5,7 @@ import getpass
import time  # For managing send intervals

# Workbench imports
- from workbench.utils.execution_environment import (
+ from workbench_bridges.utils.execution_environment import (
    running_on_lambda,
    running_on_glue,
    running_on_ecs,
@@ -9,7 +9,7 @@ from typing import Any, Dict

# Workbench imports
from workbench.utils.license_manager import LicenseManager
- from workbench.utils.execution_environment import running_as_service
+ from workbench_bridges.utils.execution_environment import running_as_service

# Python 3.9 compatibility
from workbench.utils.resource_utils import get_resource_path
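This import-path change, repeated across several hunks above, tracks the move of the execution_environment module into the workbench-bridges package. Downstream code that still imports from workbench.utils could use a hypothetical compatibility shim like the following; neither package ships this:

try:
    from workbench_bridges.utils.execution_environment import running_as_service
except ImportError:  # older workbench layouts kept the module in workbench.utils
    from workbench.utils.execution_environment import running_as_service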
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: workbench
- Version: 0.8.165
+ Version: 0.8.166
Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
Author-email: SuperCowPowers LLC <support@supercowpowers.com>
License-Expression: MIT
@@ -30,7 +30,7 @@ Requires-Dist: joblib>=1.3.2
Requires-Dist: requests>=2.26.0
Requires-Dist: rdkit>=2024.9.5
Requires-Dist: mordredcommunity>=2.0.6
- Requires-Dist: workbench-bridges>=0.1.8
+ Requires-Dist: workbench-bridges>=0.1.10
Provides-Extra: ui
Requires-Dist: plotly>=6.0.0; extra == "ui"
Requires-Dist: dash>3.0.0; extra == "ui"
@@ -34,7 +34,7 @@ workbench/api/df_store.py,sha256=Wybb3zO-jPpAi2Ns8Ks1-lagvXAaBlRpBZHhnnl3Lms,613
workbench/api/endpoint.py,sha256=RWGqxsCW_pMiENMb_XZlm2ZCldMS4suEBM3F5gT3hYI,3814
workbench/api/feature_set.py,sha256=wzNxNjN0K2FaIC7QUIogMnoHqw2vo0iAHYlGk6fWLCw,6649
workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
- workbench/api/meta.py,sha256=fCOtZMfAHWaerzcsTeFnimXfgV8STe9JDiB7QBogktc,8456
+ workbench/api/meta.py,sha256=1_9989cPvf3hd3tA-83hLijOGNnhwXAF8aZF45adeDQ,8596
workbench/api/model.py,sha256=2hPN8UK4whZ0kDgPtbR7lEknw7XhH5hGYaHA55jmZWQ,4529
workbench/api/monitor.py,sha256=kQHSFiVLRWnHekSdatMKR3QbRj1BBNrVXpZgvV83LPM,5027
workbench/api/parameter_store.py,sha256=7BObkuATuP6C5AG_46kCWsmuCwuh1vgMJDBSN0gTkwM,4294
@@ -56,16 +56,16 @@ workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rG
workbench/core/artifacts/endpoint_core.py,sha256=6uDOl-VKrTbLMlHZEYFY80XwrCP5H0W36JoHySjhl7M,48163
workbench/core/artifacts/feature_set_core.py,sha256=055VdSYR09HP4ygAuYvIYtHQ7Ec4XxsZygpgEl5H5jQ,29136
workbench/core/artifacts/model_core.py,sha256=U0dSkpZMrsIgbUglVkPwAgN0gji7Oa7glOjqMQJDAzE,50927
- workbench/core/artifacts/monitor_core.py,sha256=BP6UuCyBI4zB2wwcIXvUw6RC0EktTcQd5Rv0x73qzio,37670
+ workbench/core/artifacts/monitor_core.py,sha256=AIgceuO_YqAmttDdJWcdFcrvE93icfSsTr6WgXWGjYo,37684
workbench/core/cloud_platform/cloud_meta.py,sha256=-g4-LTC3D0PXb3VfaXdLR1ERijKuHdffeMK_zhD-koQ,8809
workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97
workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=OzFknZXKW7VTvnDGGX4BXKoh0i1gQ7yaEBhkLCyHFSs,6310
workbench/core/cloud_platform/aws/aws_df_store.py,sha256=utRIlTCPwFneHHZ8_Z3Hw3rOJSeryiFA4wBtucxULRQ,15055
workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
- workbench/core/cloud_platform/aws/aws_meta.py,sha256=ZCKr4cMc0XE9HC0FnLJM1wS85kK8zbzo54OIRN7MiLE,34591
+ workbench/core/cloud_platform/aws/aws_meta.py,sha256=xpidYpDydgWmKmJPrNFWbggahDY-nRXzXTRaEA3c5Sc,34587
workbench/core/cloud_platform/aws/aws_parameter_store.py,sha256=9ekuMOQFHFMIEV68UbHhS_fLB9iqG5Hvu4EV6iamEpk,10400
workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
- workbench/core/cloud_platform/aws/aws_session.py,sha256=IIGz0ekbNunWzQaeaZzC2-Vl49o4Lv2F35vLtgjMGsQ,6972
+ workbench/core/cloud_platform/aws/aws_session.py,sha256=T0Vt8K2MRRznOwxC_MqVgGg2p_CLr0jb3hB5LpwXy_w,6980
workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
workbench/core/cloud_platform/azure/README.md,sha256=ciIXZwjtOPYf9ViquFQxjLKuFwje_hZJHJ2hMQghziI,101
workbench/core/cloud_platform/gcp/README.md,sha256=MzObe3mWQzjviKD2aXlAV9r_bU4HzTJGapWRsFn6pCU,106
@@ -162,7 +162,7 @@ workbench/model_scripts/xgb_model/generated_model_script.py,sha256=mYl7Wfokbynqr
workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
workbench/model_scripts/xgb_model/xgb_model.template,sha256=RaUr8X6al5R2IILNKgGUH05Gb4H7AFFG9RE524_VH7Q,17935
workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- workbench/repl/workbench_shell.py,sha256=ms9nVFfKohK8efmiQ2YbOH1OYBRWLgqbByshkcoKDog,22137
+ workbench/repl/workbench_shell.py,sha256=eJ3rpYgEwZjhrVVCaJHht2N5BrimN6mbxqHXGrJmwC8,22130
workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscuwLJ57JmuOc,433
workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
@@ -193,13 +193,13 @@ workbench/utils/ai_compound_generator.py,sha256=8no4ufP1LJhQfPqaDHvqUYOAh1kzeDVe
workbench/utils/ai_summary.py,sha256=KwutaozocDxrfErodYpFCDmt1c8zVKf3Jnu4UzBMLJU,4155
workbench/utils/ai_synth.py,sha256=3mz7NwZeOuNH5ju5n_pikOwfxtVN6LaipLFkAZqoN2U,5607
workbench/utils/athena_utils.py,sha256=DDyLhJujzh1PfejtGU7ZzOf5hLPOgoXmi4Lrn-_AJzU,4812
- workbench/utils/aws_utils.py,sha256=XckM0vzud7Nx1OxD1GoYGLQxdj1PqeQ43cN66tnrRYI,22002
+ workbench/utils/aws_utils.py,sha256=x8c_WxtdSKmBqNg8P_Z6K2m4AsSMEiD_kh2nVaUZ28c,22077
workbench/utils/bulk_utils.py,sha256=s1lYN2Uk536MNGetekLYL_VL0N34hUjk1FX9BAz3Qu0,1182
workbench/utils/cache.py,sha256=0R5RXYEz_XHARK3anmQC4VRMawMks_cJ8S4vwC2roAE,5524
workbench/utils/chem_utils.py,sha256=tLTAvLKTOiYSzbVQF0M8V5-ej36IVgr21CNB2vVJjYQ,56780
- workbench/utils/cloudwatch_handler.py,sha256=dtnkr8tXtTRAASQ60QO0lz3SRA5LEbzsK1VCIqblfKs,5157
+ workbench/utils/cloudwatch_handler.py,sha256=t0L280Qa1nMq95dwnf8lB5g8FHrQAyGY5S4JwP3yIa8,5165
workbench/utils/color_utils.py,sha256=TmDGLK44t975lkfjt_1O-ee02QxrKfke7vPuXb-V-Uo,11779
- workbench/utils/config_manager.py,sha256=Yj43Ta67dn34XdOcKcOvXw38ln6TRiv4DePXjPt2jg8,17641
+ workbench/utils/config_manager.py,sha256=SBBmO1RGCQ_Zyh91tDxL1HOm5B0v38ImlLnOsEKzXPU,17649
workbench/utils/dashboard_metrics.py,sha256=cNFI0GIAjd_IiDzM1oebsJ2QkRZuW068W_66ZC3J100,7398
workbench/utils/datetime_utils.py,sha256=r3G_KB2euu26lwVbDXYXPJEpJCZwin2Iph7BiBIoybg,4454
workbench/utils/deprecated_utils.py,sha256=qniHVpDGuwOnhxn65LofDQ_EA2OhSUcZLPxAXtx7FgA,3540
@@ -207,7 +207,6 @@ workbench/utils/df_to_endpoint.py,sha256=bIb1CDko8_BClX5wcQuBbmcGH79n3oHUcb_1jdU
workbench/utils/ecs_info.py,sha256=Gs9jNb4vcj2pziufIOI4BVIH1J-3XBMtWm1phVh8oRY,2873
workbench/utils/endpoint_metrics.py,sha256=_4WVU6cLLuV0t_i0PSvhi0EoA5ss5aDFe7ZDpumx2R8,7822
workbench/utils/endpoint_utils.py,sha256=3-njrhMSAIOaEEiH7qMA9vgD3I7J2S9iUAcqXKx3OBo,7104
- workbench/utils/execution_environment.py,sha256=n8XJa5-fNXYeUA6YFXdk3Z9ZVwIlmakF6dQpwCl5gK0,6920
workbench/utils/extract_model_artifact.py,sha256=sFwkJd5mfJ1PU37pIHVmUIQS-taIUJdqi3D9-qRmy8g,7870
workbench/utils/fast_inference.py,sha256=Sm0EV1oPsYYGqiDBVUu3Nj6Ti68JV-UR2S0ZliBDPTk,6148
workbench/utils/glue_utils.py,sha256=dslfXQcJ4C-mGmsD6LqeK8vsXBez570t3fZBVZLV7HA,2039
@@ -275,9 +274,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
- workbench-0.8.165.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
- workbench-0.8.165.dist-info/METADATA,sha256=ET9NQ-NKZIP4pJelWJ3R_SqXS4Nn6C1MhQ5CtZIqu1o,9209
- workbench-0.8.165.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- workbench-0.8.165.dist-info/entry_points.txt,sha256=oZykkheWiiIBjRE8cS5SdcxwmZKSFaQEGwMBjNh-eNM,238
- workbench-0.8.165.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
- workbench-0.8.165.dist-info/RECORD,,
+ workbench-0.8.166.dist-info/licenses/LICENSE,sha256=z4QMMPlLJkZjU8VOKqJkZiQZCEZ--saIU2Z8-p3aVc0,1080
+ workbench-0.8.166.dist-info/METADATA,sha256=a7nuZYqwYcDrQwDLekEr7_Afyxv2IUjxZoD1YwIqzwo,9210
+ workbench-0.8.166.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ workbench-0.8.166.dist-info/entry_points.txt,sha256=oZykkheWiiIBjRE8cS5SdcxwmZKSFaQEGwMBjNh-eNM,238
+ workbench-0.8.166.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+ workbench-0.8.166.dist-info/RECORD,,
@@ -1,211 +0,0 @@
- """ExecutionEnvironment provides logic/functionality to figure out the current execution environment"""
-
- import os
- import sys
- import logging
- import requests
- from typing import Union
- import boto3
- from datetime import datetime, timezone
-
- # Workbench imports
- from workbench.utils.glue_utils import get_resolved_options
- from workbench.utils.deprecated_utils import deprecated
-
- # Set up the logger
- log = logging.getLogger("workbench")
-
-
- def running_on_glue():
-     """
-     Check if the current execution environment is an AWS Glue job.
-
-     Returns:
-         bool: True if running in AWS Glue environment, False otherwise.
-     """
-     # Check if GLUE_VERSION or GLUE_PYTHON_VERSION is in the environment
-     if "GLUE_VERSION" in os.environ or "GLUE_PYTHON_VERSION" in os.environ:
-         log.info("Running in AWS Glue Environment...")
-         return True
-     else:
-         return False
-
-
- def running_on_lambda():
-     """
-     Check if the current execution environment is an AWS Lambda function.
-
-     Returns:
-         bool: True if running in AWS Lambda environment, False otherwise.
-     """
-     if "AWS_LAMBDA_FUNCTION_NAME" in os.environ:
-         log.info("Running in AWS Lambda Environment...")
-         return True
-     else:
-         return False
-
-
- def running_on_docker() -> bool:
-     """Check if the current environment is running on a Docker container.
-
-     Returns:
-         bool: True if running in a Docker container, False otherwise.
-     """
-     try:
-         # Docker creates a .dockerenv file at the root of the directory tree inside the container.
-         # If this file exists, it is very likely that we are running inside a Docker container.
-         with open("/.dockerenv") as f:
-             return True
-     except FileNotFoundError:
-         pass
-
-     try:
-         # Another method is to check the contents of /proc/self/cgroup which should be different
-         # inside a Docker container.
-         with open("/proc/self/cgroup") as f:
-             if any("docker" in line for line in f):
-                 return True
-     except FileNotFoundError:
-         pass
-
-     # Check if we are running on ECS
-     if running_on_ecs():
-         return True
-
-     # Probably not running in a Docker container
-     return False
-
-
- def running_on_ecs() -> bool:
-     """
-     Check if the current environment is running on AWS ECS.
-
-     Returns:
-         bool: True if running on AWS ECS, False otherwise.
-     """
-     indicators = [
-         "ECS_SERVICE_NAME",
-         "ECS_CONTAINER_METADATA_URI",
-         "ECS_CONTAINER_METADATA_URI_V4",
-         "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
-         "AWS_EXECUTION_ENV",
-     ]
-     return any(indicator in os.environ for indicator in indicators)
-
-
- def running_as_service() -> bool:
-     """
-     Check if the current environment is running as a service (e.g. Docker, ECS, Glue, Lambda).
-
-     Returns:
-         bool: True if running as a service, False otherwise.
-     """
-     return running_on_docker() or running_on_glue() or running_on_lambda()
-
-
- def _glue_job_from_script_name(args):
-     """Get the Glue Job Name from the script name"""
-     try:
-         script_name = args["scriptLocation"]
-         return os.path.splitext(os.path.basename(script_name))[0]
-     except Exception:
-         return "unknown"
-
-
- def glue_job_name():
-     """Get the Glue Job Name from the environment or script name"""
-     # Define the required argument
-     args = get_resolved_options(sys.argv)
-
-     # Get the job name
-     job_name = args.get("JOB_NAME") or _glue_job_from_script_name(args)
-     return job_name
-
-
- @deprecated(version=0.9)
- def glue_job_run_id(job_name: str, session: boto3.Session) -> Union[str, None]:
-     """Retrieve the Glue Job Run ID closest to the current time for the given job name.
-     Note: This mostly doesn't work, it will grab A glue job id but often not the correct one.
-     For now, I would just skip using this
-     """
-     try:
-         # Set current time in UTC
-         current_time = datetime.now(timezone.utc)
-
-         job_runs = session.client("glue").get_job_runs(JobName=job_name)
-         if job_runs["JobRuns"]:
-             # Find the job run with the StartedOn time closest to the current time
-             closest_job_run = min(job_runs["JobRuns"], key=lambda run: abs(run["StartedOn"] - current_time))
-             job_id = closest_job_run["Id"]
-             return job_id[:9]  # Shorten the Job Run ID to 9 characters
-
-         log.error(f"No runs found for Glue Job '{job_name}', returning None for Job Run ID.")
-         return None
-
-     except session.client("glue").exceptions.EntityNotFoundException:
-         log.error(f"Glue Job '{job_name}' not found, returning None for Job Run ID.")
-         return None
-     except Exception as e:
-         log.error(f"An error occurred while retrieving job run ID: {e}")
-         return None
-
-
- def ecs_job_name():
-     """Get the ECS Job Name from the metadata endpoint or environment variables."""
-     # Attempt to get the job name from ECS metadata
-     ecs_metadata_uri = os.environ.get("ECS_CONTAINER_METADATA_URI_V4")
-
-     if ecs_metadata_uri:
-         try:
-             response = requests.get(f"{ecs_metadata_uri}/task")
-             if response.status_code == 200:
-                 metadata = response.json()
-                 job_name = metadata.get("Family")  # 'Family' represents the ECS task definition family name
-                 if job_name:
-                     return job_name
-         except requests.RequestException as e:
-             # Log the error or handle it as needed
-             log.error(f"Failed to fetch ECS metadata: {e}")
-
-     # Fallback to environment variables if metadata is not available
-     job_name = os.environ.get("ECS_SERVICE_NAME", "unknown")
-     return job_name
-
-
- if __name__ == "__main__":
-     """Test the Execution Environment utilities"""
-
-     # Test running_on_glue
-     assert running_on_glue() is False
-     os.environ["GLUE_VERSION"] = "1.0"
-     assert running_on_glue() is True
-     del os.environ["GLUE_VERSION"]
-
-     # Test running_on_lambda
-     assert running_on_lambda() is False
-     os.environ["AWS_LAMBDA_FUNCTION_NAME"] = "my_lambda_function"
-     assert running_on_lambda() is True
-     del os.environ["AWS_LAMBDA_FUNCTION_NAME"]
-
-     # Test running_on_docker
-     assert running_on_docker() is False
-     os.environ["ECS_CONTAINER_METADATA_URI"] = "http://localhost:8080"
-     assert running_on_docker() is True
-     del os.environ["ECS_CONTAINER_METADATA_URI"]
-
-     # Test running_on_ecs
-     assert running_on_ecs() is False
-     os.environ["ECS_CONTAINER_METADATA_URI"] = "http://localhost:8080"
-     assert running_on_ecs() is True
-     del os.environ["ECS_CONTAINER_METADATA_URI"]
-
-     # Test getting the Glue Job Name
-     print(glue_job_name())
-
-     # Test getting the Glue Job Run ID
-     from workbench.core.cloud_platform.aws.aws_session import AWSSession
-
-     session = AWSSession().boto3_session
-     print(glue_job_run_id("Test_Workbench_Shell", session))
-
-     print("All tests passed!")