workbench 0.8.236__py3-none-any.whl → 0.8.243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/api/model.py +2 -2
- workbench/core/artifacts/athena_source.py +5 -3
- workbench/core/artifacts/endpoint_core.py +1 -1
- workbench/core/cloud_platform/aws/aws_meta.py +2 -1
- workbench/core/transforms/features_to_model/features_to_model.py +2 -2
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +26 -14
- workbench/model_script_utils/pytorch_utils.py +2 -2
- workbench/model_scripts/chemprop/chemprop.template +8 -5
- workbench/model_scripts/chemprop/generated_model_script.py +10 -7
- workbench/model_scripts/pytorch_model/generated_model_script.py +8 -8
- workbench/model_scripts/pytorch_model/pytorch.template +1 -1
- workbench/model_scripts/pytorch_model/pytorch_utils.py +2 -2
- workbench/scripts/ml_pipeline_batch.py +47 -2
- workbench/scripts/ml_pipeline_launcher.py +410 -0
- workbench/scripts/ml_pipeline_sqs.py +22 -2
- workbench/web_interface/components/plugins/ag_table.py +4 -11
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/METADATA +2 -2
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/RECORD +22 -21
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/WHEEL +1 -1
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/entry_points.txt +1 -0
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/top_level.txt +0 -0
workbench/api/model.py
CHANGED
@@ -44,7 +44,7 @@ class Model(ModelCore):
         serverless: bool = True,
         mem_size: int = 2048,
         max_concurrency: int = 5,
-        instance: str =
+        instance: str = None,
         data_capture: bool = False,
     ) -> Endpoint:
         """Create an Endpoint from the Model.
@@ -55,7 +55,7 @@ class Model(ModelCore):
             serverless (bool): Set the endpoint to be serverless (default: True)
             mem_size (int): The memory size for the Endpoint in MB (default: 2048)
             max_concurrency (int): The maximum concurrency for the Endpoint (default: 5)
-            instance (str): The instance type
+            instance (str): The instance type for Realtime Endpoints (default: None = auto-select based on model)
             data_capture (bool): Enable data capture for the Endpoint (default: False)

         Returns:
workbench/core/artifacts/athena_source.py
CHANGED
@@ -258,7 +258,7 @@ class AthenaSource(DataSourceAbstract):

                 # Wait for the query to complete
                 wr.athena.wait_query(query_execution_id=query_execution_id, boto3_session=self.boto3_session)
-                self.log.debug(f"
+                self.log.debug(f"Query executed successfully: {query_execution_id}")
                 break  # If successful, exit the retry loop
             except wr.exceptions.QueryFailed as e:
                 if "AlreadyExistsException" in str(e):
@@ -271,11 +271,13 @@ class AthenaSource(DataSourceAbstract):
                     time.sleep(retry_delay)
                 else:
                     if not silence_errors:
-                        self.log.critical(f"Failed to execute
+                        self.log.critical(f"Failed to execute query after {max_retries} attempts: {query}")
+                        self.log.critical(f"Error: {e}")
                     raise
             else:
                 if not silence_errors:
-                    self.log.critical(f"Failed to execute
+                    self.log.critical(f"Failed to execute query: {query}")
+                    self.log.critical(f"Error: {e}")
                 raise

     def s3_storage_location(self) -> str:
workbench/core/artifacts/endpoint_core.py
CHANGED
@@ -1138,7 +1138,7 @@ class EndpointCore(Artifact):
             cls.log.error("Error deleting endpoint.")
             raise e

-        time.sleep(
+        time.sleep(10)  # Final sleep for AWS to fully register deletions

     @classmethod
     def delete_endpoint_models(cls, endpoint_name: str):
workbench/core/cloud_platform/aws/aws_meta.py
CHANGED
@@ -245,7 +245,8 @@ class AWSMeta:
                 "Model Group": model_group_name,
                 "Health": health_tags,
                 "Owner": aws_tags.get("workbench_owner", "-"),
-                "
+                "Type": aws_tags.get("workbench_model_type", "-"),
+                "Framework": aws_tags.get("workbench_model_framework", "-"),
                 "Created": created,
                 "Ver": model_details.get("ModelPackageVersion", "-"),
                 "Input": aws_tags.get("workbench_input", "-"),
workbench/core/transforms/features_to_model/features_to_model.py
CHANGED
@@ -247,9 +247,9 @@ class FeaturesToModel(Transform):
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)

-        # Use GPU instance for ChemProp/PyTorch
+        # Use GPU instance for ChemProp/PyTorch
         if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH]:
-            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$1.00/hr
             self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
         else:
             train_instance_type = "ml.m5.xlarge"
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py
CHANGED
@@ -26,13 +26,13 @@ class ModelToEndpoint(Transform):
     ```
     """

-    def __init__(self, model_name: str, endpoint_name: str, serverless: bool = True, instance: str =
+    def __init__(self, model_name: str, endpoint_name: str, serverless: bool = True, instance: str = None):
         """ModelToEndpoint Initialization
         Args:
             model_name(str): The Name of the input Model
             endpoint_name(str): The Name of the output Endpoint
             serverless(bool): Deploy the Endpoint in serverless mode (default: True)
-            instance(str): The instance type
+            instance(str): The instance type for Realtime Endpoints (default: None = auto-select)
         """
         # Make sure the endpoint_name is a valid name
         Artifact.is_name_valid(endpoint_name, delimiter="-", lower_case=False)
@@ -42,7 +42,7 @@ class ModelToEndpoint(Transform):

         # Set up all my instance attributes
         self.serverless = serverless
-        self.
+        self.instance = instance
         self.input_type = TransformInput.MODEL
         self.output_type = TransformOutput.ENDPOINT

@@ -100,24 +100,36 @@ class ModelToEndpoint(Transform):
         # Get the metadata/tags to push into AWS
         aws_tags = self.get_aws_tags()

+        # Check the model framework for resource requirements
+        from workbench.api import ModelFramework
+
+        self.log.info(f"Model Framework: {workbench_model.model_framework}")
+        needs_more_resources = workbench_model.model_framework in [ModelFramework.PYTORCH, ModelFramework.CHEMPROP]
+
         # Is this a serverless deployment?
         serverless_config = None
         if self.serverless:
             # For PyTorch or ChemProp we need at least 4GB of memory
-
-
-
-            if workbench_model.model_framework in [ModelFramework.PYTORCH, ModelFramework.CHEMPROP]:
-                if mem_size < 4096:
-                    self.log.important(
-                        f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
-                    )
-                    mem_size = 4096
+            if needs_more_resources and mem_size < 4096:
+                self.log.important(f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)")
+                mem_size = 4096
             serverless_config = ServerlessInferenceConfig(
                 memory_size_in_mb=mem_size,
                 max_concurrency=max_concurrency,
             )
+            instance_type = "serverless"
             self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")
+        else:
+            # For realtime endpoints, use explicit instance if provided, otherwise auto-select
+            if self.instance:
+                instance_type = self.instance
+                self.log.important(f"Realtime Endpoint: Using specified instance type: {instance_type}")
+            elif needs_more_resources:
+                instance_type = "ml.c7i.large"
+                self.log.important(f"{workbench_model.model_framework} needs more resources (using {instance_type})")
+            else:
+                instance_type = "ml.t2.medium"
+            self.log.important(f"Realtime Endpoint: Instance Type={instance_type}")

         # Configure data capture if requested (and not serverless)
         data_capture_config = None
@@ -141,7 +153,7 @@ class ModelToEndpoint(Transform):
         try:
             model_package.deploy(
                 initial_instance_count=1,
-                instance_type=
+                instance_type=instance_type,
                 serverless_inference_config=serverless_config,
                 endpoint_name=self.output_name,
                 serializer=CSVSerializer(),
@@ -158,7 +170,7 @@ class ModelToEndpoint(Transform):
             # Retry the deploy
             model_package.deploy(
                 initial_instance_count=1,
-                instance_type=
+                instance_type=instance_type,
                 serverless_inference_config=serverless_config,
                 endpoint_name=self.output_name,
                 serializer=CSVSerializer(),
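Taken together, the Model.to_endpoint() and ModelToEndpoint changes centralize instance-type resolution: an explicit instance always wins, PyTorch/ChemProp models fall back to a larger realtime default, and everything else keeps the small default. A minimal sketch of the resulting rule, assuming only what the diff above shows (the helper function itself is not part of the package):

    # Sketch of the selection rule from the diff above (not the verbatim source)
    def resolve_instance_type(serverless: bool, instance: str | None, needs_more_resources: bool) -> str:
        if serverless:
            return "serverless"  # a ServerlessInferenceConfig is used instead of an instance
        if instance:
            return instance  # caller-specified instance type wins
        if needs_more_resources:
            return "ml.c7i.large"  # larger realtime default for PyTorch/ChemProp
        return "ml.t2.medium"  # lightweight realtime default otherwise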
workbench/model_script_utils/pytorch_utils.py
CHANGED
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
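For context on the pin_memory=True additions here (and in the pytorch_model copy of this file below): pinned, page-locked host memory lets host-to-GPU copies run faster and asynchronously. A small illustrative sketch, with invented tensors and device handling rather than this package's code:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(256, 8), torch.randn(256, 1))
    loader = DataLoader(dataset, batch_size=64, shuffle=True, pin_memory=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for x, y in loader:
        # non_blocking=True can overlap the copy with compute when the batch is pinned
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)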
workbench/model_scripts/chemprop/chemprop.template
CHANGED
@@ -34,7 +34,7 @@ DEFAULT_HYPERPARAMETERS = {
     "n_folds": 5,
     "max_epochs": 400,
     "patience": 50,
-    "batch_size":
+    "batch_size": 64,
     # Message Passing (ignored when using foundation model)
     "hidden_dim": 700,
     "depth": 6,
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
         return df

     dataset = data.MoleculeDataset(datapoints)
-
+    # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+    # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+    dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))

     # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
     all_preds = []
@@ -588,8 +590,9 @@ if __name__ == "__main__":
         val_dataset.normalize_targets(target_scaler)
         output_transform = nn.UnscaleTransform.from_standard_scaler(target_scaler)

-
-
+        num_workers = min(os.cpu_count() or 4, 8)  # Scale with CPUs, cap at 8
+        train_loader = data.build_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
+        val_loader = data.build_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)

         # Build model
         pl.seed_everything(hyperparameters["seed"] + fold_idx)
@@ -615,7 +618,7 @@ if __name__ == "__main__":
         callbacks.append(pl.callbacks.ModelCheckpoint(
             dirpath=args.model_dir, filename=f"best_{fold_idx}", monitor="val_loss", mode="min", save_top_k=1
         ))
-        return pl.Trainer(accelerator="auto", max_epochs=max_epochs, logger=False, enable_progress_bar=True, callbacks=callbacks)
+        return pl.Trainer(accelerator="auto", max_epochs=max_epochs, precision="16-mixed", logger=False, enable_progress_bar=True, callbacks=callbacks)

     if use_two_phase:
         # Phase 1: Freeze MPNN, train FFN only
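The batch-size comment in predict_fn is worth unpacking: a dataloader with drop_last semantics silently discards a trailing batch, so a request whose dataset size leaves a remainder of one molecule would come back short. A hedged illustration of the same failure mode with a plain PyTorch DataLoader (ChemProp's build_dataloader wraps similar machinery):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    # 65 samples with batch_size=64 leaves a trailing batch of 1
    dataset = TensorDataset(torch.randn(65, 4))
    lossy = DataLoader(dataset, batch_size=64, drop_last=True)
    print(sum(len(batch[0]) for batch in lossy))  # 64 -> one sample silently dropped

    # The fix above: batch_size=len(dataset) yields exactly one full batch
    safe = DataLoader(dataset, batch_size=len(dataset))
    print(sum(len(batch[0]) for batch in safe))  # 65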
workbench/model_scripts/chemprop/generated_model_script.py
CHANGED
@@ -45,10 +45,10 @@ DEFAULT_HYPERPARAMETERS = {
     # Loss function for regression (mae, mse)
     "criterion": "mae",
     # Split strategy: "random", "scaffold", or "butina"
-    # - random: Standard random split
+    # - random: Standard random split (default)
     # - scaffold: Bemis-Murcko scaffold-based grouping
     # - butina: Morgan fingerprint clustering (recommended for ADMET)
-    "split_strategy": "
+    "split_strategy": "random",
     "butina_cutoff": 0.4,  # Tanimoto distance cutoff for Butina clustering
     # Random seed
     "seed": 42,
@@ -65,11 +65,11 @@ DEFAULT_HYPERPARAMETERS = {
 # Template parameters (filled in by Workbench)
 TEMPLATE_PARAMS = {
     "model_type": "uq_regressor",
-    "targets": ['
+    "targets": ['udm_asy_res_extraction_percent'],
     "feature_list": ['smiles'],
-    "id_column": "
-    "model_metrics_s3_path": "s3://
-    "hyperparameters": {
+    "id_column": "udm_mol_bat_id",
+    "model_metrics_s3_path": "s3://idb-prod-sageworks-artifacts/models/hlm-extraction-reg-chemprop-1-260128/training",
+    "hyperparameters": {},
 }


@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
         return df

     dataset = data.MoleculeDataset(datapoints)
-
+    # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+    # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+    dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))

     # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
     all_preds = []
@@ -752,6 +754,7 @@ if __name__ == "__main__":
     output_columns += [f"{t}_pred" for t in target_columns] + [f"{t}_pred_std" for t in target_columns]
     output_columns += ["prediction", "prediction_std", "confidence"]
     output_columns += [c for c in df_val.columns if c.endswith("_proba")]
+
     output_columns = [c for c in output_columns if c in df_val.columns]

     wr.s3.to_csv(df_val[output_columns], f"{model_metrics_s3_path}/validation_predictions.csv", index=False)
workbench/model_scripts/pytorch_model/generated_model_script.py
CHANGED
@@ -54,10 +54,10 @@ DEFAULT_HYPERPARAMETERS = {
     # Loss function for regression (L1Loss=MAE, MSELoss=MSE, HuberLoss, SmoothL1Loss)
     "loss": "L1Loss",
     # Split strategy: "random", "scaffold", or "butina"
-    # - random: Standard random split
+    # - random: Standard random split (default)
     # - scaffold: Bemis-Murcko scaffold-based grouping (requires 'smiles' column in data)
     # - butina: Morgan fingerprint clustering (requires 'smiles' column, recommended for ADMET)
-    "split_strategy": "
+    "split_strategy": "random",
     "butina_cutoff": 0.4,  # Tanimoto distance cutoff for Butina clustering
     # Random seed
     "seed": 42,
@@ -65,13 +65,13 @@ DEFAULT_HYPERPARAMETERS = {

 # Template parameters (filled in by Workbench)
 TEMPLATE_PARAMS = {
-    "model_type": "
-    "target": "
+    "model_type": "classifier",
+    "target": "class",
     "features": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo'],
-    "id_column": "
-    "compressed_features": [
-    "model_metrics_s3_path": "s3://
-    "hyperparameters": {
+    "id_column": "udm_mol_bat_id",
+    "compressed_features": [],
+    "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/pka-b1-value-class-pytorch-1-dt/training",
+    "hyperparameters": {},
 }

workbench/model_scripts/pytorch_model/pytorch.template
CHANGED
@@ -45,7 +45,7 @@ DEFAULT_HYPERPARAMETERS = {
     "n_folds": 5,
     "max_epochs": 200,
     "early_stopping_patience": 30,
-    "batch_size":
+    "batch_size": 64,
     # Model architecture (larger capacity - ensemble provides regularization)
     "layers": "512-256-128",
     "learning_rate": 1e-3,
workbench/model_scripts/pytorch_model/pytorch_utils.py
CHANGED
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
workbench/scripts/ml_pipeline_batch.py
CHANGED
@@ -44,7 +44,14 @@ def _log_cloudwatch_link(job: dict, message_prefix: str = "View logs") -> None:
         log.info("Check AWS Batch console for logs")


-def run_batch_job(
+def run_batch_job(
+    script_path: str,
+    size: str = "small",
+    realtime: bool = False,
+    dt: bool = False,
+    promote: bool = False,
+    test_promote: bool = False,
+) -> int:
     """
     Submit and monitor an AWS Batch job for ML pipeline execution.

@@ -56,6 +63,10 @@ def run_batch_job(script_path: str, size: str = "small") -> int:
         - small: 2 vCPU, 4GB RAM for lightweight processing
         - medium: 4 vCPU, 8GB RAM for standard ML workloads
         - large: 8 vCPU, 16GB RAM for heavy training/inference
+        realtime: If True, sets serverless=False for real-time processing (default: False)
+        dt: If True, sets DT=True in environment (default: False)
+        promote: If True, sets PROMOTE=True in environment (default: False)
+        test_promote: If True, sets TEST_PROMOTE=True in environment (default: False)

     Returns:
         Exit code (0 for success/disconnected, non-zero for failure)
@@ -81,6 +92,10 @@ def run_batch_job(script_path: str, size: str = "small") -> int:
             "environment": [
                 {"name": "ML_PIPELINE_S3_PATH", "value": s3_path},
                 {"name": "WORKBENCH_BUCKET", "value": workbench_bucket},
+                {"name": "SERVERLESS", "value": "False" if realtime else "True"},
+                {"name": "DT", "value": str(dt)},
+                {"name": "PROMOTE", "value": str(promote)},
+                {"name": "TEST_PROMOTE", "value": str(test_promote)},
             ]
         },
     )
@@ -124,9 +139,39 @@ def main():
     """CLI entry point for running ML pipelines on AWS Batch."""
     parser = argparse.ArgumentParser(description="Run ML pipeline script on AWS Batch")
     parser.add_argument("script_file", help="Local path to ML pipeline script")
+    parser.add_argument(
+        "--size", default="small", choices=["small", "medium", "large"], help="Job size tier (default: small)"
+    )
+    parser.add_argument(
+        "--realtime",
+        action="store_true",
+        help="Create realtime endpoints (default is serverless)",
+    )
+    parser.add_argument(
+        "--dt",
+        action="store_true",
+        help="Set DT=True (models and endpoints will have '-dt' suffix)",
+    )
+    parser.add_argument(
+        "--promote",
+        action="store_true",
+        help="Set Promote=True (models and endpoints will use promoted naming)",
+    )
+    parser.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Set TEST_PROMOTE=True (creates test endpoint with '-test' suffix)",
+    )
     args = parser.parse_args()
     try:
-        exit_code = run_batch_job(
+        exit_code = run_batch_job(
+            args.script_file,
+            size=args.size,
+            realtime=args.realtime,
+            dt=args.dt,
+            promote=args.promote,
+            test_promote=args.test_promote,
+        )
         exit(exit_code)
     except Exception as e:
         log.error(f"Error: {e}")
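The new Batch environment variables arrive as the strings "True"/"False", so whatever runs inside the container has to parse them itself; a hypothetical consumer sketch (the env_flag helper is invented, only the variable names come from the diff):

    import os

    def env_flag(name: str, default: bool = False) -> bool:
        # Hypothetical helper: the launcher sets these as "True"/"False" strings
        return os.environ.get(name, str(default)).lower() == "true"

    serverless = env_flag("SERVERLESS", default=True)
    dt = env_flag("DT")
    promote = env_flag("PROMOTE")
    test_promote = env_flag("TEST_PROMOTE")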
workbench/scripts/ml_pipeline_launcher.py
ADDED
@@ -0,0 +1,410 @@
+"""Launch ML pipelines via SQS for testing.
+
+Run this from a directory containing pipeline subdirectories (e.g., ml_pipelines/).
+
+Usage:
+    ml_pipeline_launcher --dt                  # Launch 1 random pipeline group (all scripts in a directory)
+    ml_pipeline_launcher --dt -n 3             # Launch 3 random pipeline groups
+    ml_pipeline_launcher --dt --all            # Launch ALL pipelines
+    ml_pipeline_launcher --dt caco2            # Launch pipelines matching 'caco2'
+    ml_pipeline_launcher --dt caco2 ppb        # Launch pipelines matching 'caco2' or 'ppb'
+    ml_pipeline_launcher --promote --all       # Promote ALL pipelines
+    ml_pipeline_launcher --test-promote --all  # Test-promote ALL pipelines
+    ml_pipeline_launcher --dt --dry-run        # Show what would be launched without launching
+"""
+
+import argparse
+import ast
+import random
+import re
+import subprocess
+import time
+from pathlib import Path
+
+
+def parse_workbench_batch(script_path: Path) -> dict | None:
+    """Parse WORKBENCH_BATCH config from a script file."""
+    content = script_path.read_text()
+    match = re.search(r"WORKBENCH_BATCH\s*=\s*(\{[^}]+\})", content, re.DOTALL)
+    if match:
+        try:
+            return ast.literal_eval(match.group(1))
+        except (ValueError, SyntaxError):
+            return None
+    return None
+
+
+def build_dependency_graph(configs: dict[Path, dict]) -> dict[str, str]:
+    """Build a mapping from each output to its root producer.
+
+    For a chain like A -> B -> C (where B depends on A, C depends on B),
+    this returns {A: A, B: A, C: A} so all are in the same message group.
+    """
+    # Build output -> input mapping (what does each output depend on?)
+    output_to_input = {}
+    for config in configs.values():
+        if not config:
+            continue
+        outputs = config.get("outputs", [])
+        inputs = config.get("inputs", [])
+        for output in outputs:
+            output_to_input[output] = inputs[0] if inputs else None
+
+    # Walk chain to find root
+    def find_root(output: str, visited: set = None) -> str:
+        if visited is None:
+            visited = set()
+        if output in visited:
+            return output
+        visited.add(output)
+        parent = output_to_input.get(output)
+        if parent is None:
+            return output
+        return find_root(parent, visited)
+
+    return {output: find_root(output) for output in output_to_input}
+
+
+def get_group_id(config: dict | None, root_map: dict[str, str]) -> str | None:
+    """Get the root group_id for a pipeline based on its config and root_map."""
+    if not config:
+        return None
+    outputs = config.get("outputs", [])
+    inputs = config.get("inputs", [])
+    # Check inputs first (this script depends on something)
+    if inputs and inputs[0] in root_map:
+        return root_map[inputs[0]]
+    # Check outputs (this script produces something)
+    if outputs and outputs[0] in root_map:
+        return root_map[outputs[0]]
+    return None
+
+
+def sort_by_dependencies(pipelines: list[Path]) -> tuple[list[Path], dict[Path, dict], dict[str, str]]:
+    """Sort pipelines by dependency chains. Returns (sorted_list, configs, root_map)."""
+    # Parse all configs
+    configs = {}
+    for pipeline in pipelines:
+        configs[pipeline] = parse_workbench_batch(pipeline)
+
+    # Build root map for group_id resolution
+    root_map = build_dependency_graph(configs)
+
+    # Build output -> pipeline mapping
+    output_to_pipeline = {}
+    for pipeline, config in configs.items():
+        if config and config.get("outputs"):
+            for output in config["outputs"]:
+                output_to_pipeline[output] = pipeline
+
+    # Build chains by walking from root producers
+    sorted_pipelines = []
+    used = set()
+
+    for pipeline in sorted(pipelines):
+        config = configs.get(pipeline)
+
+        # Skip if already used or has inputs (not a root)
+        if pipeline in used:
+            continue
+        if config and config.get("inputs"):
+            continue
+
+        # Walk the chain from this root
+        chain = [pipeline]
+        used.add(pipeline)
+
+        current = pipeline
+        while True:
+            current_config = configs.get(current)
+            if not current_config or not current_config.get("outputs"):
+                break
+
+            current_output = current_config["outputs"][0]
+            # Find pipeline that consumes this output
+            next_pipeline = None
+            for p, c in configs.items():
+                if p in used or p not in pipelines:
+                    continue
+                if c and c.get("inputs") and current_output in c["inputs"]:
+                    next_pipeline = p
+                    break
+
+            if next_pipeline:
+                chain.append(next_pipeline)
+                used.add(next_pipeline)
+                current = next_pipeline
+            else:
+                break
+
+        sorted_pipelines.extend(chain)
+
+    # Add any remaining pipelines not in chains
+    for pipeline in sorted(pipelines):
+        if pipeline not in used:
+            sorted_pipelines.append(pipeline)
+
+    return sorted_pipelines, configs, root_map
+
+
+def format_dependency_chains(pipelines: list[Path], configs: dict[Path, dict]) -> list[str]:
+    """Format pipelines as dependency chains for display."""
+    # Build output -> pipeline mapping
+    output_to_pipeline = {}
+    for pipeline, config in configs.items():
+        if config and config.get("outputs"):
+            for output in config["outputs"]:
+                output_to_pipeline[output] = pipeline
+
+    # Build chains by walking from root producers
+    chains = []
+    used = set()
+
+    for pipeline in pipelines:
+        config = configs.get(pipeline)
+
+        # Skip if already part of a chain or has inputs (not a root)
+        if pipeline in used:
+            continue
+        if config and config.get("inputs"):
+            continue
+
+        # Start a new chain from this root producer (or standalone)
+        chain = [pipeline]
+        used.add(pipeline)
+
+        # Walk the chain: find who consumes our output
+        current = pipeline
+        while True:
+            current_config = configs.get(current)
+            if not current_config or not current_config.get("outputs"):
+                break
+
+            current_output = current_config["outputs"][0]
+            # Find a pipeline that takes this output as input
+            next_pipeline = None
+            for p, c in configs.items():
+                if p in used or p not in pipelines:
+                    continue
+                if c and c.get("inputs") and current_output in c["inputs"]:
+                    next_pipeline = p
+                    break
+
+            if next_pipeline:
+                chain.append(next_pipeline)
+                used.add(next_pipeline)
+                current = next_pipeline
+            else:
+                break
+
+        chains.append(chain)
+
+    # Add any remaining pipelines not in chains (shouldn't happen but just in case)
+    for pipeline in pipelines:
+        if pipeline not in used:
+            chains.append([pipeline])
+
+    # Format chains as strings
+    lines = []
+    for chain in chains:
+        names = [p.stem for p in chain]
+        lines.append(" " + " --> ".join(names))
+
+    return lines
+
+
+def get_all_pipelines() -> list[Path]:
+    """Get all ML pipeline scripts from subdirectories of current working directory."""
+    cwd = Path.cwd()
+    # Find all .py files in subdirectories (not in cwd itself)
+    pipelines = []
+    for subdir in cwd.iterdir():
+        if subdir.is_dir():
+            pipelines.extend(subdir.rglob("*.py"))
+    return pipelines
+
+
+def get_pipeline_groups(pipelines: list[Path]) -> dict[Path, list[Path]]:
+    """Group pipelines by their parent directory (leaf directories)."""
+    groups = {}
+    for pipeline in pipelines:
+        parent = pipeline.parent
+        groups.setdefault(parent, []).append(pipeline)
+    return groups
+
+
+def select_random_groups(pipelines: list[Path], num_groups: int) -> list[Path]:
+    """Select pipelines from n random leaf directories."""
+    groups = get_pipeline_groups(pipelines)
+    if not groups:
+        return []
+
+    # Select up to num_groups random directories
+    dirs = list(groups.keys())
+    selected_dirs = random.sample(dirs, min(num_groups, len(dirs)))
+
+    # Return all pipelines from those directories
+    selected = []
+    for d in selected_dirs:
+        selected.extend(groups[d])
+    return selected
+
+
+def filter_pipelines_by_patterns(pipelines: list[Path], patterns: list[str]) -> list[Path]:
+    """Filter pipelines by substring patterns matching the basename."""
+    if not patterns:
+        return pipelines
+
+    matched = []
+    for pipeline in pipelines:
+        basename = pipeline.stem.lower()
+        if any(pattern.lower() in basename for pattern in patterns):
+            matched.append(pipeline)
+    return matched
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Launch ML pipelines via SQS for testing")
+    parser.add_argument(
+        "patterns",
+        nargs="*",
+        help="Substring patterns to filter pipelines by basename (e.g., 'caco2' 'ppb')",
+    )
+    parser.add_argument(
+        "-n",
+        "--num-groups",
+        type=int,
+        default=1,
+        help="Number of random pipeline groups to launch (default: 1, ignored if --all or patterns specified)",
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Launch ALL pipelines (ignores -n)",
+    )
+    parser.add_argument(
+        "--realtime",
+        action="store_true",
+        help="Create realtime endpoints (default is serverless)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Show what would be launched without actually launching",
+    )
+
+    # Mode flags (mutually exclusive)
+    mode_group = parser.add_mutually_exclusive_group(required=True)
+    mode_group.add_argument(
+        "--dt",
+        action="store_true",
+        help="Launch with DT=True (dynamic training mode)",
+    )
+    mode_group.add_argument(
+        "--promote",
+        action="store_true",
+        help="Launch with PROMOTE=True (promotion mode)",
+    )
+    mode_group.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Launch with TEST_PROMOTE=True (test promotion mode)",
+    )
+
+    args = parser.parse_args()
+
+    # Get all pipelines from subdirectories of current working directory
+    all_pipelines = get_all_pipelines()
+    if not all_pipelines:
+        print(f"No pipeline scripts found in subdirectories of {Path.cwd()}")
+        exit(1)
+
+    # Determine which pipelines to run
+    if args.patterns:
+        # Filter by patterns
+        selected_pipelines = filter_pipelines_by_patterns(all_pipelines, args.patterns)
+        if not selected_pipelines:
+            print(f"No pipelines matching patterns: {args.patterns}")
+            exit(1)
+        selection_mode = f"matching {args.patterns}"
+    elif args.all:
+        # Run all pipelines
+        selected_pipelines = all_pipelines
+        selection_mode = "ALL"
+    else:
+        # Random group selection
+        selected_pipelines = select_random_groups(all_pipelines, args.num_groups)
+        if not selected_pipelines:
+            print("No pipeline groups found")
+            exit(1)
+        # Get the directory names for display
+        groups = get_pipeline_groups(selected_pipelines)
+        group_names = [d.name for d in groups.keys()]
+        selection_mode = f"RANDOM {args.num_groups} group(s): {group_names}"
+
+    # Sort by dependencies (producers before consumers)
+    selected_pipelines, configs, root_map = sort_by_dependencies(selected_pipelines)
+
+    # Determine mode for display and CLI flag
+    if args.dt:
+        mode_name = "DT (Dynamic Training)"
+        mode_flag = "--dt"
+    elif args.promote:
+        mode_name = "PROMOTE"
+        mode_flag = "--promote"
+    else:
+        mode_name = "TEST_PROMOTE"
+        mode_flag = "--test-promote"
+
+    print(f"\n{'=' * 60}")
+    print(f"{'DRY RUN - ' if args.dry_run else ''}LAUNCHING {len(selected_pipelines)} PIPELINES")
+    print(f"{'=' * 60}")
+    print(f"Source: {Path.cwd()}")
+    print(f"Selection: {selection_mode}")
+    print(f"Mode: {mode_name}")
+    print(f"Endpoint: {'Realtime' if args.realtime else 'Serverless'}")
+    print("\nPipeline Chains:")
+    for line in format_dependency_chains(selected_pipelines, configs):
+        print(line)
+    print()
+
+    # Dry run - just show what would be launched
+    if args.dry_run:
+        print("Dry run complete. No pipelines were launched.\n")
+        return
+
+    # Countdown before launching
+    print("Launching in ", end="", flush=True)
+    for i in range(10, 0, -1):
+        print(f"{i}...", end="", flush=True)
+        time.sleep(1)
+    print(" GO!\n")
+
+    # Launch each pipeline using the CLI
+    for i, pipeline in enumerate(selected_pipelines, 1):
+        print(f"\n{'─' * 60}")
+        print(f"Launching pipeline {i}/{len(selected_pipelines)}: {pipeline.name}")
+        print(f"{'─' * 60}")
+
+        # Build the command
+        cmd = ["ml_pipeline_sqs", str(pipeline), mode_flag]
+        if args.realtime:
+            cmd.append("--realtime")
+
+        # Pass root group_id for dependency chain ordering
+        group_id = get_group_id(configs.get(pipeline), root_map)
+        if group_id:
+            cmd.extend(["--group-id", group_id])
+
+        print(f"Running: {' '.join(cmd)}\n")
+        result = subprocess.run(cmd)
+        if result.returncode != 0:
+            print(f"Failed to launch {pipeline.name} (exit code: {result.returncode})")
+
+    print(f"\n{'=' * 60}")
+    print(f"FINISHED LAUNCHING {len(selected_pipelines)} PIPELINES")
+    print(f"{'=' * 60}\n")
+
+
+if __name__ == "__main__":
+    main()
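parse_workbench_batch() regex-extracts a module-level WORKBENCH_BATCH dict from each pipeline script, and its inputs/outputs lists drive both chain ordering and the shared group id. A hypothetical example of the declaration it expects (artifact names invented; note the regex only matches a flat, single-brace dict literal):

    # At the top of a pipeline script; the launcher parses this with ast.literal_eval
    WORKBENCH_BATCH = {
        "inputs": ["caco2_features"],   # consumed artifacts (hypothetical name)
        "outputs": ["caco2_model"],     # produced artifacts (hypothetical name)
    }

With declarations like this, a script producing caco2_model is launched before any script listing it under inputs, and both land in the same SQS message group.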
workbench/scripts/ml_pipeline_sqs.py
CHANGED
@@ -71,6 +71,8 @@ def submit_to_sqs(
     realtime: bool = False,
     dt: bool = False,
     promote: bool = False,
+    test_promote: bool = False,
+    group_id: str | None = None,
 ) -> None:
     """
     Upload script to S3 and submit message to SQS queue for processing.
@@ -81,6 +83,8 @@ def submit_to_sqs(
         realtime: If True, sets serverless=False for real-time processing (default: False)
         dt: If True, sets DT=True in environment (default: False)
         promote: If True, sets PROMOTE=True in environment (default: False)
+        test_promote: If True, sets TEST_PROMOTE=True in environment (default: False)
+        group_id: Optional MessageGroupId override for dependency chains (default: derived from script)

     Raises:
         ValueError: If size is invalid or script file not found
@@ -99,7 +103,8 @@ def submit_to_sqs(
     # Read script content and parse WORKBENCH_BATCH config
     script_content = script_file.read_text()
     batch_config = parse_workbench_batch(script_content)
-    group_id
+    if group_id is None:
+        group_id = get_message_group_id(batch_config)
     outputs = (batch_config or {}).get("outputs", [])
     inputs = (batch_config or {}).get("inputs", [])

@@ -108,6 +113,7 @@ def submit_to_sqs(
     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (serverless={'False' if realtime else 'True'})")
     print(f"🔄 DynamicTraining: {dt}")
     print(f"🆕 Promote: {promote}")
+    print(f"🧪 Test Promote: {test_promote}")
     print(f"🪣 Bucket: {workbench_bucket}")
     if outputs:
         print(f"📤 Outputs: {outputs}")
@@ -174,6 +180,7 @@ def submit_to_sqs(
         "SERVERLESS": "False" if realtime else "True",
         "DT": str(dt),
         "PROMOTE": str(promote),
+        "TEST_PROMOTE": str(test_promote),
     }

     # Send the message to SQS
@@ -200,6 +207,7 @@ def submit_to_sqs(
     print(f"⚡ Mode: {'Real-time' if realtime else 'Serverless'} (SERVERLESS={'False' if realtime else 'True'})")
     print(f"🔄 DynamicTraining: {dt}")
     print(f"🆕 Promote: {promote}")
+    print(f"🧪 Test Promote: {test_promote}")
     if outputs:
         print(f"📤 Outputs: {outputs}")
     if inputs:
@@ -234,7 +242,17 @@ def main():
     parser.add_argument(
         "--promote",
         action="store_true",
-        help="Set Promote=True (models and endpoints will use promoted naming",
+        help="Set Promote=True (models and endpoints will use promoted naming)",
+    )
+    parser.add_argument(
+        "--test-promote",
+        action="store_true",
+        help="Set TEST_PROMOTE=True (creates test endpoint with '-test' suffix)",
+    )
+    parser.add_argument(
+        "--group-id",
+        default=None,
+        help="Override MessageGroupId for SQS (used for dependency chain ordering)",
     )
     args = parser.parse_args()
     try:
@@ -244,6 +262,8 @@ def main():
             realtime=args.realtime,
             dt=args.dt,
             promote=args.promote,
+            test_promote=args.test_promote,
+            group_id=args.group_id,
         )
     except Exception as e:
         print(f"\n❌ ERROR: {e}")
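The --group-id override matters because SQS FIFO queues deliver messages sharing a MessageGroupId strictly in order, which is how a dependency chain stays serialized while unrelated pipelines run in parallel. A minimal boto3 sketch of the idea, assuming a FIFO queue (the URL and payload are placeholders, not workbench internals):

    import boto3

    sqs = boto3.client("sqs")
    queue_url = "https://sqs.us-east-1.amazonaws.com/123456789012/ml-pipelines.fifo"  # placeholder

    # Messages with the same MessageGroupId are processed in order, so tagging a
    # whole chain with its root producer's id serializes producer -> consumers.
    sqs.send_message(
        QueueUrl=queue_url,
        MessageBody='{"script": "caco2_model.py"}',  # placeholder payload
        MessageGroupId="caco2_features",             # root of the dependency chain
        MessageDeduplicationId="caco2_model-1",      # required unless content-based dedup is on
    )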
workbench/web_interface/components/plugins/ag_table.py
CHANGED
@@ -96,18 +96,11 @@ class AGTable(PluginInterface):

 if __name__ == "__main__":
     # Run the Unit Test for the Plugin
+    from workbench.api import Meta
     from workbench.web_interface.components.plugin_unit_test import PluginUnitTest

-    # Test data
-
-        "ID": [f"id_{i}" for i in range(10)],
-        "feat1": [1.0, 1.0, 1.1, 3.0, 4.0, 1.0, 1.0, 1.1, 3.0, 4.0],
-        "feat2": [1.0, 1.0, 1.1, 3.0, 4.0, 1.0, 1.0, 1.1, 3.0, 4.0],
-        "feat3": [0.1, 0.15, 0.2, 0.9, 2.8, 0.25, 0.35, 0.4, 1.6, 2.5],
-        "price": [31, 60, 62, 40, 20, 31, 61, 60, 40, 20],
-        "name": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "Z" * 55],
-    }
-    test_df = pd.DataFrame(data)
+    # Test on model data
+    models_df = Meta().models(details=True)

     # Run the Unit Test on the Plugin
-    PluginUnitTest(AGTable, theme="dark", input_data=
+    PluginUnitTest(AGTable, theme="dark", input_data=models_df, max_height=500).run()
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.
+Version: 0.8.243
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License
@@ -40,7 +40,7 @@ Requires-Dist: boto3>=1.31.76
 Requires-Dist: botocore>=1.31.76
 Requires-Dist: redis>=5.0.1
 Requires-Dist: numpy>=1.26.4
-Requires-Dist: pandas
+Requires-Dist: pandas<3.0,>=2.2.1
 Requires-Dist: awswrangler>=3.4.0
 Requires-Dist: sagemaker<3.0,>=2.143
 Requires-Dist: cryptography>=44.0.2
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/RECORD
CHANGED
@@ -40,7 +40,7 @@ workbench/api/feature_set.py,sha256=-21ztp7JDqs7CKF3KtNdPoXppkiDqfb4JVK8xBK9rIY,
 workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
 workbench/api/meta.py,sha256=1JxCpLn4JENiWUJaVjGgDL7WqhIy-s1swUbBzprI-uY,8595
 workbench/api/meta_model.py,sha256=2DpjjBSw60QPMWQ2sTu2492PrFWFMXK8hH9U13gXzi8,11226
-workbench/api/model.py,sha256=
+workbench/api/model.py,sha256=h3TAlKT8X7q6tW6Q134ZTnr1I9au-2d72p1QovlIfd4,5507
 workbench/api/monitor.py,sha256=Cez89Uac7Tzt47FxkjoX-YDGccEhvBcxw3sZFtw4ud8,4506
 workbench/api/parameter_store.py,sha256=_3MmPxKiVy7_OIgCSRlUv9xbk8nuiOWiCtZgT-AxN1k,2574
 workbench/api/pipeline.py,sha256=MSYGrDSXrRB_oQELtAlOwBfxSBTw3REAkHy5XBHau0Y,6261
@@ -54,13 +54,13 @@ workbench/cached/cached_pipeline.py,sha256=QOVnEKu5RbIdlNpJUi-0Ebh0_-C68RigSPwKh
 workbench/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workbench/core/artifacts/__init__.py,sha256=ukcgbYlI9m99bzwaBNO01K1h0-cQkzsbh_jT_GyQ-LY,1034
 workbench/core/artifacts/artifact.py,sha256=scWUbX2Sk1rxT8VEm_Z7YTxbOzkDASNyqqXB56xLZ2w,17721
-workbench/core/artifacts/athena_source.py,sha256=
+workbench/core/artifacts/athena_source.py,sha256=w65c3fjbs1dlosoogj-fo0-DznXUifbfM2bsE-QUzRY,26195
 workbench/core/artifacts/cached_artifact_mixin.py,sha256=ngqFLZ4cQx_TFouXZgXZQsv_7W6XCvxVGXXSfzzaft8,3775
 workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcvZyNxYERwvo8o0OQc,14858
 workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
 workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
-workbench/core/artifacts/endpoint_core.py,sha256=
+workbench/core/artifacts/endpoint_core.py,sha256=hEjMXag9nsLGu_eOLxPSSqcu1aP_1Vo9DHIGemLutvc,55695
 workbench/core/artifacts/feature_set_core.py,sha256=IjSUpxpj2S611uo5LmnOK-aH3CZhfbC5ztC02PQ5gqE,42128
 workbench/core/artifacts/model_core.py,sha256=wPkpdRlxnAXMqsDtJGPotGFO146Hm7NCfYbImHwZo9c,52343
 workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
@@ -69,7 +69,7 @@ workbench/core/cloud_platform/cloud_meta.py,sha256=QFEsGfqhaCkw9Jl4PRln-xRaHnt-e
 workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97
 workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=V5iVsoGvSRilARtTdExnt27QptzAcJaW0s3nm2B8-ow,8286
 workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
-workbench/core/cloud_platform/aws/aws_meta.py,sha256=
+workbench/core/cloud_platform/aws/aws_meta.py,sha256=BwvQbEJVBW5b3wnw25ndFm33QptgoSMpeFh1Zc9vfmw,34853
 workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
 workbench/core/cloud_platform/aws/aws_session.py,sha256=2Gc_k4Q87BBeQDgXgVR-w-qmsF6ncZR8wvTeNnixM6k,6926
 workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
@@ -107,9 +107,9 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
 workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
 workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
 workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/features_to_model/features_to_model.py,sha256=
+workbench/core/transforms/features_to_model/features_to_model.py,sha256=pYGdfnp-6xh79kxQ5iXySi7oYcaRuQ-xLDo1rFgDB7g,20876
 workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=
+workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=I44_ziQ0IegudLQ_qJ-XNfWZInDkXWI9LsE-1o9855w,8823
 workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
 workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
 workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
@@ -128,11 +128,11 @@ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,
 workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
 workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
 workbench/model_script_utils/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_script_utils/pytorch_utils.py,sha256=
+workbench/model_script_utils/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_script_utils/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/script_generation.py,sha256=Sv0OJdASNKk1KXr8goiZWUL5W7i8G8gBb_R_OTb8caI,8257
-workbench/model_scripts/chemprop/chemprop.template,sha256=
-workbench/model_scripts/chemprop/generated_model_script.py,sha256=
+workbench/model_scripts/chemprop/chemprop.template,sha256=otuR2Ee-GogsNo4z1MlefXY9G--ZOTgg4rFc_5NXivw,36941
+workbench/model_scripts/chemprop/generated_model_script.py,sha256=6duTkJUH1eRrsGHAZN1DWRKR74K5tsXKcQPrWd3vjxQ,36724
 workbench/model_scripts/chemprop/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
 workbench/model_scripts/chemprop/requirements.txt,sha256=2IBHZZNYqhX9Ed7AmRVgN06tO3EHeBbN2EM8-tjWZhs,216
 workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
@@ -160,10 +160,10 @@ workbench/model_scripts/ensemble_xgb/ensemble_xgb.template,sha256=lMEx0IkawcpTI5
 workbench/model_scripts/ensemble_xgb/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
 workbench/model_scripts/meta_model/generated_model_script.py,sha256=ncPrHd9-R8l_98vAiuTUJ92C9PKpEgAtpIrmd7TuqSQ,8341
 workbench/model_scripts/meta_model/meta_model.template,sha256=viz-AKVq3YRwOUBt8-rUO1TwdEPFzyP7nnifqcIJurw,8244
-workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=
+workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=1B4RortOxbB7feTrr5Kf9qUqdqG4Qc1a6evdNUYLSNg,27011
 workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_scripts/pytorch_model/pytorch.template,sha256=
-workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=
+workbench/model_scripts/pytorch_model/pytorch.template,sha256=78TBsT1NoPkVL-cINZMjA1SE91abUgKtNqedOCvS7lU,22766
+workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_scripts/pytorch_model/requirements.txt,sha256=ES7YehHEL4E5oV8FScHm3oNQmkMI4ODgbC1fSbaY7T4,183
 workbench/model_scripts/pytorch_model/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
@@ -184,8 +184,9 @@ workbench/scripts/endpoint_test.py,sha256=RV52DZZTOD_ou-ywZjaxQ2_wqnSJqvlnHQZbvf
 workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
 workbench/scripts/lambda_test.py,sha256=SLAPIXeGQn82neQ6-Hif3VS3LWLwT0-dGw8yWw2aXRQ,2077
 workbench/scripts/meta_model_sim.py,sha256=6iGpInA-nH6DSjk0z63fcoL8P7icqnZmKLE5Sqyrh7E,1026
-workbench/scripts/ml_pipeline_batch.py,sha256=
-workbench/scripts/
+workbench/scripts/ml_pipeline_batch.py,sha256=EbtOMtXIzvE07cLw4xV3nDM5NL_bYskO_kWySdegCjw,6567
+workbench/scripts/ml_pipeline_launcher.py,sha256=Xxg5m5Q0Ji4tiIp3Vo4JdNNuQ3n1L0Dx19Hrzb-vqBc,13801
+workbench/scripts/ml_pipeline_sqs.py,sha256=YFbc-tLvRFAnj4ABlpnGUTf5sz1MxriHIP-n4dGLitM,9537
 workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
 workbench/scripts/redis_expire.py,sha256=DxI_RKSNlrW2BsJZXcsSbaWGBgPZdPhtzHjV9SUtElE,1120
 workbench/scripts/redis_report.py,sha256=iaJSuGPyLCs6e0TMcZDoT0YyJ43xJ1u74YD8FLnnUg4,990
@@ -281,7 +282,7 @@ workbench/web_interface/components/settings_menu.py,sha256=HdMhi0Lm7s6U9c7qzQJdU
 workbench/web_interface/components/violin_plots.py,sha256=3_T85hIs_R_WZpfFkSrqY2eYXmYzWsywDqsLhB7W1RQ,5320
 workbench/web_interface/components/experiments/dashboard_metric_plots.py,sha256=DPIw13tO9XOGxA6IeRPLgl-C3XUJ2N287JkSEg73Rjg,2984
 workbench/web_interface/components/experiments/outlier_plot.py,sha256=5yGVnVScM0TR80OjPypx_83Ksg7r5HDR3hGjpT4Ub14,3646
-workbench/web_interface/components/plugins/ag_table.py,sha256=
+workbench/web_interface/components/plugins/ag_table.py,sha256=mIfpeBn0-zrPj0hmlSTSuo17Od2OSRnIHwFPNdfuTMA,3573
 workbench/web_interface/components/plugins/confusion_matrix.py,sha256=gkmbAOWsZRVBoPQSav-aglDtw0Nt54YcDya9Z4OG0Vc,7387
 workbench/web_interface/components/plugins/dashboard_status.py,sha256=4plmoiXj3dDjoQerUNpep_jfk50pI9rHvcoSP20UbE8,5832
 workbench/web_interface/components/plugins/data_details.py,sha256=pZm1AbM_0EXQwx77qUkfyrU9MedAs4Wlkp6iOtSrUtI,11104
@@ -304,9 +305,9 @@ workbench/web_interface/page_views/main_page.py,sha256=DyChwOGX_KtbJ09pw2Iswofba
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.243.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+workbench-0.8.243.dist-info/METADATA,sha256=mWeiFAV-J1TZKygpTEymX4gPhl2dFfscZKPOFitOMFo,10038
+workbench-0.8.243.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+workbench-0.8.243.dist-info/entry_points.txt,sha256=Stivs_FFse2pHLXfWNpyh649z0bj7Ks5laQy8LuexCA,633
+workbench-0.8.243.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.243.dist-info/RECORD,,
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/entry_points.txt
CHANGED
@@ -5,6 +5,7 @@ glue_launcher = workbench.scripts.glue_launcher:main
 lambda_test = workbench.scripts.lambda_test:main
 meta_model_sim = workbench.scripts.meta_model_sim:main
 ml_pipeline_batch = workbench.scripts.ml_pipeline_batch:main
+ml_pipeline_launcher = workbench.scripts.ml_pipeline_launcher:main
 ml_pipeline_sqs = workbench.scripts.ml_pipeline_sqs:main
 training_test = workbench.scripts.training_test:main
 workbench = workbench.repl.workbench_shell:launch_shell
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/licenses/LICENSE
File without changes
{workbench-0.8.236.dist-info → workbench-0.8.243.dist-info}/top_level.txt
File without changes