workbench-0.8.162-py3-none-any.whl → workbench-0.8.220-py3-none-any.whl
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +259 -305
- workbench/algorithms/graph/light/proximity_graph.py +14 -12
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +5 -1
- workbench/api/compound.py +1 -1
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +18 -5
- workbench/api/feature_set.py +121 -15
- workbench/api/meta.py +5 -2
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +55 -21
- workbench/api/monitor.py +1 -16
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_model.py +4 -4
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +16 -8
- workbench/core/artifacts/data_capture_core.py +355 -0
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +382 -253
- workbench/core/artifacts/feature_set_core.py +249 -45
- workbench/core/artifacts/model_core.py +135 -80
- workbench/core/artifacts/monitor_core.py +33 -248
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
- workbench/core/cloud_platform/aws/aws_meta.py +12 -5
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/core/transforms/features_to_model/features_to_model.py +62 -40
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +76 -15
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +278 -0
- workbench/model_scripts/chemprop/chemprop.template +649 -0
- workbench/model_scripts/chemprop/generated_model_script.py +649 -0
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +3 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
- workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +444 -500
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +440 -496
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +20 -11
- workbench/model_scripts/uq_models/generated_model_script.py +248 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +372 -404
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +278 -0
- workbench/model_scripts/xgb_model/xgb_model.template +369 -401
- workbench/repl/workbench_shell.py +28 -19
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/ml_pipeline_batch.py +137 -0
- workbench/scripts/ml_pipeline_sqs.py +186 -0
- workbench/scripts/monitor_cloud_watch.py +20 -100
- workbench/scripts/training_test.py +85 -0
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +175 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +483 -0
- workbench/utils/chem_utils/mol_standardize.py +450 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +219 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/chemprop_utils.py +141 -0
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/cloudwatch_utils.py +137 -0
- workbench/utils/config_manager.py +3 -7
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +278 -79
- workbench/utils/monitor_utils.py +44 -62
- workbench/utils/pandas_utils.py +3 -3
- workbench/utils/pytorch_utils.py +87 -0
- workbench/utils/shap_utils.py +11 -57
- workbench/utils/workbench_logging.py +0 -3
- workbench/utils/workbench_sqs.py +1 -1
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +127 -219
- workbench/web_interface/components/model_plot.py +14 -2
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- workbench/web_interface/components/plugins/model_details.py +38 -74
- workbench/web_interface/components/plugins/scatter_plot.py +6 -10
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/METADATA +31 -9
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/RECORD +128 -96
- workbench-0.8.220.dist-info/entry_points.txt +11 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +1 -1
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
- workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -273
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/utils/chem_utils.py +0 -1556
- workbench/utils/execution_environment.py +0 -211
- workbench/utils/fast_inference.py +0 -167
- workbench/utils/resource_utils.py +0 -39
- workbench-0.8.162.dist-info/entry_points.txt +0 -5
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
The largest single-file change is `workbench/core/artifacts/endpoint_core.py` (+382 −253). The annotated hunks below walk through it; lines lost to extraction are marked with `…`.

```diff
@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib
 
 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics
 
 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
@@ -30,13 +22,15 @@ from sagemaker import Predictor
 
 # Workbench Imports
 from workbench.core.artifacts.artifact import Artifact
-from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType
+from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType, ModelFramework
 from workbench.utils.endpoint_metrics import EndpointMetrics
-from workbench.utils.fast_inference import fast_inference
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import …
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
+from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
+from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
+from workbench_bridges.endpoints.fast_inference import fast_inference
 
 
 class EndpointCore(Artifact):
```
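The aliased `pull_cv_results` imports feed a per-framework dispatch in the new `cross_fold_inference()` method (shown in a later hunk). A minimal sketch of that dispatch, rewritten here as a lookup table for illustration — the shipped code uses an if/elif chain, and the model name below is hypothetical:

```python
# Sketch only: assumes each pull_cv_results(model) returns
# (cross_fold_metrics, out_of_fold_df), as the hunk below unpacks it.
from workbench.core.artifacts import ModelCore, ModelFramework
from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv

PULL_CV = {
    ModelFramework.UNKNOWN: xgboost_pull_cv,  # legacy models fall back to XGBoost handling
    ModelFramework.XGBOOST: xgboost_pull_cv,
    ModelFramework.PYTORCH: pytorch_pull_cv,
    ModelFramework.CHEMPROP: chemprop_pull_cv,
}

model = ModelCore("abalone-regression")  # illustrative model name
pull_cv = PULL_CV.get(model.model_framework)
if pull_cv is None:
    raise ValueError(f"Cross-Fold Inference not supported for {model.model_framework}")
cross_fold_metrics, out_of_fold_df = pull_cv(model)
```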
```diff
@@ -164,11 +158,17 @@ class EndpointCore(Artifact):
         """
         return "Serverless" in self.endpoint_meta["InstanceType"]
 
-    def …
+    def data_capture(self):
+        """Get the DataCaptureCore class for this endpoint"""
+        from workbench.core.artifacts.data_capture_core import DataCaptureCore
+
+        return DataCaptureCore(self.endpoint_name)
+
+    def enable_data_capture(self):
         """Add data capture to the endpoint"""
-        self.…
+        self.data_capture().enable()
 
-    def …
+    def monitor(self):
         """Get the MonitorCore class for this endpoint"""
         from workbench.core.artifacts.monitor_core import MonitorCore
 
```
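Data capture is no longer folded into monitoring: the new `DataCaptureCore` artifact (see `data_capture_core.py`, +355 lines in the file list) owns it, and `EndpointCore` exposes both through accessors. A short usage sketch — the endpoint name is illustrative:

```python
from workbench.core.artifacts.endpoint_core import EndpointCore

end = EndpointCore("abalone-regression")  # illustrative endpoint name

# Data capture is now its own artifact class...
capture = end.data_capture()  # returns a DataCaptureCore
end.enable_data_capture()     # shorthand for end.data_capture().enable()

# ...while monitoring stays on MonitorCore
mon = end.monitor()
```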
```diff
@@ -330,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True
 
-    def auto_inference(self…
-        """Run inference on the endpoint using…
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""
 
         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -343,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # …
-        …
-        …
-        …
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Grab the…
-        …
-        …
-        …
-        …
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")
 
     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference…
+        """Run inference on the Endpoint using the provided DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
```
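`auto_inference()` drops its old `capture` flag: it now always scores the hold-out rows (`training == False`) of the model's training view, while the new `full_inference()` scores every row; both strip the AWS bookkeeping columns and call `inference()` with a fixed capture name. Usage, assuming an existing endpoint (name illustrative):

```python
from workbench.core.artifacts.endpoint_core import EndpointCore

end = EndpointCore("abalone-regression")  # illustrative endpoint name

holdout_preds = end.auto_inference()  # hold-out rows, captured as "auto_inference"
all_preds = end.full_inference()      # every row, captured as "full_inference"
print(all_preds.head())
```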
```diff
@@ -378,63 +392,219 @@ class EndpointCore(Artifact):
             self.log.important("No model associated with this endpoint, running 'no frills' inference...")
             return self.fast_inference(eval_df)
 
+        # Grab the model features and target column
+        model = ModelCore(self.model_name)
+        features = model.features()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+
         # Run predictions on the evaluation data
-        prediction_df = self._predict(eval_df, drop_error_rows)
+        prediction_df = self._predict(eval_df, features, drop_error_rows)
         if prediction_df.empty:
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
 
-        # …
-        …
-        …
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets
 
         # Sanity Check that the target column is present
-        if …
-            self.log.important(f"Target Column {…
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
-            …
+            metrics = pd.DataFrame()
 
         # Compute the standard performance metrics for this model
-        model_type = model.model_type
-        if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-            prediction_df = self.residuals(target_column, prediction_df)
-            metrics = self.regression_metrics(target_column, prediction_df)
-        elif model_type == ModelType.CLASSIFIER:
-            metrics = self.classification_metrics(target_column, prediction_df)
         else:
-            …
-            …
-            …
+            if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
+            elif model.model_type == ModelType.CLASSIFIER:
+                metrics = self.classification_metrics(primary_target, prediction_df)
+            else:
+                # For other model types, we don't compute metrics
+                self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
+                metrics = pd.DataFrame()
 
         # Print out the metrics
-        …
-        …
-        print(metrics.head())
+        print(f"Performance Metrics for {self.model_name} on {self.name}")
+        print(metrics.head())
 
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        # Capture the inference results and metrics
+        if primary_target and capture_name:
+
+            # If we don't have an id_column, we'll pull it from the model's FeatureSet
+            if id_column is None:
+                fs = FeatureSetCore(model.get_input())
+                id_column = fs.id_column
+
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]
 
-        …
-        …
-        …
-        # Now put into the Parameter Store Model Inference Namespace
-        self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1
 
-        …
-        if …
-            metrics = uq_metrics(prediction_df, target_column)
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name
 
-        …
-        …
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
+                self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
 
         # Return the prediction DataFrame
         return prediction_df
 
+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
+
+        Returns:
+            pd.DataFrame: A DataFrame with cross fold predictions
+        """
+
+        # Grab our model
+        model = ModelCore(self.model_name)
+
+        # Compute CrossFold (Metrics and Prediction Dataframe)
+        # For PyTorch and ChemProp, pull pre-computed CV results from training
+        if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
+            cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
+        elif model.model_framework == ModelFramework.CHEMPROP:
+            cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
+        else:
+            self.log.error(f"Cross-Fold Inference not supported for Model Framework: {model.model_framework}.")
+            return pd.DataFrame()
+
+        # If the metrics dataframe isn't empty save to the param store
+        if not cross_fold_metrics.empty:
+            # Convert to list of dictionaries
+            metrics = cross_fold_metrics.to_dict(orient="records")
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", metrics)
+
+        # If the out_of_fold_df is empty return it
+        if out_of_fold_df.empty:
+            self.log.warning("No out-of-fold predictions were made. Returning empty DataFrame.")
+            return out_of_fold_df
+
+        # Capture the results
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+        model_type = model.model_type
+
+        # Get the id_column from the model's FeatureSet
+        fs = FeatureSetCore(model.get_input())
+        id_column = fs.id_column
+
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
+
+        # Collect UQ columns (q_*, confidence) for additional tracking
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
+
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
+
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
+
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
+
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+        return out_of_fold_df
+
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
 
```
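Both `inference()` and the new `cross_fold_inference()` normalize `model.target()`, which may be a plain string or (for multi-target models) a list; the first entry is the primary target, and per-target prediction columns follow a `{target}_pred` convention. The idiom in isolation (target names below are illustrative):

```python
# The normalization idiom used by inference() / cross_fold_inference()
def normalize_targets(targets):
    target_list = targets if isinstance(targets, list) else [targets]
    primary_target = target_list[0] if target_list else None
    return target_list, primary_target

# Single-target model: one "prediction" column
assert normalize_targets("solubility") == (["solubility"], "solubility")

# Multi-target model: per-target columns named "{target}_pred"
target_list, primary = normalize_targets(["logp", "logd"])
is_multi_target = len(target_list) > 1
pred_cols = [f"{t}_pred" if is_multi_target else "prediction" for t in target_list]
assert pred_cols == ["logp_pred", "logd_pred"] and primary == "logp"
```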
```diff
@@ -450,11 +620,12 @@ class EndpointCore(Artifact):
         """
         return fast_inference(self.name, eval_df, self.sm_session, threads=threads)
 
-    def _predict(self, eval_df: pd.DataFrame, drop_error_rows: bool = False) -> pd.DataFrame:
-        """Internal: Run prediction on …
+    def _predict(self, eval_df: pd.DataFrame, features: list[str], drop_error_rows: bool = False) -> pd.DataFrame:
+        """Internal: Run prediction on observations in the given DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
+            features (list[str]): List of feature column names needed for prediction
             drop_error_rows (bool): If True, drop rows that had endpoint errors/issues (default=False)
         Returns:
             pd.DataFrame: Return the DataFrame with additional columns, prediction and any _proba columns
@@ -465,19 +636,12 @@ class EndpointCore(Artifact):
             self.log.warning("Evaluation DataFrame has 0 rows. No predictions to run.")
             return pd.DataFrame(columns=eval_df.columns)  # Return empty DataFrame with same structure
 
-        # Sanity check: Does the…
-        …
-        …
-        …
-        …
-        …
-        df_columns_lower = set(col.lower() for col in eval_df.columns)
-        features_lower = set(feature.lower() for feature in features)
-
-        # Check if the features are a subset of the DataFrame columns (case-insensitive)
-        if not features_lower.issubset(df_columns_lower):
-            missing_features = features_lower - df_columns_lower
-            raise ValueError(f"DataFrame does not contain required features: {missing_features}")
+        # Sanity check: Does the DataFrame have the required features?
+        df_columns_lower = set(col.lower() for col in eval_df.columns)
+        features_lower = set(feature.lower() for feature in features)
+        if not features_lower.issubset(df_columns_lower):
+            missing_features = features_lower - df_columns_lower
+            raise ValueError(f"DataFrame does not contain required features: {missing_features}")
 
         # Create our Endpoint Predictor Class
         predictor = Predictor(
@@ -634,6 +798,10 @@ class EndpointCore(Artifact):
     @staticmethod
     def _hash_dataframe(df: pd.DataFrame, hash_length: int = 8):
         # Internal: Compute a data hash for the dataframe
+        if df.empty:
+            return "--hash--"
+
+        # Sort the dataframe by columns to ensure consistent ordering
         df = df.copy()
         df = df.sort_values(by=sorted(df.columns.tolist()))
         row_hashes = pd.util.hash_pandas_object(df, index=False)
```
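`_hash_dataframe` gains an early return for empty frames; the rest of the routine (sort rows by all columns, then hash row-wise) is unchanged. A standalone sketch of the approach — the final digest step is not visible in this hunk, so the md5 combine below is an assumption:

```python
import hashlib
import pandas as pd

def hash_dataframe(df: pd.DataFrame, hash_length: int = 8) -> str:
    if df.empty:
        return "--hash--"  # sentinel for empty frames, as in the new code
    # Sort by all columns so row order doesn't change the hash
    df = df.copy()
    df = df.sort_values(by=sorted(df.columns.tolist()))
    row_hashes = pd.util.hash_pandas_object(df, index=False)
    # Assumption: combine the per-row hashes with md5 (not shown in the hunk)
    return hashlib.md5(row_hashes.values.tobytes()).hexdigest()[:hash_length]

print(hash_dataframe(pd.DataFrame({"a": [3, 1], "b": [4, 2]})))
print(hash_dataframe(pd.DataFrame()))  # --hash--
```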
```diff
@@ -644,19 +812,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        …
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target
 
         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            …
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
@@ -687,86 +855,69 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)
 
-        # …
-        …
-        output_columns = [target_column, prediction_col]
-
-        # Add any _proba columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-
-        # Add any quantile columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col.startswith("qr_")]
-
-        # Add the ID column
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)
 
         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(…
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
 
-        # Generate SHAP values for our Prediction Dataframe
-        # generate_shap_values(self.endpoint_name, model_type.value, pred_results_df, inference_capture_path)
-
         # Now recompute the details for our Model
-        self.log.important(f"…
+        self.log.important(f"Loading inference metrics for {self.model_name}...")
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
-        model.details()
 
-        …
-        self.…
-        …
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
 
-        …
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)
+
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)
 
     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
```
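The new `_save_target_inference()` replaces the old inline column bookkeeping: id and target first, then prediction/prediction_std, then any UQ (`q_*`, `confidence`) and `_proba` columns; everything else, including raw features, stays out of the capture CSV. The selection logic in isolation (column names are illustrative):

```python
import pandas as pd

df = pd.DataFrame(columns=[
    "id", "solubility", "prediction", "prediction_std",
    "q_05", "q_95", "confidence", "low_proba", "high_proba", "feature_1",
])  # illustrative columns
cols = df.columns

output_columns = [c for c in ["id", "solubility"] if c in cols]  # id_column, target
output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
print(output_columns)  # "feature_1" is excluded from the capture CSV
```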
```diff
@@ -776,11 +927,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df
 
-        # Compute the residuals
         y_true = prediction_df[target_column]
-        …
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -794,118 +947,62 @@ class EndpointCore(Artifact):
 
         return prediction_df
 
-        …
-        …
-        …
-        …
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
 
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
 
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Get …
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        …
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, fscore, and support, handling zero division
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "fscore": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)
 
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
+        # Drop rows with NaN predictions (can't include in confusion matrix)
+        nan_mask = prediction_df["prediction"].isna()
+        if nan_mask.any():
+            n_nan = nan_mask.sum()
+            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
+            prediction_df = prediction_df[~nan_mask].copy()
 
         y_true = prediction_df[target_column]
-        …
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
-        # …
-        …
-        if class_labels is None:
-            class_labels = sorted(list(set(y_true) | set(y_pred)))
+        # Get model class labels
+        model_class_labels = ModelCore(self.model_name).class_labels()
 
-        # …
-        …
+        # Use model labels if available, otherwise infer from data
+        if model_class_labels:
+            self.log.important("Using model class labels for confusion matrix ordering...")
+            labels = model_class_labels
+        else:
+            labels = sorted(list(set(y_true) | set(y_pred)))
 
-        # …
-        …
+        # Compute confusion matrix and create DataFrame
+        conf_mtx = confusion_matrix(y_true, y_pred, labels=labels)
+        conf_mtx_df = pd.DataFrame(conf_mtx, index=labels, columns=labels)
         conf_mtx_df.index.name = "labels"
-
-        # Check if our model has class labels. If so make the index and columns ordered
-        model_class_labels = ModelCore(self.model_name).class_labels()
-        if model_class_labels:
-            self.log.important("Reordering the confusion matrix based on model class labels...")
-            conf_mtx_df.index = pd.Categorical(conf_mtx_df.index, categories=model_class_labels, ordered=True)
-            conf_mtx_df.columns = pd.Categorical(conf_mtx_df.columns, categories=model_class_labels, ordered=True)
-            conf_mtx_df = conf_mtx_df.sort_index().sort_index(axis=1)
         return conf_mtx_df
 
     def endpoint_config_name(self) -> str:
```
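`generate_confusion_matrix()` previously built the matrix and then reordered it with `pd.Categorical`; it now drops NaN predictions up front and hands the ordering straight to sklearn via `labels=`. The `labels=` behavior in isolation (class names are illustrative):

```python
import pandas as pd
from sklearn.metrics import confusion_matrix

y_true = pd.Series(["low", "high", "med", "high"])
y_pred = pd.Series(["low", "med", "high", "high"])

labels = ["low", "med", "high"]  # e.g. the model's class_labels()
conf_mtx = confusion_matrix(y_true, y_pred, labels=labels)
conf_mtx_df = pd.DataFrame(conf_mtx, index=labels, columns=labels)
conf_mtx_df.index.name = "labels"
print(conf_mtx_df)  # rows/columns follow the given ordering, no post-hoc sort needed
```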
```diff
@@ -932,9 +1029,9 @@ class EndpointCore(Artifact):
         self.upsert_workbench_meta({"workbench_input": input})
 
     def delete(self):
-        """…
+        """Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
         if not self.exists():
-            self.log.warning(f"Trying to delete an…
+            self.log.warning(f"Trying to delete an Endpoint that doesn't exist: {self.name}")
 
         # Remove this endpoint from the list of registered endpoints
         self.log.info(f"Removing {self.name} from the list of registered endpoints...")
@@ -975,12 +1072,23 @@ class EndpointCore(Artifact):
             cls.log.info(f"Deleting Monitoring Schedule {schedule['MonitoringScheduleName']}...")
             cls.sm_client.delete_monitoring_schedule(MonitoringScheduleName=schedule["MonitoringScheduleName"])
 
-        # Recursively delete all endpoint S3 artifacts (inference,…
-        …
-        …
-        …
-        …
-        …
+        # Recursively delete all endpoint S3 artifacts (inference, etc)
+        # Note: We do not want to delete the data_capture/ files since these
+        #       might be used for collection and data drift analysis
+        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}/"
+        all_s3_objects = wr.s3.list_objects(base_endpoint_path, boto3_session=cls.boto3_session)
+
+        # Filter out objects that contain 'data_capture/' in their path
+        s3_objects_to_delete = [obj for obj in all_s3_objects if "/data_capture/" not in obj]
+        cls.log.info(f"Found {len(all_s3_objects)} total objects at {base_endpoint_path}")
+        cls.log.info(f"Filtering out data_capture files, will delete {len(s3_objects_to_delete)} objects...")
+        cls.log.info(f"Objects to delete: {s3_objects_to_delete}")
+
+        if s3_objects_to_delete:
+            wr.s3.delete_objects(s3_objects_to_delete, boto3_session=cls.boto3_session)
+            cls.log.info(f"Successfully deleted {len(s3_objects_to_delete)} objects")
+        else:
+            cls.log.info("No objects to delete (only data_capture files found)")
 
         # Delete any dataframes that were stored in the Dataframe Cache
         cls.log.info("Deleting Dataframe Cache...")
```
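Endpoint deletion now lists everything under the endpoint's S3 prefix and filters out `data_capture/` paths before deleting, so captured request/response data survives for drift analysis. The filter is a plain substring test (the keys below are illustrative):

```python
# Illustrative S3 keys -- EndpointCore gets these from wr.s3.list_objects()
all_s3_objects = [
    "s3://bucket/endpoints/my-endpoint/inference/auto_inference/inference_metrics.csv",
    "s3://bucket/endpoints/my-endpoint/data_capture/2025/01/capture.jsonl",
    "s3://bucket/endpoints/my-endpoint/inference/holdout_xyz/inference_predictions.csv",
]

# Keep anything under data_capture/ for later data drift analysis
s3_objects_to_delete = [obj for obj in all_s3_objects if "/data_capture/" not in obj]
assert len(s3_objects_to_delete) == 2
```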
```diff
@@ -1031,7 +1139,7 @@ class EndpointCore(Artifact):
 if __name__ == "__main__":
     """Exercise the Endpoint Class"""
     from workbench.api import FeatureSet
-    from workbench.utils.endpoint_utils import …
+    from workbench.utils.endpoint_utils import get_evaluation_data
     import random
 
     # Grab an EndpointCore object and pull some information from it
@@ -1039,7 +1147,7 @@ if __name__ == "__main__":
 
     # Test various error conditions (set row 42 length to pd.NA)
     # Note: This test should return ALL rows
-    my_eval_df = …
+    my_eval_df = get_evaluation_data(my_endpoint)
     my_eval_df.at[42, "length"] = pd.NA
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1047,6 +1155,9 @@ if __name__ == "__main__":
     assert len(pred_results) == len(my_eval_df), "Predictions should match the number of sent rows"
 
     # Now we put in an invalid value
+    print("*" * 80)
+    print("NOW TESTING ERROR CONDITIONS...")
+    print("*" * 80)
     my_eval_df.at[42, "length"] = "invalid_value"
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1086,13 +1197,20 @@ if __name__ == "__main__":
     df = fs.pull_dataframe()[:100]
     cap_df = df.copy()
     cap_df.columns = [col.upper() for col in cap_df.columns]
-    my_endpoint.…
+    my_endpoint.inference(cap_df)
 
     # Boolean Type Test
     df["bool_column"] = [random.choice([True, False]) for _ in range(len(df))]
-    result_df = my_endpoint.…
+    result_df = my_endpoint.inference(df)
     assert result_df["bool_column"].dtype == bool
 
+    # Missing Feature Test
+    missing_df = df.drop(columns=["length"])
+    try:
+        my_endpoint.inference(missing_df)
+    except ValueError as e:
+        print(f"Expected error for missing feature: {e}")
+
     # Run Auto Inference on the Endpoint (uses the FeatureSet)
     print("Running Auto Inference...")
     my_endpoint.auto_inference()
@@ -1100,13 +1218,21 @@ if __name__ == "__main__":
     # Run Inference where we provide the data
     # Note: This dataframe could be from a FeatureSet or any other source
     print("Running Inference...")
-    my_eval_df = …
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df)
 
     # Now set capture=True to save inference results and metrics
-    my_eval_df = …
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df, capture_name="holdout_xyz")
 
+    # Run predictions using the fast_inference method
+    fast_results = my_endpoint.fast_inference(my_eval_df)
+
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = my_endpoint.cross_fold_inference()
+    print(all_results)
+
     # Run Inference and metrics for a Classification Endpoint
     class_endpoint = EndpointCore("wine-classification")
     auto_predictions = class_endpoint.auto_inference()
@@ -1115,8 +1241,11 @@ if __name__ == "__main__":
     target = "wine_class"
     print(class_endpoint.generate_confusion_matrix(target, auto_predictions))
 
-    # …
-    …
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = class_endpoint.cross_fold_inference()
+    print(all_results)
+    print("All done...")
 
     # Test the class method delete (commented out for now)
     # from workbench.api import Model
```