workbench-0.8.202-py3-none-any.whl → workbench-0.8.220-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of workbench might be problematic.
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +421 -85
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +78 -150
- workbench/algorithms/graph/light/proximity_graph.py +5 -5
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +3 -0
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +13 -11
- workbench/api/feature_set.py +111 -8
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +45 -12
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_model.py +4 -4
- workbench/core/artifacts/artifact.py +5 -5
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +228 -237
- workbench/core/artifacts/feature_set_core.py +185 -230
- workbench/core/artifacts/model_core.py +34 -26
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/features_to_model/features_to_model.py +22 -10
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +41 -10
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +278 -0
- workbench/model_scripts/chemprop/chemprop.template +428 -631
- workbench/model_scripts/chemprop/generated_model_script.py +432 -635
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +2 -10
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +374 -613
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +370 -609
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
- workbench/model_scripts/script_generation.py +6 -5
- workbench/model_scripts/uq_models/generated_model_script.py +65 -422
- workbench/model_scripts/xgb_model/generated_model_script.py +372 -395
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +278 -0
- workbench/model_scripts/xgb_model/xgb_model.template +366 -396
- workbench/repl/workbench_shell.py +0 -5
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +2 -2
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/training_test.py +85 -0
- workbench/utils/chem_utils/fingerprints.py +87 -46
- workbench/utils/chem_utils/projections.py +16 -6
- workbench/utils/chemprop_utils.py +36 -655
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +192 -54
- workbench/utils/pytorch_utils.py +33 -472
- workbench/utils/shap_utils.py +1 -55
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +49 -356
- workbench/web_interface/components/model_plot.py +7 -1
- workbench/web_interface/components/plugins/model_details.py +30 -68
- workbench/web_interface/components/plugins/scatter_plot.py +4 -8
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA +6 -5
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/RECORD +76 -60
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/entry_points.txt +2 -0
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
- workbench/model_scripts/uq_models/mapie.template +0 -605
- workbench/model_scripts/uq_models/requirements.txt +0 -1
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/endpoint_core.py:

```diff
@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib
 
 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics
 
 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
```
```diff
@@ -35,7 +27,7 @@ from workbench.utils.endpoint_metrics import EndpointMetrics
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
 from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
 from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
 from workbench_bridges.endpoints.fast_inference import fast_inference
```
```diff
@@ -338,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True
 
-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""
 
         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
```
```diff
@@ -351,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        #
-
-
-
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Grab the
-
-
-
-
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")
 
     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference
+        """Run inference on the Endpoint using the provided DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
```
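The old `capture` flag on `auto_inference()` is gone; hold-out inference and full-dataset inference are now separate methods. A minimal usage sketch, assuming the api-level `Endpoint` class exposes these `EndpointCore` methods (the endpoint name here is purely illustrative):

```python
from workbench.api import Endpoint

end = Endpoint("abalone-regression-end")  # hypothetical endpoint name

# Hold-out rows only (training == False), captured as "auto_inference"
holdout_df = end.auto_inference()

# Every row in the model's training view, captured as "full_inference"
full_df = end.full_inference()
print(full_df.head())
```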
```diff
@@ -389,7 +395,7 @@ class EndpointCore(Artifact):
         # Grab the model features and target column
         model = ModelCore(self.model_name)
         features = model.features()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
 
         # Run predictions on the evaluation data
         prediction_df = self._predict(eval_df, features, drop_error_rows)
```
```diff
@@ -397,19 +403,25 @@ class EndpointCore(Artifact):
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
 
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets
+
         # Sanity Check that the target column is present
-        if target_column not in prediction_df.columns:
-            self.log.important(f"Target Column {target_column} not found in prediction_df!")
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
             metrics = pd.DataFrame()
 
         # Compute the standard performance metrics for this model
         else:
             if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-                prediction_df = self.residuals(target_column, prediction_df)
-                metrics = self.regression_metrics(target_column, prediction_df)
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
             elif model.model_type == ModelType.CLASSIFIER:
-                metrics = self.classification_metrics(target_column, prediction_df)
+                metrics = self.classification_metrics(primary_target, prediction_df)
             else:
                 # For other model types, we don't compute metrics
                 self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
```
```diff
@@ -420,30 +432,77 @@ class EndpointCore(Artifact):
             print(metrics.head())
 
         # Capture the inference results and metrics
-        if target_column and capture_name:
+        if primary_target and capture_name:
 
             # If we don't have an id_column, we'll pull it from the model's FeatureSet
             if id_column is None:
                 fs = FeatureSetCore(model.get_input())
                 id_column = fs.id_column
-
-
-
-
-
-            # For
-
-
+
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]
+
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1
+
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name
+
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
             self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
 
         # Return the prediction DataFrame
         return prediction_df
 
-    def cross_fold_inference(self, nfolds: int = 5) -> pd.DataFrame:
-        """
-
-        Args:
-            nfolds (int): Number of folds to use for cross-fold (default: 5)
+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
 
         Returns:
             pd.DataFrame: A DataFrame with cross fold predictions
```
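The capture-naming rule above is the heart of the multi-target change: single-target models keep the old behavior, while multi-target models get one capture per target plus the primary target under the original name. A standalone sketch of that rule, for illustration only (the helper name is hypothetical):

```python
def capture_names(targets, capture_name: str) -> list:
    """Mirror the naming logic from EndpointCore.inference() above (illustrative)."""
    target_list = targets if isinstance(targets, list) else [targets]
    if len(target_list) == 1:
        return [capture_name]  # single-target: unchanged behavior
    prefix = "auto" if capture_name == "auto_inference" else capture_name
    # One capture per target, plus the primary target under the original name
    return [f"{prefix}_{t}" for t in target_list] + [capture_name]

print(capture_names("solubility", "auto_inference"))      # ['auto_inference']
print(capture_names(["logS", "logD"], "auto_inference"))  # ['auto_logS', 'auto_logD', 'auto_inference']
```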
```diff
@@ -455,8 +514,8 @@ class EndpointCore(Artifact):
         # Compute CrossFold (Metrics and Prediction Dataframe)
         # For PyTorch and ChemProp, pull pre-computed CV results from training
         if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
-            cross_fold_metrics, out_of_fold_df =
-        elif model.model_framework == ModelFramework.
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
             cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
         elif model.model_framework == ModelFramework.CHEMPROP:
             cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
```
```diff
@@ -476,58 +535,74 @@ class EndpointCore(Artifact):
             return out_of_fold_df
 
         # Capture the results
-
-        description = capture_name.replace("_", " ").title()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
         model_type = model.model_type
 
         # Get the id_column from the model's FeatureSet
         fs = FeatureSetCore(model.get_input())
         id_column = fs.id_column
 
-        #
-
-
-            self.log.important("UQ Regressor detected, running full inference to get uncertainty estimates...")
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
 
-
-
+        # Collect UQ columns (q_*, confidence) for additional tracking
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
 
-
-
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
 
-
-
-
-        ]
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
 
-
-
-
-        uq_df = uq_df[[id_column] + uq_columns]
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
 
-
-
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
 
-
-
-
-
-
-
-
-
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
 
-        self._capture_inference_results(
-            capture_name,
-            out_of_fold_df,
-            target_column,
-            model_type,
-            cross_fold_metrics,
-            description,
-            features=additional_columns,
-            id_column=id_column,
-        )
         return out_of_fold_df
 
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
```
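Note that `cross_fold_inference()` no longer runs folds at request time: it dispatches on `model.model_framework` and pulls the CV results already computed during training (via the `pull_cv_results` helpers imported earlier). A hedged usage sketch; the endpoint name is hypothetical:

```python
from workbench.api import Endpoint

end = Endpoint("aqsol-regression-end")  # hypothetical endpoint name
oof_df = end.cross_fold_inference()     # note: the old nfolds argument is gone

# Out-of-fold predictions for the primary target are captured as "full_cross_fold";
# multi-target models also get one capture per target, named cv_{target}
print(oof_df.columns.tolist())
```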
```diff
@@ -737,19 +812,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        target_column: str,
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target
 
         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            target_column (str): Name of the target column
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
```
```diff
@@ -780,26 +855,12 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)
 
-        #
-        output_columns = []
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-        if target_column in pred_results_df.columns:
-            output_columns.append(target_column)
-
-        # Grab the prediction column, any _proba columns, and UQ columns
-        output_columns += [col for col in pred_results_df.columns if "prediction" in col]
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col == "confidence"]
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)
 
         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(target_column, pred_results_df)
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
```
```diff
@@ -809,60 +870,54 @@ class EndpointCore(Artifact):
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
 
-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)
+
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Check for NaN values in target or prediction columns
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        if prediction_df[target_column].isnull().any() or prediction_df[prediction_col].isnull().any():
-            # Compute the number of NaN values in each column
-            num_nan_target = prediction_df[target_column].isnull().sum()
-            num_nan_prediction = prediction_df[prediction_col].isnull().sum()
-            self.log.warning(
-                f"NaNs Found: {target_column} {num_nan_target} and {prediction_col}: {num_nan_prediction}."
-            )
-            self.log.warning(
-                "NaN values found in target or prediction columns. Dropping NaN rows for metric computation."
-            )
-            prediction_df = prediction_df.dropna(subset=[target_column, prediction_col])
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)
 
     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
```
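With the inline metric math removed, `regression_metrics()` is now a thin wrapper over `workbench.utils.metrics_utils.compute_regression_metrics` (its positional signature is visible in the `return` line above). A small direct-call sketch with toy data:

```python
import pandas as pd
from workbench.utils.metrics_utils import compute_regression_metrics

# Toy DataFrame with a target column and a prediction column
toy_df = pd.DataFrame({"solubility": [1.0, 2.0, 3.0], "prediction": [1.1, 1.9, 3.2]})
metrics = compute_regression_metrics(toy_df, "solubility", "prediction")
print(metrics)  # the old inline version produced MAE/RMSE/R2/MAPE/MedAE columns
```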
```diff
@@ -872,11 +927,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df
 
-        # Compute the residuals
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
```
```diff
@@ -890,92 +947,22 @@ class EndpointCore(Artifact):
 
         return prediction_df
 
-    def validate_proba_columns(self, prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-
-
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
 
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
 
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        #
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        nan_mask = prediction_df[prediction_col].isna()
-        if nan_mask.any():
-            n_nan = nan_mask.sum()
-            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for metrics calculation")
-            prediction_df = prediction_df[~nan_mask].copy()
-
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, f1, and support, handling zero division
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "f1": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)
 
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
```
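`classification_metrics()` gets the same treatment: the precision/recall/F1/ROC-AUC computation (and the now-deleted `validate_proba_columns` check) moves into `compute_classification_metrics`. A toy sketch; per the comment in the diff, passing `class_labels=None` lets metrics_utils infer the labels:

```python
import pandas as pd
from workbench.utils.metrics_utils import compute_classification_metrics

# Toy classifier output: target, prediction, and one _proba column per class
toy_df = pd.DataFrame(
    {
        "species": ["cat", "dog", "cat", "dog"],
        "prediction": ["cat", "dog", "dog", "dog"],
        "cat_proba": [0.9, 0.2, 0.4, 0.1],
        "dog_proba": [0.1, 0.8, 0.6, 0.9],
    }
)
score_df = compute_classification_metrics(toy_df, "species", None, "prediction")
print(score_df)
```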
```diff
@@ -987,16 +974,20 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
         # Drop rows with NaN predictions (can't include in confusion matrix)
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        nan_mask = prediction_df[prediction_col].isna()
+        nan_mask = prediction_df["prediction"].isna()
         if nan_mask.any():
             n_nan = nan_mask.sum()
             self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
             prediction_df = prediction_df[~nan_mask].copy()
 
         y_true = prediction_df[target_column]
-        y_pred = prediction_df[
+        y_pred = prediction_df["prediction"]
 
         # Get model class labels
         model_class_labels = ModelCore(self.model_name).class_labels()
```