workbench 0.8.162__py3-none-any.whl → 0.8.220__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +259 -305
- workbench/algorithms/graph/light/proximity_graph.py +14 -12
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +5 -1
- workbench/api/compound.py +1 -1
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +18 -5
- workbench/api/feature_set.py +121 -15
- workbench/api/meta.py +5 -2
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +55 -21
- workbench/api/monitor.py +1 -16
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_model.py +4 -4
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +16 -8
- workbench/core/artifacts/data_capture_core.py +355 -0
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +382 -253
- workbench/core/artifacts/feature_set_core.py +249 -45
- workbench/core/artifacts/model_core.py +135 -80
- workbench/core/artifacts/monitor_core.py +33 -248
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
- workbench/core/cloud_platform/aws/aws_meta.py +12 -5
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/core/transforms/features_to_model/features_to_model.py +62 -40
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +76 -15
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +278 -0
- workbench/model_scripts/chemprop/chemprop.template +649 -0
- workbench/model_scripts/chemprop/generated_model_script.py +649 -0
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +3 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
- workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +444 -500
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +440 -496
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +20 -11
- workbench/model_scripts/uq_models/generated_model_script.py +248 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +372 -404
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +278 -0
- workbench/model_scripts/xgb_model/xgb_model.template +369 -401
- workbench/repl/workbench_shell.py +28 -19
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/ml_pipeline_batch.py +137 -0
- workbench/scripts/ml_pipeline_sqs.py +186 -0
- workbench/scripts/monitor_cloud_watch.py +20 -100
- workbench/scripts/training_test.py +85 -0
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +175 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +483 -0
- workbench/utils/chem_utils/mol_standardize.py +450 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +219 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/chemprop_utils.py +141 -0
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/cloudwatch_utils.py +137 -0
- workbench/utils/config_manager.py +3 -7
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +278 -79
- workbench/utils/monitor_utils.py +44 -62
- workbench/utils/pandas_utils.py +3 -3
- workbench/utils/pytorch_utils.py +87 -0
- workbench/utils/shap_utils.py +11 -57
- workbench/utils/workbench_logging.py +0 -3
- workbench/utils/workbench_sqs.py +1 -1
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +127 -219
- workbench/web_interface/components/model_plot.py +14 -2
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- workbench/web_interface/components/plugins/model_details.py +38 -74
- workbench/web_interface/components/plugins/scatter_plot.py +6 -10
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/METADATA +31 -9
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/RECORD +128 -96
- workbench-0.8.220.dist-info/entry_points.txt +11 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +1 -1
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
- workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -273
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/utils/chem_utils.py +0 -1556
- workbench/utils/execution_environment.py +0 -211
- workbench/utils/fast_inference.py +0 -167
- workbench/utils/resource_utils.py +0 -39
- workbench-0.8.162.dist-info/entry_points.txt +0 -5
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
- {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
workbench/model_scripts/pytorch_model/generated_model_script.py

@@ -1,576 +1,520 @@
-#
-
-
-
-
-#
-#
-
-
-
-from pytorch_tabular.models import CategoryEmbeddingModelConfig
-
-# Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    root_mean_squared_error,
-    precision_recall_fscore_support,
-    confusion_matrix,
-)
-
-# Classification Encoder
-from sklearn.preprocessing import LabelEncoder
+# PyTorch Tabular Model Template for Workbench
+#
+# This template handles both classification and regression models with:
+# - K-fold cross-validation ensemble training (or single train/val split)
+# - Out-of-fold predictions for validation metrics
+# - Categorical feature embedding via TabularMLP
+# - Compressed feature decompression
+#
+# NOTE: Imports are structured to minimize serverless endpoint startup time.
+# Heavy imports (sklearn, awswrangler) are deferred to training time.
 
-# Scikit Learn Imports
-from sklearn.model_selection import train_test_split
-
-from io import StringIO
 import json
-import argparse
-import joblib
 import os
+
+import joblib
+import numpy as np
 import pandas as pd
-
+import torch
+
+from model_script_utils import (
+    convert_categorical_types,
+    decompress_features,
+    expand_proba_column,
+    input_fn,
+    match_features_case_insensitive,
+    output_fn,
+)
+from pytorch_utils import (
+    FeatureScaler,
+    load_model,
+    predict,
+    prepare_data,
+)
+from uq_harness import (
+    compute_confidence,
+    load_uq_models,
+    predict_intervals,
+)
 
-#
-
-
-
-
-"
-"
-"
-"
+# =============================================================================
+# Default Hyperparameters
+# =============================================================================
+DEFAULT_HYPERPARAMETERS = {
+    # Training parameters
+    "n_folds": 5,
+    "max_epochs": 200,
+    "early_stopping_patience": 30,
+    "batch_size": 128,
+    # Model architecture (larger capacity - ensemble provides regularization)
+    "layers": "512-256-128",
+    "learning_rate": 1e-3,
+    "dropout": 0.05,
+    "use_batch_norm": True,
+    # Loss function for regression (L1Loss=MAE, MSELoss=MSE, HuberLoss, SmoothL1Loss)
+    "loss": "L1Loss",
+    # Random seed
+    "seed": 42,
 }
 
+# Template parameters (filled in by Workbench)
+TEMPLATE_PARAMS = {
+    "model_type": "uq_regressor",
+    "target": "udm_asy_res_efflux_ratio",
+    "features": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo'],
+    "id_column": "udm_mol_bat_id",
+    "compressed_features": ['fingerprint'],
+    "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/caco2-er-reg-temporal/training",
+    "hyperparameters": {'n_folds': 1},
+}
 
-# Function to check if dataframe is empty
-def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
-    """
-    Check if the provided dataframe is empty and raise an exception if it is.
-
-    Args:
-        df (pd.DataFrame): DataFrame to check
-        df_name (str): Name of the DataFrame
-    """
-    if df.empty:
-        msg = f"*** The training data {df_name} has 0 rows! ***STOPPING***"
-        print(msg)
-        raise ValueError(msg)
-
-
-def expand_proba_column(df: pd.DataFrame, class_labels: List[str]) -> pd.DataFrame:
-    """
-    Expands a column in a DataFrame containing a list of probabilities into separate columns.
-
-    Args:
-        df (pd.DataFrame): DataFrame containing a "pred_proba" column
-        class_labels (List[str]): List of class labels
-
-    Returns:
-        pd.DataFrame: DataFrame with the "pred_proba" expanded into separate columns
-    """
-
-    # Sanity check
-    proba_column = "pred_proba"
-    if proba_column not in df.columns:
-        raise ValueError('DataFrame does not contain a "pred_proba" column')
-
-    # Construct new column names with '_proba' suffix
-    proba_splits = [f"{label}_proba" for label in class_labels]
-
-    # Expand the proba_column into separate columns for each probability
-    proba_df = pd.DataFrame(df[proba_column].tolist(), columns=proba_splits)
-
-    # Drop any proba columns and reset the index in prep for the concat
-    df = df.drop(columns=[proba_column] + proba_splits, errors="ignore")
-    df = df.reset_index(drop=True)
-
-    # Concatenate the new columns with the original DataFrame
-    df = pd.concat([df, proba_df], axis=1)
-    print(df)
-    return df
-
-
-def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> pd.DataFrame:
-    """
-    Matches and renames DataFrame columns to match model feature names (case-insensitive).
-    Prioritizes exact matches, then case-insensitive matches.
-
-    Raises ValueError if any model features cannot be matched.
-    """
-    df_columns_lower = {col.lower(): col for col in df.columns}
-    rename_dict = {}
-    missing = []
-
-    for feature in model_features:
-        if feature in df.columns:
-            continue  # Exact match
-        elif feature.lower() in df_columns_lower:
-            rename_dict[df_columns_lower[feature.lower()]] = feature
-        else:
-            missing.append(feature)
-
-    if missing:
-        raise ValueError(f"Features not found: {missing}")
-
-    return df.rename(columns=rename_dict)
-
-
-def convert_categorical_types(df: pd.DataFrame, features: list, category_mappings={}) -> tuple:
-    """
-    Converts appropriate columns to categorical type with consistent mappings.
-
-    Args:
-        df (pd.DataFrame): The DataFrame to process.
-        features (list): List of feature names to consider for conversion.
-        category_mappings (dict, optional): Existing category mappings. If empty dict, we're in
-            training mode. If populated, we're in inference mode.
-
-    Returns:
-        tuple: (processed DataFrame, category mappings dictionary)
-    """
-    # Training mode
-    if category_mappings == {}:
-        for col in df.select_dtypes(include=["object", "string"]):
-            if col in features and df[col].nunique() < 20:
-                print(f"Training mode: Converting {col} to category")
-                df[col] = df[col].astype("category")
-                category_mappings[col] = df[col].cat.categories.tolist()  # Store category mappings
-
-    # Inference mode
-    else:
-        for col, categories in category_mappings.items():
-            if col in df.columns:
-                print(f"Inference mode: Applying categorical mapping for {col}")
-                df[col] = pd.Categorical(df[col], categories=categories)  # Apply consistent categorical mapping
-
-    return df, category_mappings
-
-
-def decompress_features(
-    df: pd.DataFrame, features: List[str], compressed_features: List[str]
-) -> Tuple[pd.DataFrame, List[str]]:
-    """Prepare features for the model
-
-    Args:
-        df (pd.DataFrame): The features DataFrame
-        features (List[str]): Full list of feature names
-        compressed_features (List[str]): List of feature names to decompress (bitstrings)
-
-    Returns:
-        pd.DataFrame: DataFrame with the decompressed features
-        List[str]: Updated list of feature names after decompression
-
-    Raises:
-        ValueError: If any missing values are found in the specified features
-    """
-
-    # Check for any missing values in the required features
-    missing_counts = df[features].isna().sum()
-    if missing_counts.any():
-        missing_features = missing_counts[missing_counts > 0]
-        print(
-            f"WARNING: Found missing values in features: {missing_features.to_dict()}. "
-            "WARNING: You might want to remove/replace all NaN values before processing."
-        )
-
-    # Decompress the specified compressed features
-    decompressed_features = features
-    for feature in compressed_features:
-        if (feature not in df.columns) or (feature not in features):
-            print(f"Feature '{feature}' not in the features list, skipping decompression.")
-            continue
-
-        # Remove the feature from the list of features to avoid duplication
-        decompressed_features.remove(feature)
-
-        # Handle all compressed features as bitstrings
-        bit_matrix = np.array([list(bitstring) for bitstring in df[feature]], dtype=np.uint8)
-        prefix = feature[:3]
-
-        # Create all new columns at once - avoids fragmentation
-        new_col_names = [f"{prefix}_{i}" for i in range(bit_matrix.shape[1])]
-        new_df = pd.DataFrame(bit_matrix, columns=new_col_names, index=df.index)
-
-        # Add to features list
-        decompressed_features.extend(new_col_names)
-
-        # Drop original column and concatenate new ones
-        df = df.drop(columns=[feature])
-        df = pd.concat([df, new_df], axis=1)
-
-    return df, decompressed_features
-
-
-def model_fn(model_dir):
-
-    # Save current working directory
-    original_cwd = os.getcwd()
-    try:
-        # Change to /tmp because Pytorch Tabular needs write access (creates a .pt_tmp directory)
-        os.chdir('/tmp')
-
-        # Load the model
-        model_path = os.path.join(model_dir, "tabular_model")
-        model = TabularModel.load_model(model_path)
-
-    # Restore the original working directory
-    finally:
-        os.chdir(original_cwd)
-
-    return model
-
-
-def input_fn(input_data, content_type):
-    """Parse input data and return a DataFrame."""
-    if not input_data:
-        raise ValueError("Empty input data is not supported!")
-
-    # Decode bytes to string if necessary
-    if isinstance(input_data, bytes):
-        input_data = input_data.decode("utf-8")
 
-
-
-
-
+# =============================================================================
+# Model Loading (for SageMaker inference)
+# =============================================================================
+def model_fn(model_dir: str) -> dict:
+    """Load PyTorch TabularMLP ensemble from the specified directory."""
+    # Load ensemble metadata
+    metadata_path = os.path.join(model_dir, "ensemble_metadata.joblib")
+    if os.path.exists(metadata_path):
+        metadata = joblib.load(metadata_path)
+        n_ensemble = metadata["n_ensemble"]
     else:
-
+        n_ensemble = 1
+
+    # Determine device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Load ensemble models
+    ensemble_models = []
+    for i in range(n_ensemble):
+        model_path = os.path.join(model_dir, f"model_{i}")
+        model = load_model(model_path, device=device)
+        ensemble_models.append(model)
+
+    print(f"Loaded {len(ensemble_models)} model(s)")
+
+    # Load feature scaler
+    scaler = FeatureScaler.load(os.path.join(model_dir, "scaler.joblib"))
+
+    # Load UQ models (regression only)
+    uq_models, uq_metadata = None, None
+    uq_path = os.path.join(model_dir, "uq_metadata.json")
+    if os.path.exists(uq_path):
+        uq_models, uq_metadata = load_uq_models(model_dir)
+
+    return {
+        "ensemble_models": ensemble_models,
+        "n_ensemble": n_ensemble,
+        "scaler": scaler,
+        "uq_models": uq_models,
+        "uq_metadata": uq_metadata,
+    }
 
 
-
-
-
-
-
-
-
-
-    raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
+# =============================================================================
+# Inference (for SageMaker inference)
+# =============================================================================
+def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
+    """Make predictions with PyTorch TabularMLP ensemble."""
+    model_type = TEMPLATE_PARAMS["model_type"]
+    compressed_features = TEMPLATE_PARAMS["compressed_features"]
+    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
 
+    # Load artifacts
+    ensemble_models = model_dict["ensemble_models"]
+    scaler = model_dict["scaler"]
+    uq_models = model_dict.get("uq_models")
+    uq_metadata = model_dict.get("uq_metadata")
 
-
-
+    with open(os.path.join(model_dir, "feature_columns.json")) as f:
+        features = json.load(f)
+    with open(os.path.join(model_dir, "category_mappings.json")) as f:
+        category_mappings = json.load(f)
+    with open(os.path.join(model_dir, "feature_metadata.json")) as f:
+        feature_metadata = json.load(f)
 
-
-
-        model: The TabularModel use for predictions
+    continuous_cols = feature_metadata["continuous_cols"]
+    categorical_cols = feature_metadata["categorical_cols"]
 
-
-
-
-
+    label_encoder = None
+    encoder_path = os.path.join(model_dir, "label_encoder.joblib")
+    if os.path.exists(encoder_path):
+        label_encoder = joblib.load(encoder_path)
 
-    # Grab our feature columns (from training)
-    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
-    with open(os.path.join(model_dir, "feature_columns.json")) as fp:
-        features = json.load(fp)
     print(f"Model Features: {features}")
 
-    #
-    with open(os.path.join(model_dir, "category_mappings.json")) as fp:
-        category_mappings = json.load(fp)
-
-    # Load our Label Encoder if we have one
-    label_encoder = None
-    if os.path.exists(os.path.join(model_dir, "label_encoder.joblib")):
-        label_encoder = joblib.load(os.path.join(model_dir, "label_encoder.joblib"))
-
-    # We're going match features in a case-insensitive manner, accounting for all the permutations
-    # - Model has a feature list that's any case ("Id", "taCos", "cOunT", "likes_tacos")
-    # - Incoming data has columns that are mixed case ("ID", "Tacos", "Count", "Likes_Tacos")
+    # Prepare features
     matched_df = match_features_case_insensitive(df, features)
-
-    # Detect categorical types in the incoming DataFrame
     matched_df, _ = convert_categorical_types(matched_df, features, category_mappings)
 
-    # If we have compressed features, decompress them
     if compressed_features:
         print("Decompressing features for prediction...")
         matched_df, features = decompress_features(matched_df, features, compressed_features)
 
-    #
-
+    # Track missing features
+    missing_mask = matched_df[features].isna().any(axis=1)
+    if missing_mask.any():
+        print(f"Warning: {missing_mask.sum()} rows have missing features")
 
-    #
-
-
-
-    if prediction_column in result.columns:
-        predictions = result[prediction_column].values
-    else:
-        raise ValueError(f"Cannot find prediction column in: {result.columns.tolist()}")
+    # Initialize output columns
+    df["prediction"] = np.nan
+    if model_type in ["regressor", "uq_regressor"]:
+        df["prediction_std"] = np.nan
 
-
-    if
-
+    complete_df = matched_df[~missing_mask].copy()
+    if len(complete_df) == 0:
+        print("Warning: No complete rows to predict on")
+        return df
+
+    # Prepare data for inference (with standardization)
+    x_cont, x_cat, _, _, _ = prepare_data(
+        complete_df, continuous_cols, categorical_cols, category_mappings=category_mappings, scaler=scaler
+    )
 
-    #
-
+    # Collect ensemble predictions
+    all_preds = []
+    for model in ensemble_models:
+        preds = predict(model, x_cont, x_cat)
+        all_preds.append(preds)
 
-    #
-
-
-
-        probs = result[prob_cols].values
-        df["pred_proba"] = [p.tolist() for p in probs]
+    # Aggregate predictions
+    ensemble_preds = np.stack(all_preds, axis=0)
+    preds = np.mean(ensemble_preds, axis=0)
+    preds_std = np.std(ensemble_preds, axis=0)
 
-
-        df = expand_proba_column(df, label_encoder.classes_)
+    print(f"Inference complete: {len(preds)} predictions, {len(ensemble_models)} ensemble members")
 
-
+    if label_encoder is not None:
+        # Classification: average probabilities, then argmax
+        avg_probs = preds  # Already softmax output
+        class_preds = np.argmax(avg_probs, axis=1)
+        predictions = label_encoder.inverse_transform(class_preds)
+
+        all_proba = pd.Series([None] * len(df), index=df.index, dtype=object)
+        all_proba.loc[~missing_mask] = [p.tolist() for p in avg_probs]
+        df["pred_proba"] = all_proba
+        df = expand_proba_column(df, label_encoder.classes_)
+    else:
+        # Regression
+        predictions = preds.flatten()
+        df.loc[~missing_mask, "prediction_std"] = preds_std.flatten()
+
+        # Add UQ intervals if available
+        if uq_models and uq_metadata:
+            X_complete = complete_df[features]
+            df_complete = df.loc[~missing_mask].copy()
+            df_complete["prediction"] = predictions  # Set prediction before compute_confidence
+            df_complete = predict_intervals(df_complete, X_complete, uq_models, uq_metadata)
+            df_complete = compute_confidence(df_complete, uq_metadata["median_interval_width"], "q_10", "q_90")
+            # Copy UQ columns back to main dataframe
+            for col in df_complete.columns:
+                if col.startswith("q_") or col == "confidence":
+                    df.loc[~missing_mask, col] = df_complete[col].values
+
+    df.loc[~missing_mask, "prediction"] = predictions
     return df
 
 
+# =============================================================================
+# Training
+# =============================================================================
 if __name__ == "__main__":
-
+    # -------------------------------------------------------------------------
+    # Training-only imports (deferred to reduce serverless startup time)
+    # -------------------------------------------------------------------------
+    import argparse
+
+    import awswrangler as wr
+    from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
+    from sklearn.preprocessing import LabelEncoder
+
+    # Enable Tensor Core optimization for GPUs that support it
+    torch.set_float32_matmul_precision("medium")
+
+    from model_script_utils import (
+        check_dataframe,
+        compute_classification_metrics,
+        compute_regression_metrics,
+        print_classification_metrics,
+        print_confusion_matrix,
+        print_regression_metrics,
+    )
+    from pytorch_utils import (
+        create_model,
+        save_model,
+        train_model,
+    )
+    from uq_harness import (
+        save_uq_models,
+        train_uq_models,
+    )
 
-    #
-
+    # -------------------------------------------------------------------------
+    # Setup: Parse arguments and load data
+    # -------------------------------------------------------------------------
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
+    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
+    parser.add_argument("--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data"))
+    args = parser.parse_args()
+
+    # Extract template parameters
+    target = TEMPLATE_PARAMS["target"]
    features = TEMPLATE_PARAMS["features"]
    orig_features = features.copy()
+    id_column = TEMPLATE_PARAMS["id_column"]
    compressed_features = TEMPLATE_PARAMS["compressed_features"]
    model_type = TEMPLATE_PARAMS["model_type"]
    model_metrics_s3_path = TEMPLATE_PARAMS["model_metrics_s3_path"]
-
-    hyperparameters = TEMPLATE_PARAMS["hyperparameters"]
-    validation_split = 0.2
-
-    # Script arguments for input/output directories
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
-    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
-    parser.add_argument(
-        "--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data")
-    )
-    args = parser.parse_args()
+    hyperparameters = {**DEFAULT_HYPERPARAMETERS, **(TEMPLATE_PARAMS["hyperparameters"] or {})}
 
-    #
-    training_files = [
-        os.path.join(args.train, file)
-        for file in os.listdir(args.train)
-        if file.endswith(".csv")
-    ]
+    # Load training data
+    training_files = [os.path.join(args.train, f) for f in os.listdir(args.train) if f.endswith(".csv")]
    print(f"Training Files: {training_files}")
-
-    # Combine files and read them all into a single pandas dataframe
-    all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
-
-    # Check if the dataframe is empty
+    all_df = pd.concat([pd.read_csv(f, engine="python") for f in training_files])
    check_dataframe(all_df, "training_df")

-    #
+    # Drop rows with missing features
+    initial_count = len(all_df)
+    all_df = all_df.dropna(subset=features)
+    if len(all_df) < initial_count:
+        print(f"Dropped {initial_count - len(all_df)} rows with missing features")
+
    print(f"Target: {target}")
-    print(f"Features: {
+    print(f"Features: {features}")
+    print(f"Hyperparameters: {hyperparameters}")

-    #
+    # -------------------------------------------------------------------------
+    # Preprocessing
+    # -------------------------------------------------------------------------
    all_df, category_mappings = convert_categorical_types(all_df, features)

-    # If we have compressed features, decompress them
    if compressed_features:
-        print(f"Decompressing features {compressed_features}
+        print(f"Decompressing features: {compressed_features}")
        all_df, features = decompress_features(all_df, features, compressed_features)

-    #
-    if
-
-
-
-
-    # Does the dataframe have a training column?
-    elif "training" in all_df.columns:
-        print("Found training column, splitting data based on training column")
-        df_train = all_df[all_df["training"]]
-        df_val = all_df[~all_df["training"]]
-    else:
-        # Just do a random training Split
-        print("WARNING: No training column found, splitting data with random state=42")
-        df_train, df_val = train_test_split(all_df, test_size=validation_split, random_state=42)
-    print(f"FIT/TRAIN: {df_train.shape}")
-    print(f"VALIDATION: {df_val.shape}")
-
-    # Determine categorical and continuous columns
-    categorical_cols = [col for col in features if df_train[col].dtype.name == "category"]
-    continuous_cols = [col for col in features if col not in categorical_cols]
-
-    print(f"Categorical columns: {categorical_cols}")
-    print(f"Continuous columns: {continuous_cols}")
-
-    # Set up PyTorch Tabular configuration
-    data_config = DataConfig(
-        target=[target],
-        continuous_cols=continuous_cols,
-        categorical_cols=categorical_cols,
-    )
+    # Determine categorical vs continuous columns
+    categorical_cols = [c for c in features if all_df[c].dtype.name == "category"]
+    continuous_cols = [c for c in features if c not in categorical_cols]
+    all_df[continuous_cols] = all_df[continuous_cols].astype("float64")
+    print(f"Categorical: {categorical_cols}")
+    print(f"Continuous: {len(continuous_cols)} columns")

-    #
+    # -------------------------------------------------------------------------
+    # Classification setup
+    # -------------------------------------------------------------------------
+    label_encoder = None
+    n_outputs = 1
    if model_type == "classifier":
-        task = "classification"
-        # Encode the target column
        label_encoder = LabelEncoder()
-
-
+        all_df[target] = label_encoder.fit_transform(all_df[target])
+        n_outputs = len(label_encoder.classes_)
+        print(f"Class labels: {label_encoder.classes_.tolist()}")
+
+    # -------------------------------------------------------------------------
+    # Cross-validation setup
+    # -------------------------------------------------------------------------
+    n_folds = hyperparameters["n_folds"]
+    task = "classification" if model_type == "classifier" else "regression"
+    hidden_layers = [int(x) for x in hyperparameters["layers"].split("-")]
+
+    # Get categorical cardinalities
+    categorical_cardinalities = [len(category_mappings.get(col, {})) for col in categorical_cols]
+
+    if n_folds == 1:
+        if "training" in all_df.columns:
+            print("Using 'training' column for train/val split")
+            train_idx = np.where(all_df["training"])[0]
+            val_idx = np.where(~all_df["training"])[0]
+        else:
+            print("WARNING: No 'training' column found, using random 80/20 split")
+            train_idx, val_idx = train_test_split(np.arange(len(all_df)), test_size=0.2, random_state=42)
+        folds = [(train_idx, val_idx)]
    else:
-
-
+        if model_type == "classifier":
+            kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
+            folds = list(kfold.split(all_df, all_df[target]))
+        else:
+            kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)
+            folds = list(kfold.split(all_df))

-
-    print(f"Hyperparameters: {hyperparameters}")
+    print(f"Training {'single model' if n_folds == 1 else f'{n_folds}-fold ensemble'}...")

-    #
-
-
-
-        "max_epochs": 100,
-        "early_stopping": "valid_loss",
-        "early_stopping_patience": 15,
-        "checkpoints": "valid_loss",
-        "accelerator": "auto",
-        "progress_bar": "none",
-        "gradient_clip_val": 1.0,
-    }
+    # Fit scaler on all training data (used across all folds)
+    scaler = FeatureScaler()
+    scaler.fit(all_df, continuous_cols)
+    print(f"Fitted scaler on {len(continuous_cols)} continuous features")

-    #
-
-
-    # Print overwrites
-    for key, value in training_overrides.items():
-        print(f"TRAINING CONFIG Override: {key}: {trainer_defaults[key]} → {value}")
-    trainer_params = {**trainer_defaults, **training_overrides}
-    trainer_config = TrainerConfig(**trainer_params)
-
-    # Model config defaults
-    model_defaults = {
-        "layers": "1024-512-512",
-        "activation": "ReLU",
-        "learning_rate": 1e-3,
-        "dropout": 0.1,
-        "use_batch_norm": True,
-        "initialization": "kaiming",
-    }
-    # Override defaults with model_config if present
-    model_overrides = {k: v for k, v in hyperparameters.get('model_config', {}).items()
-                       if k in model_defaults}
-    # Print overwrites
-    for key, value in model_overrides.items():
-        print(f"MODEL CONFIG Override: {key}: {model_defaults[key]} → {value}")
-    model_params = {**model_defaults, **model_overrides}
-
-    # Use CategoryEmbedding model configuration for general-purpose tabular modeling.
-    # Works effectively for both regression and classification as the foundational
-    # architecture in PyTorch Tabular
-    model_config = CategoryEmbeddingModelConfig(
-        task=task,
-        **model_params
-    )
-    optimizer_config = OptimizerConfig()
-
-    #####################################
-    # Create and train the TabularModel #
-    #####################################
-    tabular_model = TabularModel(
-        data_config=data_config,
-        model_config=model_config,
-        optimizer_config=optimizer_config,
-        trainer_config=trainer_config,
-    )
-    tabular_model.fit(train=df_train, validation=df_val)
+    # Determine device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"Using device: {device}")

-    #
-
-
+    # -------------------------------------------------------------------------
+    # Training loop
+    # -------------------------------------------------------------------------
+    oof_predictions = np.full((len(all_df), n_outputs), np.nan, dtype=np.float64)

-
-
-
-
+    ensemble_models = []
+    for fold_idx, (train_idx, val_idx) in enumerate(folds):
+        print(f"\n{'='*50}")
+        print(f"Fold {fold_idx + 1}/{len(folds)} - Train: {len(train_idx)}, Val: {len(val_idx)}")
+        print(f"{'='*50}")
+
+        df_train = all_df.iloc[train_idx].reset_index(drop=True)
+        df_val = all_df.iloc[val_idx].reset_index(drop=True)
+
+        # Prepare data (using pre-fitted scaler)
+        train_x_cont, train_x_cat, train_y, _, _ = prepare_data(
+            df_train, continuous_cols, categorical_cols, target, category_mappings, scaler=scaler
+        )
+        val_x_cont, val_x_cat, val_y, _, _ = prepare_data(
+            df_val, continuous_cols, categorical_cols, target, category_mappings, scaler=scaler
+        )
+
+        # Create model
+        torch.manual_seed(hyperparameters["seed"] + fold_idx)
+        model = create_model(
+            n_continuous=len(continuous_cols),
+            categorical_cardinalities=categorical_cardinalities,
+            hidden_layers=hidden_layers,
+            n_outputs=n_outputs,
+            task=task,
+            dropout=hyperparameters["dropout"],
+            use_batch_norm=hyperparameters["use_batch_norm"],
+        )
+
+        # Train
+        model, history = train_model(
+            model,
+            train_x_cont, train_x_cat, train_y,
+            val_x_cont, val_x_cat, val_y,
+            task=task,
+            max_epochs=hyperparameters["max_epochs"],
+            patience=hyperparameters["early_stopping_patience"],
+            batch_size=hyperparameters["batch_size"],
+            learning_rate=hyperparameters["learning_rate"],
+            loss=hyperparameters.get("loss", "L1Loss"),
+            device=device,
+        )
+        ensemble_models.append(model)
+
+        # Out-of-fold predictions
+        fold_preds = predict(model, val_x_cont, val_x_cat)
+        oof_predictions[val_idx] = fold_preds
+
+    print(f"\nTraining complete! Trained {len(ensemble_models)} model(s).")
+
+    # -------------------------------------------------------------------------
+    # Prepare validation results
+    # -------------------------------------------------------------------------
+    if n_folds == 1:
+        val_mask = ~np.isnan(oof_predictions[:, 0])
+        df_val = all_df[val_mask].copy()
+        predictions = oof_predictions[val_mask]
    else:
-
-
+        df_val = all_df.copy()
+        predictions = oof_predictions

+    # Decode labels for classification
    if model_type == "classifier":
-
-
-
-
-
-        df_val["pred_proba"] = [p.tolist() for p in probs]
-
-        # Expand the pred_proba column into separate columns for each class
-        print(df_val.columns)
-        df_val = expand_proba_column(df_val, label_encoder.classes_)
-        print(df_val.columns)
-
-        # Decode the target and prediction labels
-        y_validate = label_encoder.inverse_transform(df_val[target])
-        preds = label_encoder.inverse_transform(preds.astype(int))
+        class_preds = np.argmax(predictions, axis=1)
+        df_val[target] = label_encoder.inverse_transform(df_val[target].astype(int))
+        df_val["prediction"] = label_encoder.inverse_transform(class_preds)
+        df_val["pred_proba"] = [p.tolist() for p in predictions]
+        df_val = expand_proba_column(df_val, label_encoder.classes_)
    else:
-
-
-        #
-
-
-
-
-            df_val[output_columns],
-            path=f"{model_metrics_s3_path}/validation_predictions.csv",
-            index=False,
-        )
+        df_val["prediction"] = predictions.flatten()
+
+    # -------------------------------------------------------------------------
+    # Compute and print metrics
+    # -------------------------------------------------------------------------
+    y_true = df_val[target].values
+    y_pred = df_val["prediction"].values

-    # Report Performance Metrics
    if model_type == "classifier":
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        score_df = compute_classification_metrics(y_true, y_pred, label_encoder.classes_, target)
+        print_classification_metrics(score_df, target, label_encoder.classes_)
+        print_confusion_matrix(y_true, y_pred, label_encoder.classes_)
+    else:
+        metrics = compute_regression_metrics(y_true, y_pred)
+        print_regression_metrics(metrics)
+
+        # Compute ensemble prediction_std
+        if n_folds > 1:
+            # Re-run inference with all models to get std
+            x_cont, x_cat, _, _, _ = prepare_data(
+                df_val, continuous_cols, categorical_cols, category_mappings=category_mappings, scaler=scaler
+            )
+            all_preds = [predict(m, x_cont, x_cat).flatten() for m in ensemble_models]
+            df_val["prediction_std"] = np.std(np.stack(all_preds), axis=0)
+            print(f"Ensemble std - mean: {df_val['prediction_std'].mean():.4f}, max: {df_val['prediction_std'].max():.4f}")
+        else:
+            df_val["prediction_std"] = 0.0
+
+        # Train UQ models for uncertainty quantification
+        print("\n" + "=" * 50)
+        print("Training UQ Models")
+        print("=" * 50)
+        uq_models, uq_metadata = train_uq_models(
+            all_df[features], all_df[target], df_val[features], y_true
        )
+        df_val = predict_intervals(df_val, df_val[features], uq_models, uq_metadata)
+        df_val = compute_confidence(df_val, uq_metadata["median_interval_width"])
+
+    # -------------------------------------------------------------------------
+    # Save validation predictions to S3
+    # -------------------------------------------------------------------------
+    output_columns = []
+    if id_column in df_val.columns:
+        output_columns.append(id_column)
+    output_columns += [target, "prediction"]
+
+    if model_type != "classifier":
+        output_columns.append("prediction_std")
+        output_columns += [c for c in df_val.columns if c.startswith("q_") or c == "confidence"]
+
+    output_columns += [c for c in df_val.columns if c.endswith("_proba")]
+
+    wr.s3.to_csv(df_val[output_columns], f"{model_metrics_s3_path}/validation_predictions.csv", index=False)
+
+    # -------------------------------------------------------------------------
+    # Save model artifacts
+    # -------------------------------------------------------------------------
+    model_config = {
+        "n_continuous": len(continuous_cols),
+        "categorical_cardinalities": categorical_cardinalities,
+        "hidden_layers": hidden_layers,
+        "n_outputs": n_outputs,
+        "task": task,
+        "dropout": hyperparameters["dropout"],
+        "use_batch_norm": hyperparameters["use_batch_norm"],
+    }

-
-
-
-            for m in metrics:
-                value = score_df.loc[score_df[target] == t, m].iloc[0]
-                print(f"Metrics:{t}:{m} {value}")
+    for idx, m in enumerate(ensemble_models):
+        save_model(m, os.path.join(args.model_dir, f"model_{idx}"), model_config)
+    print(f"Saved {len(ensemble_models)} model(s)")

-
-
-
-
-
-
+    joblib.dump({"n_ensemble": len(ensemble_models), "n_folds": n_folds}, os.path.join(args.model_dir, "ensemble_metadata.joblib"))
+
+    with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as f:
+        json.dump(orig_features, f)
+
+    with open(os.path.join(args.model_dir, "category_mappings.json"), "w") as f:
+        json.dump(category_mappings, f)
+
+    with open(os.path.join(args.model_dir, "feature_metadata.json"), "w") as f:
+        json.dump({"continuous_cols": continuous_cols, "categorical_cols": categorical_cols}, f)
+
+    with open(os.path.join(args.model_dir, "hyperparameters.json"), "w") as f:
+        json.dump(hyperparameters, f, indent=2)
+
+    scaler.save(os.path.join(args.model_dir, "scaler.joblib"))

-    else:
-        # Calculate various model performance metrics (regression)
-        rmse = root_mean_squared_error(y_validate, preds)
-        mae = mean_absolute_error(y_validate, preds)
-        r2 = r2_score(y_validate, preds)
-        print(f"RMSE: {rmse:.3f}")
-        print(f"MAE: {mae:.3f}")
-        print(f"R2: {r2:.3f}")
-        print(f"NumRows: {len(df_val)}")
-
-    # Save the model to the standard place/name
-    tabular_model.save_model(os.path.join(args.model_dir, "tabular_model"))
    if label_encoder:
        joblib.dump(label_encoder, os.path.join(args.model_dir, "label_encoder.joblib"))

-
-
-        json.dump(orig_features, fp)  # We save the original features, not the decompressed ones
+    if model_type != "classifier":
+        save_uq_models(uq_models, uq_metadata, args.model_dir)

-
-    with open(os.path.join(args.model_dir, "category_mappings.json"), "w") as fp:
-        json.dump(category_mappings, fp)
+    print(f"\nModel training complete! Artifacts saved to {args.model_dir}")
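For readers skimming the new template, here is a minimal sketch (illustrative only, not part of the wheel) of the hyperparameter-merge behavior introduced above: values set in TEMPLATE_PARAMS["hyperparameters"] override DEFAULT_HYPERPARAMETERS, so the {'n_folds': 1} override in this generated script disables the k-fold ensemble and falls back to a single train/val split while every other key keeps its default.

# Illustrative sketch only; it mirrors the merge expression used in the new
# training section and is not an API exported by the workbench package.
DEFAULT_HYPERPARAMETERS = {
    "n_folds": 5,
    "max_epochs": 200,
    "early_stopping_patience": 30,
    "batch_size": 128,
    "layers": "512-256-128",
    "learning_rate": 1e-3,
    "dropout": 0.05,
    "use_batch_norm": True,
    "loss": "L1Loss",
    "seed": 42,
}
template_overrides = {"n_folds": 1}  # value taken from the generated script above

# Same expression as the training script: overrides win, defaults fill the rest
hyperparameters = {**DEFAULT_HYPERPARAMETERS, **(template_overrides or {})}

assert hyperparameters["n_folds"] == 1       # single train/val split, no k-fold ensemble
assert hyperparameters["max_epochs"] == 200  # unspecified keys keep their defaults
print(hyperparameters)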