workbench 0.8.198__py3-none-any.whl → 0.8.203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/dataframe/proximity.py +11 -4
- workbench/api/__init__.py +2 -1
- workbench/api/df_store.py +17 -108
- workbench/api/feature_set.py +48 -11
- workbench/api/model.py +1 -1
- workbench/api/parameter_store.py +3 -52
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +5 -5
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +261 -78
- workbench/core/artifacts/feature_set_core.py +69 -1
- workbench/core/artifacts/model_core.py +48 -14
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/transforms/features_to_model/features_to_model.py +50 -33
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
- workbench/core/views/view.py +2 -2
- workbench/model_scripts/chemprop/chemprop.template +933 -0
- workbench/model_scripts/chemprop/generated_model_script.py +933 -0
- workbench/model_scripts/chemprop/requirements.txt +11 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/proximity.py +11 -4
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
- workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
- workbench/model_scripts/custom_models/uq_models/proximity.py +11 -4
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
- workbench/model_scripts/pytorch_model/generated_model_script.py +365 -173
- workbench/model_scripts/pytorch_model/pytorch.template +362 -170
- workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
- workbench/model_scripts/script_generation.py +10 -7
- workbench/model_scripts/uq_models/generated_model_script.py +43 -27
- workbench/model_scripts/uq_models/mapie.template +40 -24
- workbench/model_scripts/xgb_model/generated_model_script.py +36 -7
- workbench/model_scripts/xgb_model/xgb_model.template +36 -7
- workbench/repl/workbench_shell.py +14 -5
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
- workbench/utils/chemprop_utils.py +761 -0
- workbench/utils/pytorch_utils.py +527 -0
- workbench/utils/xgboost_model_utils.py +10 -5
- workbench/web_interface/components/model_plot.py +7 -1
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/METADATA +3 -3
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/RECORD +49 -43
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/entry_points.txt +2 -1
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
- workbench/model_scripts/__pycache__/script_generation.cpython-312.pyc +0 -0
- workbench/model_scripts/__pycache__/script_generation.cpython-313.pyc +0 -0
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/WHEEL +0 -0
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Requirements for ChemProp model scripts
|
|
2
|
+
# Note: These are the local dev requirements. The Docker images have their own requirements.txt
|
|
3
|
+
chemprop==2.2.1
|
|
4
|
+
rdkit==2025.9.1
|
|
5
|
+
torch>=2.0.0
|
|
6
|
+
lightning>=2.0.0
|
|
7
|
+
pandas>=2.0.0
|
|
8
|
+
numpy>=1.24.0
|
|
9
|
+
scikit-learn>=1.3.0
|
|
10
|
+
awswrangler>=3.0.0
|
|
11
|
+
joblib>=1.3.0
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Molecular fingerprint computation utilities"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
# Molecular Descriptor Imports
|
|
7
|
+
from rdkit import Chem
|
|
8
|
+
from rdkit.Chem import rdFingerprintGenerator
|
|
9
|
+
from rdkit.Chem.MolStandardize import rdMolStandardize
|
|
10
|
+
|
|
11
|
+
# Set up the logger
|
|
12
|
+
log = logging.getLogger("workbench")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=True) -> pd.DataFrame:
    """Compute Morgan fingerprints and return them in a 'fingerprint' column.

    The caller's DataFrame is never modified; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): Input DataFrame containing SMILES strings. The SMILES
            column is located case-insensitively. An existing 'molecule' column is
            reused as-is unless its dtype is "string" (a serialized column), in
            which case it is discarded and the molecules are rebuilt from SMILES.
        radius (int): Radius for the Morgan fingerprint.
        n_bits (int): Number of bits for the fingerprint. When ``counts`` is True
            the generated fingerprint is ``4 * n_bits`` bits long.
        counts (bool): If True, enable count simulation in the fingerprint generator.

    Returns:
        pd.DataFrame: A copy of the input with the Morgan fingerprints added as bit
        strings in a 'fingerprint' column. When molecules are built from SMILES
        here, rows whose SMILES fail to parse are dropped and no 'molecule' column
        is left in the result. With a pre-existing 'molecule' column, rows holding
        a missing molecule get ``pd.NA`` as their fingerprint.

    Raises:
        ValueError: If the DataFrame has no 'smiles' column (any letter case).

    Note:
        See: https://greglandrum.github.io/rdkit-blog/posts/2021-07-06-simulating-counts.html
    """
    # Check for the SMILES column (case-insensitive); str() keeps non-string labels from crashing
    smiles_column = next((col for col in df.columns if str(col).lower() == "smiles"), None)
    if smiles_column is None:
        raise ValueError("Input DataFrame must have a 'smiles' column")

    # Sanity check the molecule column (sometimes it gets serialized, which doesn't work)
    if "molecule" in df.columns and df["molecule"].dtype == "string":
        log.warning("Detected serialized molecules in 'molecule' column. Removing...")
        df = df.drop(columns=["molecule"])  # returns a new frame, caller's data is untouched

    if "molecule" in df.columns:
        # Reuse the molecules supplied by the caller
        molecules = df["molecule"]
    else:
        # Convert SMILES to RDKit molecule objects, kept in a standalone Series so that
        # no temporary column is ever written into the caller's DataFrame
        log.info("Converting SMILES to RDKit Molecules...")
        molecules = df[smiles_column].apply(Chem.MolFromSmiles)

        # Make sure our molecules are not None
        parsed = molecules.notnull()
        failed_smiles = df.loc[~parsed, smiles_column].tolist()
        if failed_smiles:
            log.error("Failed to convert the following SMILES to molecules: %s", failed_smiles)
        df = df[parsed]
        molecules = molecules[parsed]

    # If we have fragments in our compounds, get the largest fragment before computing fingerprints
    # (one chooser instance is shared by all rows instead of building one per molecule)
    fragment_chooser = rdMolStandardize.LargestFragmentChooser()
    largest_frags = molecules.apply(lambda mol: fragment_chooser.choose(mol) if mol else None)

    # Create a Morgan fingerprint generator (multiply the size by 4 to simulate counts)
    fp_size = n_bits * 4 if counts else n_bits
    morgan_generator = rdFingerprintGenerator.GetMorganGenerator(
        radius=radius, fpSize=fp_size, countSimulation=counts
    )

    # Compute Morgan fingerprints (vectorized)
    fingerprints = largest_frags.apply(
        lambda mol: (morgan_generator.GetFingerprint(mol).ToBitString() if mol else pd.NA)
    )

    # Add the fingerprints to a copy so neither the input nor a filtered view of it is mutated
    result = df.copy()
    result["fingerprint"] = fingerprints
    return result
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
if __name__ == "__main__":
    # Manual smoke test for compute_morgan_fingerprints(); it only prints results
    print("Running molecular fingerprint tests...")
    print("Note: This requires RDKit to be available")

    def _print_bit_lengths(frame: pd.DataFrame) -> None:
        """Print the fingerprint length of every row (0 when the fingerprint is missing)."""
        for _, row in frame.iterrows():
            fp = row.get("fingerprint")
            # pd.isna() covers None and pd.NA; comparing pd.NA to a string and
            # using the result in an `if` raises TypeError
            fp_len = 0 if pd.isna(fp) else len(fp)
            print(f" {row['name']:15} → {fp_len} bits")

    # Test molecules
    test_molecules = {
        "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
        "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
        "glucose": "C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O",  # With stereochemistry
        "sodium_acetate": "CC(=O)[O-].[Na+]",  # Salt
        "benzene": "c1ccccc1",
        "butene_e": "C/C=C/C",  # E-butene
        "butene_z": "C/C=C\\C",  # Z-butene
    }

    # Test 1: Morgan Fingerprints
    print("\n1. Testing Morgan fingerprint generation...")

    test_df = pd.DataFrame({"SMILES": list(test_molecules.values()), "name": list(test_molecules.keys())})

    fp_df = compute_morgan_fingerprints(test_df.copy(), radius=2, n_bits=512, counts=False)

    print(" Fingerprint generation results:")
    _print_bit_lengths(fp_df)

    # Test 2: Different fingerprint parameters
    print("\n2. Testing different fingerprint parameters...")

    # Test with counts enabled
    fp_counts_df = compute_morgan_fingerprints(test_df.copy(), radius=3, n_bits=256, counts=True)

    print(" With count simulation (256 bits * 4):")
    _print_bit_lengths(fp_counts_df)

    # Test 3: Edge cases
    print("\n3. Testing edge cases...")

    # Invalid SMILES: either dropping the bad rows or raising is treated as acceptable here
    invalid_df = pd.DataFrame({"SMILES": ["INVALID", ""]})
    try:
        fp_invalid = compute_morgan_fingerprints(invalid_df.copy())
        print(f" ✓ Invalid SMILES handled: {len(fp_invalid)} valid molecules")
    except Exception as e:
        print(f" ✓ Invalid SMILES properly raised error: {type(e).__name__}")

    # Test with pre-existing molecule column
    mol_df = test_df.copy()
    mol_df["molecule"] = mol_df["SMILES"].apply(Chem.MolFromSmiles)
    fp_with_mol = compute_morgan_fingerprints(mol_df)
    print(f" ✓ Pre-existing molecule column handled: {len(fp_with_mol)} fingerprints generated")

    print("\n✅ All fingerprint tests completed!")
|
|
@@ -69,6 +69,7 @@ class Proximity:
|
|
|
69
69
|
top_percent: float = 1.0,
|
|
70
70
|
min_delta: Optional[float] = None,
|
|
71
71
|
k_neighbors: int = 4,
|
|
72
|
+
only_coincident: bool = False,
|
|
72
73
|
) -> pd.DataFrame:
|
|
73
74
|
"""
|
|
74
75
|
Find compounds with steep target gradients (data quality issues and activity cliffs).
|
|
@@ -81,6 +82,7 @@ class Proximity:
|
|
|
81
82
|
top_percent: Percentage of compounds with steepest gradients to return (e.g., 1.0 = top 1%)
|
|
82
83
|
min_delta: Minimum absolute target difference to consider. If None, defaults to target_range/100
|
|
83
84
|
k_neighbors: Number of neighbors to use for median calculation (default: 4)
|
|
85
|
+
only_coincident: If True, only consider compounds that are coincident (default: False)
|
|
84
86
|
|
|
85
87
|
Returns:
|
|
86
88
|
DataFrame of compounds with steepest gradients, sorted by gradient (descending)
|
|
@@ -99,10 +101,15 @@ class Proximity:
|
|
|
99
101
|
min_delta = self.target_range / 100.0 if self.target_range > 0 else 0.0
|
|
100
102
|
candidates = candidates[candidates["nn_target_diff"] >= min_delta]
|
|
101
103
|
|
|
102
|
-
#
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
104
|
+
# Filter based on mode
|
|
105
|
+
if only_coincident:
|
|
106
|
+
# Only keep coincident points (nn_distance ~= 0)
|
|
107
|
+
candidates = candidates[candidates["nn_distance"] < epsilon].copy()
|
|
108
|
+
else:
|
|
109
|
+
# Get top X% by initial gradient
|
|
110
|
+
percentile = 100 - top_percent
|
|
111
|
+
threshold = np.percentile(candidates["gradient"], percentile)
|
|
112
|
+
candidates = candidates[candidates["gradient"] >= threshold].copy()
|
|
106
113
|
|
|
107
114
|
# Phase 2: Verify with k-neighbor median to filter out cases where nearest neighbor is the outlier
|
|
108
115
|
results = []
|
|
@@ -4,9 +4,10 @@ import awswrangler as wr
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
6
|
# Model Performance Scores
|
|
7
|
-
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
|
|
7
|
+
from sklearn.metrics import mean_absolute_error, median_absolute_error, r2_score, root_mean_squared_error
|
|
8
8
|
from sklearn.model_selection import KFold
|
|
9
9
|
from scipy.optimize import minimize
|
|
10
|
+
from scipy.stats import spearmanr
|
|
10
11
|
|
|
11
12
|
from io import StringIO
|
|
12
13
|
import json
|
|
@@ -217,11 +218,16 @@ if __name__ == "__main__":
|
|
|
217
218
|
# Report Performance Metrics
|
|
218
219
|
rmse = root_mean_squared_error(result_df[target], result_df["prediction"])
|
|
219
220
|
mae = mean_absolute_error(result_df[target], result_df["prediction"])
|
|
221
|
+
medae = median_absolute_error(result_df[target], result_df["prediction"])
|
|
220
222
|
r2 = r2_score(result_df[target], result_df["prediction"])
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
print(f"
|
|
224
|
-
print(f"
|
|
223
|
+
spearman_corr = spearmanr(result_df[target], result_df["prediction"]).correlation
|
|
224
|
+
support = len(result_df)
|
|
225
|
+
print(f"rmse: {rmse:.3f}")
|
|
226
|
+
print(f"mae: {mae:.3f}")
|
|
227
|
+
print(f"medae: {medae:.3f}")
|
|
228
|
+
print(f"r2: {r2:.3f}")
|
|
229
|
+
print(f"spearmanr: {spearman_corr:.3f}")
|
|
230
|
+
print(f"support: {support}")
|
|
225
231
|
|
|
226
232
|
# Now save the models
|
|
227
233
|
for name, model in models.items():
|
|
@@ -5,7 +5,8 @@ from xgboost import XGBRegressor # Point Estimator
|
|
|
5
5
|
from sklearn.model_selection import train_test_split
|
|
6
6
|
|
|
7
7
|
# Model Performance Scores
|
|
8
|
-
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
|
|
8
|
+
from sklearn.metrics import mean_absolute_error, median_absolute_error, r2_score, root_mean_squared_error
|
|
9
|
+
from scipy.stats import spearmanr
|
|
9
10
|
|
|
10
11
|
from io import StringIO
|
|
11
12
|
import json
|
|
@@ -238,11 +239,16 @@ if __name__ == "__main__":
|
|
|
238
239
|
# Calculate various model performance metrics (regression)
|
|
239
240
|
rmse = root_mean_squared_error(y_validate, preds)
|
|
240
241
|
mae = mean_absolute_error(y_validate, preds)
|
|
242
|
+
medae = median_absolute_error(y_validate, preds)
|
|
241
243
|
r2 = r2_score(y_validate, preds)
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
print(f"
|
|
245
|
-
print(f"
|
|
244
|
+
spearman_corr = spearmanr(y_validate, preds).correlation
|
|
245
|
+
support = len(df_val)
|
|
246
|
+
print(f"rmse: {rmse:.3f}")
|
|
247
|
+
print(f"mae: {mae:.3f}")
|
|
248
|
+
print(f"medae: {medae:.3f}")
|
|
249
|
+
print(f"r2: {r2:.3f}")
|
|
250
|
+
print(f"spearmanr: {spearman_corr:.3f}")
|
|
251
|
+
print(f"support: {support}")
|
|
246
252
|
|
|
247
253
|
# Save the trained XGBoost model
|
|
248
254
|
xgb_model.save_model(os.path.join(args.model_dir, "xgb_model.json"))
|
|
@@ -3,7 +3,8 @@ from ngboost import NGBRegressor
|
|
|
3
3
|
from sklearn.model_selection import train_test_split
|
|
4
4
|
|
|
5
5
|
# Model Performance Scores
|
|
6
|
-
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
|
|
6
|
+
from sklearn.metrics import mean_absolute_error, median_absolute_error, r2_score, root_mean_squared_error
|
|
7
|
+
from scipy.stats import spearmanr
|
|
7
8
|
|
|
8
9
|
from io import StringIO
|
|
9
10
|
import json
|
|
@@ -129,11 +130,16 @@ if __name__ == "__main__":
|
|
|
129
130
|
# Calculate various model performance metrics (regression)
|
|
130
131
|
rmse = root_mean_squared_error(y_validate, preds)
|
|
131
132
|
mae = mean_absolute_error(y_validate, preds)
|
|
133
|
+
medae = median_absolute_error(y_validate, preds)
|
|
132
134
|
r2 = r2_score(y_validate, preds)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
print(f"
|
|
136
|
-
print(f"
|
|
135
|
+
spearman_corr = spearmanr(y_validate, preds).correlation
|
|
136
|
+
support = len(df_val)
|
|
137
|
+
print(f"rmse: {rmse:.3f}")
|
|
138
|
+
print(f"mae: {mae:.3f}")
|
|
139
|
+
print(f"medae: {medae:.3f}")
|
|
140
|
+
print(f"r2: {r2:.3f}")
|
|
141
|
+
print(f"spearmanr: {spearman_corr:.3f}")
|
|
142
|
+
print(f"support: {support}")
|
|
137
143
|
|
|
138
144
|
# Save the trained NGBoost model
|
|
139
145
|
joblib.dump(ngb_model, os.path.join(args.model_dir, "ngb_model.joblib"))
|
|
@@ -69,6 +69,7 @@ class Proximity:
|
|
|
69
69
|
top_percent: float = 1.0,
|
|
70
70
|
min_delta: Optional[float] = None,
|
|
71
71
|
k_neighbors: int = 4,
|
|
72
|
+
only_coincident: bool = False,
|
|
72
73
|
) -> pd.DataFrame:
|
|
73
74
|
"""
|
|
74
75
|
Find compounds with steep target gradients (data quality issues and activity cliffs).
|
|
@@ -81,6 +82,7 @@ class Proximity:
|
|
|
81
82
|
top_percent: Percentage of compounds with steepest gradients to return (e.g., 1.0 = top 1%)
|
|
82
83
|
min_delta: Minimum absolute target difference to consider. If None, defaults to target_range/100
|
|
83
84
|
k_neighbors: Number of neighbors to use for median calculation (default: 4)
|
|
85
|
+
only_coincident: If True, only consider compounds that are coincident (default: False)
|
|
84
86
|
|
|
85
87
|
Returns:
|
|
86
88
|
DataFrame of compounds with steepest gradients, sorted by gradient (descending)
|
|
@@ -99,10 +101,15 @@ class Proximity:
|
|
|
99
101
|
min_delta = self.target_range / 100.0 if self.target_range > 0 else 0.0
|
|
100
102
|
candidates = candidates[candidates["nn_target_diff"] >= min_delta]
|
|
101
103
|
|
|
102
|
-
#
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
104
|
+
# Filter based on mode
|
|
105
|
+
if only_coincident:
|
|
106
|
+
# Only keep coincident points (nn_distance ~= 0)
|
|
107
|
+
candidates = candidates[candidates["nn_distance"] < epsilon].copy()
|
|
108
|
+
else:
|
|
109
|
+
# Get top X% by initial gradient
|
|
110
|
+
percentile = 100 - top_percent
|
|
111
|
+
threshold = np.percentile(candidates["gradient"], percentile)
|
|
112
|
+
candidates = candidates[candidates["gradient"] >= threshold].copy()
|
|
106
113
|
|
|
107
114
|
# Phase 2: Verify with k-neighbor median to filter out cases where nearest neighbor is the outlier
|
|
108
115
|
results = []
|
|
@@ -12,7 +12,8 @@ import awswrangler as wr
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
|
|
14
14
|
# Model Performance Scores
|
|
15
|
-
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
|
|
15
|
+
from sklearn.metrics import mean_absolute_error, median_absolute_error, r2_score, root_mean_squared_error
|
|
16
|
+
from scipy.stats import spearmanr
|
|
16
17
|
|
|
17
18
|
from io import StringIO
|
|
18
19
|
import json
|
|
@@ -153,11 +154,16 @@ if __name__ == "__main__":
|
|
|
153
154
|
# Report Performance Metrics
|
|
154
155
|
rmse = root_mean_squared_error(result_df[target], result_df["prediction"])
|
|
155
156
|
mae = mean_absolute_error(result_df[target], result_df["prediction"])
|
|
157
|
+
medae = median_absolute_error(result_df[target], result_df["prediction"])
|
|
156
158
|
r2 = r2_score(result_df[target], result_df["prediction"])
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
print(f"
|
|
160
|
-
print(f"
|
|
159
|
+
spearman_corr = spearmanr(result_df[target], result_df["prediction"]).correlation
|
|
160
|
+
support = len(result_df)
|
|
161
|
+
print(f"rmse: {rmse:.3f}")
|
|
162
|
+
print(f"mae: {mae:.3f}")
|
|
163
|
+
print(f"medae: {medae:.3f}")
|
|
164
|
+
print(f"r2: {r2:.3f}")
|
|
165
|
+
print(f"spearmanr: {spearman_corr:.3f}")
|
|
166
|
+
print(f"support: {support}")
|
|
161
167
|
|
|
162
168
|
# Now save the models
|
|
163
169
|
for name, model in models.items():
|