workbench 0.8.172__py3-none-any.whl → 0.8.173__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/graph/light/proximity_graph.py +2 -1
- workbench/api/compound.py +1 -1
- workbench/api/monitor.py +1 -16
- workbench/core/artifacts/data_capture_core.py +315 -0
- workbench/core/artifacts/endpoint_core.py +9 -3
- workbench/core/artifacts/monitor_core.py +33 -249
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +471 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +428 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +95 -204
- workbench/model_scripts/xgb_model/generated_model_script.py +5 -5
- workbench/repl/workbench_shell.py +3 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +134 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +471 -0
- workbench/utils/chem_utils/mol_standardize.py +428 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +209 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/monitor_utils.py +49 -56
- workbench/utils/pandas_utils.py +3 -3
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/METADATA +1 -1
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/RECORD +33 -22
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/utils/chem_utils.py +0 -1556
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/WHEEL +0 -0
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.172.dist-info → workbench-0.8.173.dist-info}/top_level.txt +0 -0
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
# Model: tautomerization_processor
|
|
2
|
-
#
|
|
3
|
-
# Description: The tautomerization_processor model uses RDKit to perform tautomer enumeration
|
|
4
|
-
# and canonicalization of chemical compounds. Tautomerization is the chemical process where
|
|
5
|
-
# compounds can interconvert between structurally distinct forms, often affecting their
|
|
6
|
-
# chemical properties and reactivity. This model provides a robust approach to identifying
|
|
7
|
-
# and processing tautomers, crucial for improving molecular modeling and cheminformatics tasks
|
|
8
|
-
# like virtual screening, QSAR modeling, and property prediction.
|
|
9
|
-
#
|
|
10
|
-
import argparse
|
|
11
|
-
import os
|
|
12
|
-
import joblib
|
|
13
|
-
from io import StringIO
|
|
14
|
-
import pandas as pd
|
|
15
|
-
import json
|
|
16
|
-
|
|
17
|
-
# Local imports
|
|
18
|
-
from local_utils import tautomerize_smiles
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# TRAINING SECTION
|
|
22
|
-
#
|
|
23
|
-
# This section (__main__) is where SageMaker will execute the job and save the model artifacts.
|
|
24
|
-
#
|
|
25
|
-
if __name__ == "__main__":
    # Script arguments for input/output directories. Each default is taken from
    # the corresponding SM_* environment variable, falling back to an /opt/ml path.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
    parser.add_argument(
        "--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data")
    )
    args = parser.parse_args()

    # This model doesn't get trained; it's a feature processing 'model'

    # SageMaker expects a model artifact, so we'll save a placeholder.
    # Create the target directory first so the dump does not fail when the
    # script is pointed at a --model-dir that does not exist yet.
    os.makedirs(args.model_dir, exist_ok=True)
    placeholder_model = {}
    joblib.dump(placeholder_model, os.path.join(args.model_dir, "model.joblib"))
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# Model loading and prediction functions
|
|
43
|
-
def model_fn(model_dir):
    """Load the saved model artifact from ``model_dir``.

    Args:
        model_dir: Directory that contains the ``model.joblib`` file.

    Returns:
        The object that ``joblib.load`` reads back from ``model.joblib``.
    """
    artifact_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(artifact_path)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def input_fn(input_data, content_type):
    """Parse input data and return a DataFrame.

    Args:
        input_data: Request payload as ``str``, ``bytes`` or ``bytearray``.
            Binary payloads are decoded as UTF-8 before parsing.
        content_type: Media type of the payload. It must contain ``text/csv``
            or ``application/json``; the match is case-insensitive.

    Returns:
        pd.DataFrame: The parsed payload. CSV is read with ``pd.read_csv``;
        JSON is loaded with ``json.loads`` and passed to ``pd.DataFrame``.

    Raises:
        ValueError: If ``input_data`` is empty, or if ``content_type`` is
            missing or names an unsupported format.
    """
    if not input_data:
        raise ValueError("Empty input data is not supported!")

    # Decode binary payloads to text if necessary
    if isinstance(input_data, (bytes, bytearray)):
        input_data = input_data.decode("utf-8")

    # Normalize once: a missing content type becomes "" and therefore falls
    # through to the "not supported" error instead of raising TypeError on `in`.
    media_type = (content_type or "").lower()

    if "text/csv" in media_type:
        return pd.read_csv(StringIO(input_data))
    if "application/json" in media_type:
        return pd.DataFrame(json.loads(input_data))  # Assumes JSON array of records
    raise ValueError(f"{content_type} not supported!")
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def output_fn(output_df, accept_type, *, use_explicit_na=False):
    """Serialize the output DataFrame as CSV or JSON.

    Args:
        output_df: DataFrame to serialize.
        accept_type: Requested media type. It must contain ``text/csv`` or
            ``application/json``; the match is case-insensitive.
        use_explicit_na: Keyword-only. When True, missing values in the CSV
            output are written as the string ``N/A``. Defaults to False, in
            which case the frame is passed to ``to_csv`` unchanged. Has no
            effect on JSON output.

    Returns:
        tuple[str, str]: The serialized payload and its media type
        (``"text/csv"`` or ``"application/json"``).

    Raises:
        RuntimeError: If ``accept_type`` is missing or names an unsupported
            format.
    """
    # A missing accept type becomes "" so it reaches the RuntimeError below
    # rather than raising TypeError on the `in` test.
    media_type = (accept_type or "").lower()

    if "text/csv" in media_type:
        frame = output_df.fillna("N/A") if use_explicit_na else output_df
        return frame.to_csv(index=False), "text/csv"
    if "application/json" in media_type:
        # JSON array of records (NaNs -> null)
        return output_df.to_json(orient="records"), "application/json"
    raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# Prediction function
|
|
80
|
-
def predict_fn(df, model):
    """Perform tautomerization on the incoming DataFrame.

    Args:
        df: DataFrame handed to ``tautomerize_smiles``.
        model: Loaded model artifact; not used by this function.

    Returns:
        The value returned by ``tautomerize_smiles(df)``.
    """
    return tautomerize_smiles(df)
|