workbench 0.8.172__py3-none-any.whl → 0.8.174__py3-none-any.whl

This diff shows the changes between package versions as they were released to their respective public registries. It is provided for informational purposes only.

This version of workbench might be problematic.

Files changed (36)
  1. workbench/algorithms/graph/light/proximity_graph.py +2 -1
  2. workbench/api/compound.py +1 -1
  3. workbench/api/monitor.py +1 -16
  4. workbench/core/artifacts/data_capture_core.py +348 -0
  5. workbench/core/artifacts/endpoint_core.py +9 -3
  6. workbench/core/artifacts/monitor_core.py +33 -249
  7. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  8. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +471 -0
  9. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +428 -0
  10. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  11. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +95 -204
  12. workbench/model_scripts/xgb_model/generated_model_script.py +5 -5
  13. workbench/repl/workbench_shell.py +3 -3
  14. workbench/utils/chem_utils/__init__.py +0 -0
  15. workbench/utils/chem_utils/fingerprints.py +134 -0
  16. workbench/utils/chem_utils/misc.py +194 -0
  17. workbench/utils/chem_utils/mol_descriptors.py +471 -0
  18. workbench/utils/chem_utils/mol_standardize.py +428 -0
  19. workbench/utils/chem_utils/mol_tagging.py +348 -0
  20. workbench/utils/chem_utils/projections.py +209 -0
  21. workbench/utils/chem_utils/salts.py +256 -0
  22. workbench/utils/chem_utils/sdf.py +292 -0
  23. workbench/utils/chem_utils/toxicity.py +250 -0
  24. workbench/utils/chem_utils/vis.py +253 -0
  25. workbench/utils/monitor_utils.py +44 -62
  26. workbench/utils/pandas_utils.py +3 -3
  27. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  28. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/METADATA +1 -1
  29. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/RECORD +33 -22
  30. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  31. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  32. workbench/utils/chem_utils.py +0 -1556
  33. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/WHEEL +0 -0
  34. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/entry_points.txt +0 -0
  35. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/licenses/LICENSE +0 -0
  36. {workbench-0.8.172.dist-info → workbench-0.8.174.dist-info}/top_level.txt +0 -0
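
The most substantial change in this release is the removal of the monolithic workbench/utils/chem_utils.py (1,556 lines removed) in favor of a new workbench/utils/chem_utils/ package split into focused submodules (fingerprints, misc, mol_descriptors, mol_standardize, mol_tagging, projections, salts, sdf, toxicity, vis). A minimal sketch of what the corresponding import change might look like for downstream code follows; the submodule paths come from the file list above, while the function names are hypothetical placeholders, not the package's actual API.

import pandas as pd

# Before (0.8.172): one monolithic module held all chemistry helpers, e.g.
#   from workbench.utils.chem_utils import compute_fingerprints   # hypothetical name
# After (0.8.174): the relevant submodule is imported instead.
from workbench.utils.chem_utils import fingerprints  # new package layout

df = pd.DataFrame({"smiles": ["CCO", "c1ccccc1"]})
df = fingerprints.compute_fingerprints(df)  # hypothetical function name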
workbench/model_scripts/custom_models/chem_info/tautomerize.py (removed)
@@ -1,83 +0,0 @@
- # Model: tautomerization_processor
- #
- # Description: The tautomerization_processor model uses RDKit to perform tautomer enumeration
- # and canonicalization of chemical compounds. Tautomerization is the chemical process where
- # compounds can interconvert between structurally distinct forms, often affecting their
- # chemical properties and reactivity. This model provides a robust approach to identifying
- # and processing tautomers, crucial for improving molecular modeling and cheminformatics tasks
- # like virtual screening, QSAR modeling, and property prediction.
- #
- import argparse
- import os
- import joblib
- from io import StringIO
- import pandas as pd
- import json
-
- # Local imports
- from local_utils import tautomerize_smiles
-
-
- # TRAINING SECTION
- #
- # This section (__main__) is where SageMaker will execute the job and save the model artifacts.
- #
- if __name__ == "__main__":
-     # Script arguments for input/output directories
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
-     parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
-     parser.add_argument(
-         "--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data")
-     )
-     args = parser.parse_args()
-
-     # This model doesn't get trained; it's a feature processing 'model'
-
-     # Sagemaker expects a model artifact, so we'll save a placeholder
-     placeholder_model = {}
-     joblib.dump(placeholder_model, os.path.join(args.model_dir, "model.joblib"))
-
-
- # Model loading and prediction functions
- def model_fn(model_dir):
-     return joblib.load(os.path.join(model_dir, "model.joblib"))
-
-
- def input_fn(input_data, content_type):
-     """Parse input data and return a DataFrame."""
-     if not input_data:
-         raise ValueError("Empty input data is not supported!")
-
-     # Decode bytes to string if necessary
-     if isinstance(input_data, bytes):
-         input_data = input_data.decode("utf-8")
-
-     if "text/csv" in content_type:
-         return pd.read_csv(StringIO(input_data))
-     elif "application/json" in content_type:
-         return pd.DataFrame(json.loads(input_data)) # Assumes JSON array of records
-     else:
-         raise ValueError(f"{content_type} not supported!")
-
-
- def output_fn(output_df, accept_type):
-     """Supports both CSV and JSON output formats."""
-     use_explicit_na = False
-     if "text/csv" in accept_type:
-         if use_explicit_na:
-             csv_output = output_df.fillna("N/A").to_csv(index=False) # CSV with N/A for missing values
-         else:
-             csv_output = output_df.to_csv(index=False)
-         return csv_output, "text/csv"
-     elif "application/json" in accept_type:
-         return output_df.to_json(orient="records"), "application/json" # JSON array of records (NaNs -> null)
-     else:
-         raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
-
-
- # Prediction function
- def predict_fn(df, model):
-     # Perform Tautomerization
-     df = tautomerize_smiles(df)
-     return df
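
Per its header comment, the removed script delegated the chemistry to local_utils.tautomerize_smiles (local_utils.py is also removed in this release, presumably superseded by the new mol_standardize.py). For orientation, here is a minimal sketch of RDKit-based tautomer canonicalization over a SMILES column; the DataFrame handling and the "smiles" column name are assumptions for illustration, not the removed implementation.

import pandas as pd
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize

def canonical_tautomer_sketch(df: pd.DataFrame, smiles_col: str = "smiles") -> pd.DataFrame:
    """Add a canonical-tautomer SMILES column (illustrative sketch, not the removed code)."""
    enumerator = rdMolStandardize.TautomerEnumerator()

    def canonicalize(smiles: str):
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None  # leave unparsable SMILES empty
        return Chem.MolToSmiles(enumerator.Canonicalize(mol))

    df[f"{smiles_col}_canonical"] = df[smiles_col].apply(canonicalize)
    return df

# Example: 2-hydroxypyridine and its 2-pyridone tautomer collapse to one canonical form
df = canonical_tautomer_sketch(pd.DataFrame({"smiles": ["Oc1ccccn1", "CC(=O)C"]}))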