workbench 0.8.168__py3-none-any.whl → 0.8.193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. workbench/algorithms/dataframe/proximity.py +143 -102
  2. workbench/algorithms/graph/light/proximity_graph.py +2 -1
  3. workbench/api/compound.py +1 -1
  4. workbench/api/endpoint.py +3 -2
  5. workbench/api/feature_set.py +4 -4
  6. workbench/api/model.py +16 -12
  7. workbench/api/monitor.py +1 -16
  8. workbench/core/artifacts/artifact.py +11 -3
  9. workbench/core/artifacts/data_capture_core.py +355 -0
  10. workbench/core/artifacts/endpoint_core.py +113 -27
  11. workbench/core/artifacts/feature_set_core.py +72 -13
  12. workbench/core/artifacts/model_core.py +71 -49
  13. workbench/core/artifacts/monitor_core.py +33 -249
  14. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  15. workbench/core/cloud_platform/aws/aws_meta.py +11 -4
  16. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  17. workbench/core/transforms/features_to_model/features_to_model.py +11 -6
  18. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  19. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  20. workbench/core/views/training_view.py +49 -53
  21. workbench/core/views/view.py +51 -1
  22. workbench/core/views/view_utils.py +4 -4
  23. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  24. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  25. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  26. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  27. workbench/model_scripts/custom_models/proximity/proximity.py +143 -102
  28. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  29. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +10 -17
  30. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  31. workbench/model_scripts/custom_models/uq_models/meta_uq.template +156 -58
  32. workbench/model_scripts/custom_models/uq_models/ngboost.template +20 -14
  33. workbench/model_scripts/custom_models/uq_models/proximity.py +143 -102
  34. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  35. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +5 -13
  36. workbench/model_scripts/pytorch_model/pytorch.template +9 -18
  37. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  38. workbench/model_scripts/script_generation.py +7 -2
  39. workbench/model_scripts/uq_models/mapie.template +492 -0
  40. workbench/model_scripts/uq_models/requirements.txt +1 -0
  41. workbench/model_scripts/xgb_model/generated_model_script.py +34 -43
  42. workbench/model_scripts/xgb_model/xgb_model.template +31 -40
  43. workbench/repl/workbench_shell.py +4 -4
  44. workbench/scripts/lambda_launcher.py +63 -0
  45. workbench/scripts/{ml_pipeline_launcher.py → ml_pipeline_batch.py} +49 -51
  46. workbench/scripts/ml_pipeline_sqs.py +186 -0
  47. workbench/utils/chem_utils/__init__.py +0 -0
  48. workbench/utils/chem_utils/fingerprints.py +134 -0
  49. workbench/utils/chem_utils/misc.py +194 -0
  50. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  51. workbench/utils/chem_utils/mol_standardize.py +450 -0
  52. workbench/utils/chem_utils/mol_tagging.py +348 -0
  53. workbench/utils/chem_utils/projections.py +209 -0
  54. workbench/utils/chem_utils/salts.py +256 -0
  55. workbench/utils/chem_utils/sdf.py +292 -0
  56. workbench/utils/chem_utils/toxicity.py +250 -0
  57. workbench/utils/chem_utils/vis.py +253 -0
  58. workbench/utils/config_manager.py +2 -6
  59. workbench/utils/endpoint_utils.py +5 -7
  60. workbench/utils/license_manager.py +2 -6
  61. workbench/utils/model_utils.py +89 -31
  62. workbench/utils/monitor_utils.py +44 -62
  63. workbench/utils/pandas_utils.py +3 -3
  64. workbench/utils/shap_utils.py +10 -2
  65. workbench/utils/workbench_sqs.py +1 -1
  66. workbench/utils/xgboost_model_utils.py +300 -151
  67. workbench/web_interface/components/model_plot.py +7 -1
  68. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  69. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  70. workbench/web_interface/components/plugins/model_details.py +7 -2
  71. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  72. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/METADATA +24 -2
  73. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/RECORD +77 -72
  74. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/entry_points.txt +3 -1
  75. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/licenses/LICENSE +1 -1
  76. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  77. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  78. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  79. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  80. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  81. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  82. workbench/model_scripts/pytorch_model/generated_model_script.py +0 -576
  83. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  84. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  85. workbench/model_scripts/scikit_learn/generated_model_script.py +0 -307
  86. workbench/utils/chem_utils.py +0 -1556
  87. workbench/utils/fast_inference.py +0 -167
  88. workbench/utils/resource_utils.py +0 -39
  89. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/WHEEL +0 -0
  90. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,250 @@
1
+ """Toxicity detection utilities for molecular compounds"""
2
+
3
+ from typing import List, Optional, Tuple
4
+ from rdkit import Chem
5
+ from rdkit.Chem import Mol
6
+ from rdkit.Chem import FunctionalGroups as FG
7
+
8
# SMARTS strings for custom toxic functional groups. They are compiled once,
# at import time, into `compiled_toxic_smarts` (same order as this list).
toxic_smarts_patterns = [
    "C(=S)N",  # Dithiocarbamate
    "P(=O)(O)(O)O",  # Phosphate Ester
    "[As](=O)(=O)-[OH]",  # Arsenic Oxide
    "[C](Cl)(Cl)(Cl)",  # Trichloromethyl
    "[Cr](=O)(=O)=O",  # Chromium(VI)
    "[N+](C)(C)(C)(C)",  # Quaternary Ammonium
    "[Se][Se]",  # Diselenide
    "c1c(Cl)c(Cl)c(Cl)c1",  # Trichlorinated Aromatic Ring
    "[CX3](=O)[CX4][Cl,Br,F,I]",  # Halogenated Carbonyl
    "[P+](C*)(C*)(C*)(C*)",  # Phosphonium Group
    "NC(=S)c1c(Cl)cccc1Cl",  # Chlorobenzene Thiocarbamate
    "NC(=S)Nc1ccccc1",  # Phenyl Thiocarbamate
    "S=C1NCCN1",  # Thiourea Derivative
]
compiled_toxic_smarts = [Chem.MolFromSmarts(pattern) for pattern in toxic_smarts_patterns]

# SMARTS strings for exemptions, also compiled once at import time.
exempt_smarts_patterns = [
    "c1ccc(O)c(O)c1",  # Phenols
]
compiled_exempt_smarts = [Chem.MolFromSmarts(pattern) for pattern in exempt_smarts_patterns]

# RDKit functional group hierarchy, built a single time when the module loads
fgroup_hierarchy = FG.BuildFuncGroupHierarchy()
34
+
35
+
36
def contains_heavy_metals(mol: Mol) -> bool:
    """
    Report whether any atom of the molecule is on the heavy-metal list (broad filter).

    Args:
        mol: RDKit molecule object.

    Returns:
        bool: True as soon as one heavy-metal atom is seen, False if none is present.
    """
    heavy_metals = {"Zn", "Cu", "Fe", "Mn", "Co", "Pb", "Hg", "Cd", "As"}
    for atom in mol.GetAtoms():
        if atom.GetSymbol() in heavy_metals:
            return True
    return False
48
+
49
+
50
def halogen_toxicity_score(mol: Mol) -> Tuple[int, int]:
    """
    Count the halogen atoms in a molecule and derive its halogen threshold.

    Args:
        mol: RDKit molecule object.

    Returns:
        Tuple[int, int]: (halogen_count, halogen_threshold). The threshold is 20% of
        the atom count (truncated to an int), but never less than 2.
    """
    halogens = {"Cl", "Br", "I", "F"}

    # Tally every atom whose element symbol is a halogen
    halogen_count = 0
    for atom in mol.GetAtoms():
        if atom.GetSymbol() in halogens:
            halogen_count += 1

    # Larger molecules are allowed more halogens; the floor of 2 covers small ones
    size_scaled = int(mol.GetNumAtoms() * 0.2)
    halogen_threshold = size_scaled if size_scaled > 2 else 2

    return halogen_count, halogen_threshold
71
+
72
+
73
def toxic_elements(mol: Mol) -> Optional[List[str]]:
    """
    Identifies toxic elements or specific forms of elements in a molecule.

    Three kinds of findings are reported:
      - element symbols from a fixed "always toxic" list,
      - "N+" for positively charged nitrogen, unless the molecule contains the
        quaternary ammonium pattern "[N+](C)(C)(C)C",
      - halogen symbols, when the molecule's halogen count exceeds the threshold
        returned by `halogen_toxicity_score`.

    Args:
        mol: RDKit molecule object.

    Returns:
        Optional[List[str]]: List of toxic elements or specific forms if found, otherwise None.
        The list is built from a set, so its order is not defined.
    """
    always_toxic = {"Pb", "Hg", "Cd", "As", "Be", "Tl", "Sb"}
    halogens = {"Cl", "Br", "I", "F"}
    toxic_found = set()

    # Both checks below depend on the whole molecule, not on the current atom,
    # so they are computed lazily and at most once per call.
    has_benign_quat = None
    halogens_excessive = None

    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()

        if symbol in always_toxic:
            toxic_found.add(symbol)

        elif symbol == "N" and atom.GetFormalCharge() > 0:
            if has_benign_quat is None:
                benign_quat = Chem.MolFromSmarts("[N+](C)(C)(C)C")
                has_benign_quat = mol.HasSubstructMatch(benign_quat)
            # NOTE(review): the exemption is molecule-wide -- one quaternary
            # ammonium anywhere suppresses "N+" for every charged nitrogen.
            if not has_benign_quat:
                toxic_found.add("N+")

        elif symbol in halogens:
            if halogens_excessive is None:
                halogen_count, halogen_threshold = halogen_toxicity_score(mol)
                halogens_excessive = halogen_count > halogen_threshold
            if halogens_excessive:
                toxic_found.add(symbol)

    return list(toxic_found) if toxic_found else None
113
+
114
+
115
def toxic_groups(mol: Chem.Mol) -> Optional[List[str]]:
    """
    Check if a molecule contains known toxic functional groups using RDKit's functional groups and SMARTS patterns.

    Args:
        mol (rdkit.Chem.Mol): The molecule to evaluate.

    Returns:
        Optional[List[str]]: List of SMARTS patterns for toxic groups if found, otherwise None.
        None is also returned whenever the molecule matches an exemption pattern.
    """
    # An exemption match discards every finding, so test it first and skip the rest
    if any(mol.HasSubstructMatch(exempt) for exempt in compiled_exempt_smarts):
        return None

    toxic_smarts_matches = []

    # Use RDKit's functional group definitions
    toxic_group_names = ["Nitro", "Azide", "Alcohol", "Aldehyde", "Halogen", "TerminalAlkyne"]
    for group_name in toxic_group_names:
        group_node = next((node for node in fgroup_hierarchy if node.label == group_name), None)
        if group_node is None:
            # Label missing from the loaded hierarchy: skip it instead of letting
            # a bare StopIteration escape from next()
            continue
        if mol.HasSubstructMatch(Chem.MolFromSmarts(group_node.smarts)):
            toxic_smarts_matches.append(group_node.smarts)

    # Check the custom toxic SMARTS patterns (compiled at import time)
    for smarts, compiled in zip(toxic_smarts_patterns, compiled_toxic_smarts):
        if mol.HasSubstructMatch(compiled):
            toxic_smarts_matches.append(smarts)

    # Special handling for N+: reported unless the molecule has a quaternary ammonium
    if mol.HasSubstructMatch(Chem.MolFromSmarts("[N+]")):
        if not mol.HasSubstructMatch(Chem.MolFromSmarts("C[N+](C)(C)C")):
            toxic_smarts_matches.append("[N+]")

    return toxic_smarts_matches if toxic_smarts_matches else None
150
+
151
+
152
if __name__ == "__main__":
    print("Running toxicity detection tests...")

    # name -> (SMILES, description)
    test_molecules = {
        # Safe molecules
        "water": ("O", "Water - should be safe"),
        "benzene": ("c1ccccc1", "Benzene - simple aromatic"),
        "glucose": ("C(C1C(C(C(C(O1)O)O)O)O)O", "Glucose - sugar"),
        "ethanol": ("CCO", "Ethanol - simple alcohol"),
        # Heavy metal containing
        "lead_acetate": ("CC(=O)[O-].CC(=O)[O-].[Pb+2]", "Lead acetate - contains Pb"),
        "mercury_chloride": ("Cl[Hg]Cl", "Mercury chloride - contains Hg"),
        "arsenic_trioxide": ("O=[As]O[As]=O", "Arsenic trioxide - contains As"),
        # Halogenated compounds
        "chloroform": ("C(Cl)(Cl)Cl", "Chloroform - trichloromethyl"),
        "ddt": ("c1ccc(cc1)C(c2ccc(cc2)Cl)C(Cl)(Cl)Cl", "DDT - heavily chlorinated"),
        "fluorobenzene": ("Fc1ccccc1", "Fluorobenzene - single halogen"),
        # Nitrogen compounds
        "nitrobenzene": ("c1ccc(cc1)[N+](=O)[O-]", "Nitrobenzene - nitro group"),
        "choline": ("C[N+](C)(C)CCO", "Choline - benign quaternary ammonium"),
        "toxic_quat": ("[N+](C)(C)(C)(C)", "Toxic quaternary ammonium"),
        # Phenol (exempt)
        "catechol": ("c1ccc(O)c(O)c1", "Catechol - phenol, should be exempt"),
        # Phosphate
        "phosphate": ("P(=O)(O)(O)O", "Phosphate ester - toxic pattern"),
    }

    # Names each check is expected to flag
    expected_metals = ("lead_acetate", "mercury_chloride", "arsenic_trioxide")
    expected_elements = ("lead_acetate", "mercury_chloride", "arsenic_trioxide", "chloroform", "ddt")
    expected_groups = ("nitrobenzene", "phosphate", "chloroform", "ethanol")

    # Test 1: Heavy Metals Detection
    print("\n1. Testing heavy metals detection...")
    for name, (smiles, desc) in test_molecules.items():
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        has_metals = contains_heavy_metals(mol)
        expected = name in expected_metals
        status = "✓" if has_metals == expected else "✗"
        print(f" {status} {name}: {has_metals} (expected: {expected})")

    # Test 2: Halogen Toxicity Score
    print("\n2. Testing halogen toxicity scoring...")
    halogen_tests = ["chloroform", "ddt", "fluorobenzene", "benzene"]
    for name in halogen_tests:
        if name not in test_molecules:
            continue
        smiles, desc = test_molecules[name]
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        count, threshold = halogen_toxicity_score(mol)
        print(f" {name}: {count} halogens, threshold: {threshold}, toxic: {count > threshold}")

    # Test 3: Toxic Elements
    print("\n3. Testing toxic elements detection...")
    for name, (smiles, desc) in test_molecules.items():
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        toxics = toxic_elements(mol)
        if toxics:
            print(f" ⚠ {name}: {toxics}")
        elif name in expected_elements:
            print(f" ✗ {name}: Should have detected toxic elements")
        else:
            print(f" ✓ {name}: No toxic elements (as expected)")

    # Test 4: Toxic Groups
    print("\n4. Testing toxic functional groups...")
    for name, (smiles, desc) in test_molecules.items():
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        groups = toxic_groups(mol)
        if groups:
            print(f" ⚠ {name}: Found {len(groups)} toxic group(s)")
            # Only the first three patterns are shown, each cut to 50 characters
            for g in groups[:3]:
                print(f" - {g[:50]}...")
        elif name == "catechol":
            print(f" ✓ {name}: Exempt (phenol)")
        elif name in expected_groups:
            print(f" ✗ {name}: Should have detected toxic groups")
        else:
            print(f" ✓ {name}: No toxic groups")

    # Test 5: Edge Cases
    print("\n5. Testing edge cases...")
    edge_cases = [
        ("", "Empty SMILES"),
        ("INVALID", "Invalid SMILES"),
        ("C" * 100, "Very long carbon chain"),
        ("[N+](C)(C)(C)C", "Benign quaternary ammonium"),
    ]

    for smiles, desc in edge_cases:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            print(f" {desc}: Invalid molecule (as expected)")
            continue
        metals = contains_heavy_metals(mol)
        elements = toxic_elements(mol)
        groups = toxic_groups(mol)
        print(f" {desc}: metals={metals}, elements={elements is not None}, groups={groups is not None}")

    print("\n✅ All toxicity detection tests completed!")
@@ -0,0 +1,253 @@
1
+ """Molecular visualization utilities for Workbench"""
2
+
3
+ import logging
4
+ import base64
5
+ import re
6
+ from typing import Optional, Tuple
7
+ from rdkit import Chem
8
+ from rdkit.Chem import AllChem, Draw
9
+ from rdkit.Chem.Draw import rdMolDraw2D
10
+
11
+ # Set up the logger
12
+ log = logging.getLogger("workbench")
13
+
14
+
15
+ def _is_dark(color: str) -> bool:
16
+ """Determine if an rgba color is dark based on RGB average.
17
+
18
+ Args:
19
+ color: Color in rgba(...) format
20
+
21
+ Returns:
22
+ True if the color is dark, False otherwise
23
+ """
24
+ match = re.match(r"rgba?\((\d+),\s*(\d+),\s*(\d+)", color)
25
+ if not match:
26
+ log.warning(f"Invalid color format: {color}, defaulting to dark")
27
+ return True # Default to dark mode on error
28
+
29
+ r, g, b = map(int, match.groups())
30
+ return (r + g + b) / 3 < 128
31
+
32
+
33
+ def _rgba_to_tuple(rgba: str) -> Tuple[float, float, float, float]:
34
+ """Convert rgba string to normalized tuple (R, G, B, A).
35
+
36
+ Args:
37
+ rgba: RGBA color string (e.g., "rgba(255, 0, 0, 0.5)")
38
+
39
+ Returns:
40
+ Normalized tuple of (R, G, B, A) with RGB in [0, 1]
41
+ """
42
+ try:
43
+ components = rgba.strip("rgba() ").split(",")
44
+ r, g, b = (int(components[i]) / 255 for i in range(3))
45
+ a = float(components[3]) if len(components) > 3 else 1.0
46
+ return r, g, b, a
47
+ except (IndexError, ValueError) as e:
48
+ log.warning(f"Error parsing color '{rgba}': {e}, using default")
49
+ return 0.25, 0.25, 0.25, 1.0 # Default dark grey
50
+
51
+
52
def _validate_molecule(smiles: str) -> Optional[Chem.Mol]:
    """Validate and return RDKit molecule from SMILES.

    A SMILES string that parses to a molecule with no atoms (e.g. an empty
    string) is treated as invalid, so callers never try to draw an empty molecule.

    Args:
        smiles: SMILES string

    Returns:
        RDKit molecule, or None if the SMILES is invalid, yields no atoms,
        or parsing raises
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception as e:
        log.error(f"Error parsing SMILES '{smiles}': {e}")
        return None

    # An empty SMILES gives a zero-atom molecule rather than None; reject that too
    if mol is None or mol.GetNumAtoms() == 0:
        log.warning(f"Invalid SMILES: {smiles}")
        return None
    return mol
69
+
70
+
71
def _configure_draw_options(options: Draw.MolDrawOptions, background: str) -> None:
    """Apply the background color (and dark mode, if needed) to drawing options.

    Args:
        options: RDKit drawing options object (modified in place)
        background: Background color string
    """
    dark_background = _is_dark(background)
    if dark_background:
        rdMolDraw2D.SetDarkMode(options)

    # Set the requested background last, after any dark-mode switch
    background_rgba = _rgba_to_tuple(background)
    options.setBackgroundColour(background_rgba)
81
+
82
+
83
def img_from_smiles(
    smiles: str, width: int = 500, height: int = 500, background: str = "rgba(64, 64, 64, 1)"
) -> Optional:
    """Render the molecule described by a SMILES string as an image.

    Args:
        smiles: SMILES string representing the molecule
        width: Width of the image in pixels (default: 500)
        height: Height of the image in pixels (default: 500)
        background: Background color (default: dark grey)

    Returns:
        PIL Image object or None if SMILES is invalid
    """
    molecule = _validate_molecule(smiles)
    if molecule:
        # Build the drawing options for the requested background, then render
        draw_options = Draw.MolDrawOptions()
        _configure_draw_options(draw_options, background)
        return Draw.MolToImage(molecule, options=draw_options, size=(width, height))
    return None
107
+
108
+
109
def svg_from_smiles(
    smiles: str, width: int = 500, height: int = 500, background: str = "rgba(64, 64, 64, 1)"
) -> Optional[str]:
    """Render the molecule described by a SMILES string as an SVG data URI.

    Args:
        smiles: SMILES string representing the molecule
        width: Width of the image in pixels (default: 500)
        height: Height of the image in pixels (default: 500)
        background: Background color (default: dark grey)

    Returns:
        Base64-encoded SVG data URI or None if SMILES is invalid
    """
    molecule = _validate_molecule(smiles)
    if not molecule:
        return None

    # Lay the molecule out in 2D before drawing
    AllChem.Compute2DCoords(molecule)

    # Draw onto an SVG canvas configured for the requested background
    svg_drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    _configure_draw_options(svg_drawer.drawOptions(), background)
    svg_drawer.DrawMolecule(molecule)
    svg_drawer.FinishDrawing()

    # Wrap the SVG text in a base64 data URI
    svg_bytes = svg_drawer.GetDrawingText().encode("utf-8")
    encoded_svg = base64.b64encode(svg_bytes).decode("utf-8")
    return f"data:image/svg+xml;base64,{encoded_svg}"
144
+
145
+
146
def show(smiles: str, width: int = 500, height: int = 500, background: str = "rgba(64, 64, 64, 1)") -> None:
    """Render the molecule and display the resulting image.

    Logs an error instead of displaying anything when no image could be produced.

    Args:
        smiles: SMILES string representing the molecule
        width: Width of the image in pixels (default: 500)
        height: Height of the image in pixels (default: 500)
        background: Background color (default: dark grey)
    """
    img = img_from_smiles(smiles, width, height, background)
    if not img:
        log.error(f"Cannot display molecule for SMILES: {smiles}")
        return
    img.show()
160
+
161
+
162
if __name__ == "__main__":
    # Manual smoke tests for the visualization helpers
    print("Running molecular visualization tests...")

    # name -> SMILES ("invalid" and "empty" are deliberately unparseable)
    test_molecules = {
        "benzene": "c1ccccc1",
        "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
        "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
        "invalid": "not_a_smiles",
        "empty": "",
    }
    bad_names = ("invalid", "empty")
    valid_molecules = {name: smiles for name, smiles in test_molecules.items() if name not in bad_names}

    # Test 1: Valid SMILES image generation
    print("\n1. Testing image generation from valid SMILES...")
    for name, smiles in valid_molecules.items():
        img = img_from_smiles(smiles, width=200, height=200)
        status = "✓" if img else "✗"
        print(f" {status} {name}: {'Success' if img else 'Failed'}")

    # Test 2: Invalid SMILES handling
    print("\n2. Testing invalid SMILES handling...")
    img = img_from_smiles(test_molecules["invalid"])
    print(f" {'✓' if img is None else '✗'} Invalid SMILES returns None: {img is None}")

    img = img_from_smiles(test_molecules["empty"])
    print(f" {'✓' if img is None else '✗'} Empty SMILES returns None: {img is None}")

    # Test 3: SVG generation
    print("\n3. Testing SVG generation...")
    for name, smiles in valid_molecules.items():
        svg = svg_from_smiles(smiles, width=200, height=200)
        is_valid = svg and svg.startswith("data:image/svg+xml;base64,")
        status = "✓" if is_valid else "✗"
        print(f" {status} {name}: {'Valid SVG data URI' if is_valid else 'Failed'}")

    # Test 4: Different backgrounds
    print("\n4. Testing different background colors...")
    backgrounds = [
        ("Light", "rgba(255, 255, 255, 1)"),
        ("Dark", "rgba(0, 0, 0, 1)"),
        ("Custom", "rgba(100, 150, 200, 0.8)"),
    ]

    for bg_name, bg_color in backgrounds:
        img = img_from_smiles(test_molecules["benzene"], background=bg_color)
        status = "✓" if img else "✗"
        print(f" {status} {bg_name} background: {'Success' if img else 'Failed'}")

    # Test 5: Size variations
    print("\n5. Testing different image sizes...")
    sizes = [(100, 100), (500, 500), (1000, 800)]

    for w, h in sizes:
        img = img_from_smiles(test_molecules["caffeine"], width=w, height=h)
        status = "✓" if img else "✗"
        print(f" {status} Size {w}x{h}: {'Success' if img else 'Failed'}")

    # Test 6: Color parsing functions
    print("\n6. Testing color utility functions...")
    # (color string, expected _is_dark result, expected _rgba_to_tuple result)
    test_colors = [
        ("invalid_color", True, (0.25, 0.25, 0.25, 1.0)),  # Should use defaults
        ("rgba(255, 255, 255, 1)", False, (1.0, 1.0, 1.0, 1.0)),
        ("rgba(0, 0, 0, 1)", True, (0.0, 0.0, 0.0, 1.0)),
        ("rgba(64, 64, 64, 0.5)", True, (0.251, 0.251, 0.251, 0.5)),
        ("rgb(128, 128, 128)", False, (0.502, 0.502, 0.502, 1.0)),
    ]

    for color, expected_dark, expected_tuple in test_colors:
        is_dark_result = _is_dark(color)
        tuple_result = _rgba_to_tuple(color)

        dark_status = "✓" if is_dark_result == expected_dark else "✗"
        print(f" {dark_status} is_dark('{color[:20]}...'): {is_dark_result} == {expected_dark}")

        # Compare channel by channel with a small tolerance for rounding
        tuple_match = all(abs(a - b) < 0.01 for a, b in zip(tuple_result, expected_tuple))
        tuple_status = "✓" if tuple_match else "✗"
        print(f" {tuple_status} rgba_to_tuple('{color[:20]}...'): matches expected")

    # Test 7: the show function (opens image windows)
    print("\n7. Testing show function (will open image windows)...")
    try:
        show(test_molecules["aspirin"])
        show(test_molecules["aspirin"], background="rgba(220, 220, 220, 1)")
        print(" ✓ show() function executed (check for image window)")
    except Exception as e:
        print(f" ✗ show() function failed: {e}")

    print("\n✅ All tests completed!")
@@ -4,16 +4,13 @@ import os
4
4
  import sys
5
5
  import platform
6
6
  import logging
7
- import importlib.resources as resources # noqa: F401 Python 3.9 compatibility
8
7
  from typing import Any, Dict
8
+ from importlib.resources import files, as_file
9
9
 
10
10
  # Workbench imports
11
11
  from workbench.utils.license_manager import LicenseManager
12
12
  from workbench_bridges.utils.execution_environment import running_as_service
13
13
 
14
- # Python 3.9 compatibility
15
- from workbench.utils.resource_utils import get_resource_path
16
-
17
14
 
18
15
  class FatalConfigError(Exception):
19
16
  """Exception raised for errors in the configuration."""
@@ -172,8 +169,7 @@ class ConfigManager:
172
169
  Returns:
173
170
  str: The open source API key.
174
171
  """
175
- # Python 3.9 compatibility
176
- with get_resource_path("workbench.resources", "open_source_api.key") as open_source_key_path:
172
+ with as_file(files("workbench.resources").joinpath("open_source_api.key")) as open_source_key_path:
177
173
  with open(open_source_key_path, "r") as key_file:
178
174
  return key_file.read().strip()
179
175
 
@@ -7,9 +7,7 @@ from typing import Union, Optional
7
7
  import pandas as pd
8
8
 
9
9
  # Workbench Imports
10
- from workbench.api.feature_set import FeatureSet
11
- from workbench.api.model import Model
12
- from workbench.api.endpoint import Endpoint
10
+ from workbench.api import FeatureSet, Model, Endpoint
13
11
 
14
12
  # Set up the log
15
13
  log = logging.getLogger("workbench")
@@ -77,7 +75,7 @@ def internal_model_data_url(endpoint_config_name: str, session: boto3.Session) -
77
75
  return None
78
76
 
79
77
 
80
- def fs_training_data(end: Endpoint) -> pd.DataFrame:
78
+ def get_training_data(end: Endpoint) -> pd.DataFrame:
81
79
  """Code to get the training data from the FeatureSet used to train the Model
82
80
 
83
81
  Args:
@@ -100,7 +98,7 @@ def fs_training_data(end: Endpoint) -> pd.DataFrame:
100
98
  return train_df
101
99
 
102
100
 
103
- def fs_evaluation_data(end: Endpoint) -> pd.DataFrame:
101
+ def get_evaluation_data(end: Endpoint) -> pd.DataFrame:
104
102
  """Code to get the evaluation data from the FeatureSet NOT used for training
105
103
 
106
104
  Args:
@@ -178,11 +176,11 @@ if __name__ == "__main__":
178
176
  print(model_data_url)
179
177
 
180
178
  # Get the training data
181
- my_train_df = fs_training_data(my_endpoint)
179
+ my_train_df = get_training_data(my_endpoint)
182
180
  print(my_train_df)
183
181
 
184
182
  # Get the evaluation data
185
- my_eval_df = fs_evaluation_data(my_endpoint)
183
+ my_eval_df = get_evaluation_data(my_endpoint)
186
184
  print(my_eval_df)
187
185
 
188
186
  # Backtrack to the FeatureSet
@@ -6,15 +6,12 @@ import json
6
6
  import logging
7
7
  import requests
8
8
  from typing import Union
9
- import importlib.resources as resources # noqa: F401 Python 3.9 compatibility
10
9
  from datetime import datetime
11
10
  from cryptography.hazmat.primitives import hashes
12
11
  from cryptography.hazmat.primitives.asymmetric import padding
13
12
  from cryptography.hazmat.primitives import serialization
14
13
  from cryptography.hazmat.backends import default_backend
15
-
16
- # Python 3.9 compatibility
17
- from workbench.utils.resource_utils import get_resource_path
14
+ from importlib.resources import files, as_file
18
15
 
19
16
 
20
17
  class FatalLicenseError(Exception):
@@ -140,8 +137,7 @@ class LicenseManager:
140
137
  Returns:
141
138
  The public key as an object.
142
139
  """
143
- # Python 3.9 compatibility
144
- with get_resource_path("workbench.resources", "signature_verify_pub.pem") as public_key_path:
140
+ with as_file(files("workbench.resources").joinpath("signature_verify_pub.pem")) as public_key_path:
145
141
  with open(public_key_path, "rb") as key_file:
146
142
  public_key_data = key_file.read()
147
143